Exemple #1
0
    def partial_eval(self):
        """
        Simplify the comparison of `self.lhs` with `self.rhs`.

        Both operands are partially evaluated through `ES52` first, then:

        * two literals collapse to a constant: FALSE when `value_compare`
          returns a truthy result for the two values, TRUE otherwise
        * a literal compared with a BOOLEAN-typed operand is converted with
          `string2boolean`; a value that does not convert gives FALSE
        * operands whose types are both known (not OBJECT) and are not the
          same JSON type give FALSE
        * a NestedOp on the left gets the comparison added to its `where`

        :return: TRUE, FALSE, or a rewritten expression
        """
        left = ES52[self.lhs].partial_eval()
        right = ES52[self.rhs].partial_eval()

        if is_literal(left):
            if is_literal(right):
                # BOTH SIDES ARE CONSTANT, SO IS THE ANSWER
                if value_compare(left.value, right.value):
                    return FALSE
                return TRUE
            # FLIP SO WE CAN USE TERMS FILTER (LITERAL GOES ON THE RIGHT)
            left, right = right, left

        if is_literal(right) and same_json_type(left.type, BOOLEAN):
            # SPECIAL CASE true == "T"
            as_boolean = string2boolean(right.value)
            if as_boolean is None:
                return FALSE
            return EqOp([left, Literal(as_boolean)])

        # OBJECT MEANS WE REALLY DO NOT KNOW THE TYPE
        both_types_known = left.type != OBJECT and right.type != OBJECT
        if both_types_known and not same_json_type(left.type, right.type):
            return FALSE

        if is_op(left, NestedOp):
            # MOVE THE COMPARISON INSIDE THE NESTED QUERY'S FILTER
            nested_filter = AndOp([left.where, EqOp([left.select, right])])
            return self.lang[NestedOp(path=left.frum, where=nested_filter)]

        return EqOp([left, right])
Exemple #2
0
    def to_bq(self, schema, not_null=False, boolean=False):
        """
        Build the script for `self.term` with a NUMBER data type.

        The term's own script is returned untouched when it already has a
        NUMBER-compatible data type; otherwise it is wrapped in a "CAST"
        call built from the term, SQL_AS and SQL_FLOAT64.

        :param schema: passed to the term's `to_bq` and to the new script
        :param not_null: not used by this method
        :param boolean: not used by this method
        :return: the term's script, or a BQLScript of data_type NUMBER
        """
        term_script = BQLang[self.term].to_bq(schema)
        if same_json_type(term_script.data_type, NUMBER):
            # ALREADY A NUMBER, NO CAST REQUIRED
            return term_script

        cast_expr = sql_call("CAST",
                             ConcatSQL(term_script, SQL_AS, SQL_FLOAT64))
        return BQLScript(
            data_type=NUMBER,
            expr=cast_expr,
            frum=self,
            miss=self.missing(),
            many=False,
            schema=schema,
        )
    def __init__(self, term, **clauses):
        """
        Set up a conditional expression: `term` is the condition, and the
        optional "then" / "else" clauses are the two outcomes.

        :param term: the condition, stored as `self.when`
        :param clauses: may hold "then" and "else"; each defaults to NULL
                        (via `coalesce`) when absent

        `self.data_type` is taken from the only non-NULL outcome when one of
        them is NULL, is the merged type when both outcomes share a JSON
        type, and is OBJECT when they differ.
        """
        Expression.__init__(self, [term])

        self.when = term
        self.then = coalesce(clauses.get("then"), NULL)
        self.els_ = coalesce(clauses.get("else"), NULL)

        then_is_null = self.then is NULL
        if then_is_null or self.els_ is NULL:
            # AT MOST ONE OUTCOME CARRIES A TYPE; `then` MISSING WINS THE TIE
            typed_outcome = self.els_ if then_is_null else self.then
            self.data_type = typed_outcome.type
        elif same_json_type(self.then.type, self.els_.type):
            self.data_type = merge_json_type(self.then.type, self.els_.type)
        else:
            # OUTCOMES DISAGREE, SO THE RESULT TYPE IS NOT KNOWN
            self.data_type = OBJECT
Exemple #4
0
    def to_es(self, schema):
        """
        Translate this comparison into an Elasticsearch filter.

        When `self.lhs` is a variable and `self.rhs` is a literal, the
        literal is matched against the columns `schema.leaves()` returns for
        the variable:

        * a single value gives `{"term": {es_column: value}}` for the first
          column whose JSON type matches (all NUMBER_TYPES match each other)
        * a list of values that share one JSON type gives
          `{"terms": {es_column: values}}` for the first matching column
        * a list of values with several JSON types is split into one
          comparison per type, OR-ed together, and converted recursively
        * no matching column gives `FALSE.to_es(schema)`

        Any other operand combination is rewritten as a CaseOp that deals
        with missing operands before falling back to BasicEqOp.

        :param schema: used to look up the columns of the variable, and
                       passed on to nested `to_es` calls
        :return: the filter for this comparison
        """
        if is_op(self.lhs, Variable_) and is_literal(self.rhs):
            rhs = self.rhs.value
            lhs = self.lhs.var
            cols = schema.leaves(lhs)
            if not cols:
                # ONLY A WARNING: PROCESSING CONTINUES BELOW
                Log.warning(
                    "{{col}} does not exist while processing {{expr}}",
                    col=lhs,
                    expr=self.__data__(),
                )

            if is_container(rhs):
                if len(rhs) == 1:
                    # ONE-ELEMENT LIST IS TREATED AS A PLAIN VALUE
                    rhs = rhs[0]
                else:
                    types = Data()  # MAP JSON TYPE TO LIST OF LITERALS
                    for r in rhs:
                        types[python_type_to_json_type[r.__class__]] += [r]
                    if len(types) == 1:
                        jx_type, values = first(types.items())
                        for c in cols:
                            if same_json_type(jx_type, c.jx_type):
                                return {"terms": {c.es_column: values}}
                        return FALSE.to_es(schema)
                    else:
                        # ONE COMPARISON PER JSON TYPE, ANY OF THEM MAY MATCH
                        return (OrOp([
                            EqOp([self.lhs, values])
                            for _, values in types.items()
                        ]).partial_eval().to_es(schema))

            for c in cols:
                # CONVERT INTO A PER-COLUMN LOCAL: OVERWRITING rhs HERE WOULD
                # MAKE EVERY LATER COLUMN COMPARE AGAINST THE BOOLEAN-CONVERTED
                # VALUE, SO THE RESULT WOULD DEPEND ON COLUMN ORDER
                candidate = rhs
                if c.jx_type == BOOLEAN:
                    candidate = pull_functions[c.jx_type](rhs)
                candidate_type = python_type_to_json_type[candidate.__class__]
                if candidate_type == c.jx_type or (
                        candidate_type in NUMBER_TYPES
                        and c.jx_type in NUMBER_TYPES):
                    return {"term": {c.es_column: candidate}}
            return FALSE.to_es(schema)
        else:
            return ES52[CaseOp([
                WhenOp(self.lhs.missing(), **{"then": self.rhs.missing()}),
                WhenOp(self.rhs.missing(), **{"then": FALSE}),
                BasicEqOp([self.lhs, self.rhs]),
            ]).partial_eval()].to_es(schema)
def typed_encode(value, sub_schema, path, net_new_properties, buffer):
    """
    Append the typed-JSON encoding of `value` to `buffer`.

    Each value is written as an object whose key names the value's type
    (the QUOTED_*_TYPE constants) followed by the encoded value.  Whenever a
    type key is not yet in `sub_schema` it is added there, and the
    corresponding `path + [TYPE]` is appended to `net_new_properties`.
    Containers are delegated to `_dict2json`, `_list2json`,
    `_multivalue2json` and `_iter2json`.

    :param value: THE DATA STRUCTURE TO ENCODE
    :param sub_schema: dict FROM PATH TO Column DESCRIBING THE TYPE
                       (MUTATED: NEWLY SEEN TYPE KEYS ARE ADDED); MAY ALSO BE
                       A Column, WHICH IS VALIDATED AGAINST value AND WRAPPED
    :param path: list OF CURRENT PATH
    :param net_new_properties: list FOR ADDING NEW PROPERTIES NOT FOUND IN sub_schema
    :param buffer: UnicodeBuilder OBJECT
    :return: None; OUTPUT IS WRITTEN TO buffer

    Any exception raised inside is reported through `Log.error` with the
    original exception attached as `cause`.
    """
    try:
        # from jx_base import Column
        # Column is recognised by class name; presumably this avoids importing
        # jx_base here (see the commented-out import above) - TODO confirm
        if sub_schema.__class__.__name__ == 'Column':
            value_json_type = python_type_to_json_type[value.__class__]
            column_json_type = es_type_to_json_type[sub_schema.es_type]

            if same_json_type(value_json_type, column_json_type):
                pass  # ok
            elif value_json_type == NESTED and all(
                    python_type_to_json_type[v.__class__] == column_json_type
                    for v in value if v != None):
                # accepted when every non-missing element has the column's
                # type; all() is True for an empty sequence, so
                pass  # empty arrays can be anything
            else:
                from mo_logs import Log

                Log.error("Can not store {{value}} in {{column|quote}}",
                          value=value,
                          column=sub_schema.name)

            # wrap the Column so the rest of the function can treat
            # sub_schema as a dict keyed by type
            sub_schema = {
                json_type_to_inserter_type[value_json_type]: sub_schema
            }

        # `== None` (not `is None`) is kept as written; presumably so that
        # null-like objects comparing equal to None are caught - TODO confirm
        if value == None and path:
            from mo_logs import Log
            Log.error("can not encode null (missing) values")
        # identity checks: only the bool singletons match, not 1 / 0
        elif value is True:
            if BOOLEAN_TYPE not in sub_schema:
                sub_schema[BOOLEAN_TYPE] = {}
                net_new_properties.append(path + [BOOLEAN_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_BOOLEAN_TYPE)
            append(buffer, 'true}')
            return
        elif value is False:
            if BOOLEAN_TYPE not in sub_schema:
                sub_schema[BOOLEAN_TYPE] = {}
                net_new_properties.append(path + [BOOLEAN_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_BOOLEAN_TYPE)
            append(buffer, 'false}')
            return

        # dispatch on the exact class (no isinstance), so subclasses of the
        # listed types do not match the `is` / `in` tests below
        _type = value.__class__
        if _type in (dict, Data):
            # NOTE(review): a Column arriving in sub_schema was replaced by a
            # dict above, so this check looks unreachable provided Log.error
            # raises - confirm before removing
            if sub_schema.__class__.__name__ == 'Column':
                from mo_logs import Log
                Log.error("Can not handle {{column|json}}", column=sub_schema)

            if NESTED_TYPE in sub_schema:
                # PREFER NESTED, WHEN SEEN BEFORE
                if value:
                    # the object is written as a one-element nested array,
                    # followed by an exists entry holding len(value)
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    append(buffer, '[')
                    _dict2json(value, sub_schema[NESTED_TYPE],
                               path + [NESTED_TYPE], net_new_properties,
                               buffer)
                    append(buffer, ']' + COMMA)
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, text(len(value)))
                    append(buffer, '}')
                else:
                    # SINGLETON LIST
                    # empty object: nested array holding one exists marker
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    append(buffer, '[{')
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, '1}]')
                    append(buffer, COMMA)
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, '1}')
            else:
                if EXISTS_TYPE not in sub_schema:
                    sub_schema[EXISTS_TYPE] = {}
                    net_new_properties.append(path + [EXISTS_TYPE])

                if value:
                    _dict2json(value, sub_schema, path, net_new_properties,
                               buffer)
                else:
                    # empty object: only the exists marker is written
                    append(buffer, '{')
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, '1}')
        elif _type is binary_type:
            # bytes are decoded as utf8 and written as a string
            if STRING_TYPE not in sub_schema:
                sub_schema[STRING_TYPE] = True
                net_new_properties.append(path + [STRING_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_STRING_TYPE)
            append(buffer, '"')
            try:
                v = value.decode('utf8')
            except Exception as e:
                raise problem_serializing(value, e)

            # escape character by character; unlisted characters pass through
            for c in v:
                append(buffer, ESCAPE_DCT.get(c, c))
            append(buffer, '"}')
        elif _type is text:
            if STRING_TYPE not in sub_schema:
                sub_schema[STRING_TYPE] = True
                net_new_properties.append(path + [STRING_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_STRING_TYPE)
            append(buffer, '"')
            for c in value:
                append(buffer, ESCAPE_DCT.get(c, c))
            append(buffer, '"}')
        elif _type in integer_types:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])

            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, text(value))
            append(buffer, '}')
        elif _type in (float, Decimal):
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value))
            append(buffer, '}')
        elif _type in (set, list, tuple, FlatList):
            if len(value) == 0:
                # empty sequence: exists marker with a count of zero
                append(buffer, '{')
                append(buffer, QUOTED_EXISTS_TYPE)
                append(buffer, '0}')
            elif any(v.__class__ in (Data, dict, set, list, tuple, FlatList)
                     for v in value):
                # at least one element is itself an object or a sequence
                # NOTE(review): the original comment on this branch was left
                # unfinished ("THIS IS NOT DONE BECAUSE")
                if len(value) == 1:
                    if NESTED_TYPE in sub_schema:
                        append(buffer, '{')
                        append(buffer, QUOTED_NESTED_TYPE)
                        _list2json(value, sub_schema[NESTED_TYPE],
                                   path + [NESTED_TYPE], net_new_properties,
                                   buffer)
                        append(buffer, '}')
                    else:
                        # NO NEED TO NEST, SO DO NOT DO IT
                        typed_encode(value[0], sub_schema, path,
                                     net_new_properties, buffer)
                else:
                    if NESTED_TYPE not in sub_schema:
                        sub_schema[NESTED_TYPE] = {}
                        net_new_properties.append(path + [NESTED_TYPE])
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    _list2json(value, sub_schema[NESTED_TYPE],
                               path + [NESTED_TYPE], net_new_properties,
                               buffer)
                    append(buffer, '}')
            else:
                # ALLOW PRIMITIVE MULTIVALUES
                # missing elements are dropped before the types are collected
                value = [v for v in value if v != None]
                types = list(
                    set(json_type_to_inserter_type[python_type_to_json_type[
                        v.__class__]] for v in value))
                if len(types) == 0:  # HANDLE LISTS WITH Nones IN THEM
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    append(buffer, '[]}')
                elif len(types) > 1:
                    # NOTE(review): unlike the other nested branches, no
                    # '{' + QUOTED_NESTED_TYPE wrapper is written here and
                    # sub_schema itself (not sub_schema[NESTED_TYPE]) is
                    # passed along with a NESTED_TYPE path - confirm intended
                    _list2json(value, sub_schema, path + [NESTED_TYPE],
                               net_new_properties, buffer)
                else:
                    # all elements share one type: written under that type
                    element_type = types[0]
                    if element_type not in sub_schema:
                        sub_schema[element_type] = True
                        net_new_properties.append(path + [element_type])
                    append(buffer, '{')
                    append(buffer, quote(element_type))
                    append(buffer, COLON)
                    _multivalue2json(value, sub_schema[element_type],
                                     path + [element_type], net_new_properties,
                                     buffer)
                    append(buffer, '}')
        # the date/time branches below all write a number: datetime2unix()
        # for date and datetime, .unix for Date, total_seconds() for
        # timedelta, .seconds for Duration
        elif _type is date:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(datetime2unix(value)))
            append(buffer, '}')
        elif _type is datetime:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(datetime2unix(value)))
            append(buffer, '}')
        elif _type is Date:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value.unix))
            append(buffer, '}')
        elif _type is timedelta:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value.total_seconds()))
            append(buffer, '}')
        elif _type is Duration:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value.seconds))
            append(buffer, '}')
        elif _type is NullType:
            # only reachable with an empty path; a non-empty path is
            # rejected by the `value == None and path` check above when
            # the value compares equal to None
            append(buffer, 'null')
        elif hasattr(value, '__data__'):
            # objects that can describe themselves as plain data are
            # converted, then encoded with the same schema and path
            typed_encode(value.__data__(), sub_schema, path,
                         net_new_properties, buffer)
        elif hasattr(value, '__iter__'):
            # any other iterable is written as a nested array
            if NESTED_TYPE not in sub_schema:
                sub_schema[NESTED_TYPE] = {}
                net_new_properties.append(path + [NESTED_TYPE])

            append(buffer, '{')
            append(buffer, QUOTED_NESTED_TYPE)
            _iter2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE],
                       net_new_properties, buffer)
            append(buffer, '}')
        else:
            from mo_logs import Log

            Log.error(text(repr(value)) + " is not JSON serializable")
    except Exception as e:
        # every failure above (including the Log.error calls, if they raise)
        # is re-reported here with the original exception as the cause
        from mo_logs import Log

        Log.error(text(repr(value)) + " is not JSON serializable", cause=e)