Esempio n. 1
0
def find_container(frum, after):
    """
    :param frum:
    :return:
    """
    global namespace
    if not namespace:
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )
        namespace = ElasticsearchMetadata(container.config.default.settings)
    if not frum:
        Log.error("expecting json query expression with from clause")

    # FORCE A RELOAD
    namespace.get_columns(frum, after=after)

    if is_text(frum):
        if frum in container_cache:
            return container_cache[frum]

        path = split_field(frum)
        if path[0] == "meta":
            if path[1] == "columns":
                return namespace.meta.columns.denormalized()
            elif path[1] == "tables":
                return namespace.meta.tables
            else:
                fact_table_name = join_field(path[:2])
        else:
            fact_table_name = path[0]

        type_ = container.config.default.type

        settings = set_default(
            {
                "alias": fact_table_name,
                "name": frum,
                "exists": True
            },
            container.config.default.settings,
        )
        settings.type = None
        output = container.type2container[type_](settings)
        container_cache[frum] = output
        return output
    elif is_data(frum) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return container.type2container[frum.type](frum.settings)
    elif is_data(frum) and (frum["from"] or is_container(frum["from"])):
        from jx_base.query import QueryOp

        return QueryOp.wrap(frum)
    elif is_container(frum):
        return ListContainer("test_list", frum)
    else:
        return frum
Esempio n. 2
0
def filter(data, where):
    """
    where  - a function that accepts (record, rownum, rows) and returns boolean
    """
    if len(data) == 0 or where == None or where == TRUE:
        return data

    if isinstance(data, Container):
        return data.filter(where)

    if is_container(data):
        temp = get(where)
        dd = wrap(data)
        return wrap(
            [unwrap(d) for i, d in enumerate(data) if temp(wrap(d), i, dd)])
    else:
        Log.error("Do not know how to handle type {{type}}",
                  type=data.__class__.__name__)

    try:
        return drill_filter(where, data)
    except Exception as _:
        # WOW!  THIS IS INEFFICIENT!
        return wrap([
            unwrap(d)
            for d in drill_filter(where, [DataObject(d) for d in data])
        ])
Esempio n. 3
0
def filter(data, where):
    """
    where  - a function that accepts (record, rownum, rows) and returns boolean
    """
    if len(data) == 0 or where == None or where == TRUE:
        return data

    if isinstance(data, Container):
        return data.filter(where)

    if is_container(data):
        temp = jx_expression_to_function(where)
        dd = wrap(data)
        return wrap([unwrap(d) for i, d in enumerate(data) if temp(wrap(d), i, dd)])
    else:
        Log.error(
            "Do not know how to handle type {{type}}", type=data.__class__.__name__
        )

    try:
        return drill_filter(where, data)
    except Exception as _:
        # WOW!  THIS IS INEFFICIENT!
        return wrap(
            [unwrap(d) for d in drill_filter(where, [DataObject(d) for d in data])]
        )
Esempio n. 4
0
 def __init__(self, sep, concat):
     SQL.__init__(self)
     if not is_container(concat):
         concat = list(concat)
     if DEBUG:
         if not isinstance(sep, SQL):
             Log.error("Expecting SQL, not text")
         if any(not isinstance(s, SQL) for s in concat):
             Log.error("Can only join other SQL")
     self.sep = sep
     self.concat = concat
Esempio n. 5
0
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
    """
    if is_data(where):
        if where.term:
            # MAP TERM
            try:
                output = _map_term_using_schema(master, [], where.term, schema.edges)
                return output
            except Exception as e:
                Log.error("programmer problem?", e)
        elif where.terms:
            # MAP TERM
            output = FlatList()
            for k, v in where.terms.items():
                if not is_container(v):
                    Log.error("terms filter expects list of values")
                edge = schema.edges[k]
                if not edge:
                    output.append({"terms": {k: v}})
                else:
                    if is_text(edge):
                        # DIRECT FIELD REFERENCE
                        return {"terms": {edge: v}}
                    try:
                        domain = edge.getDomain()
                    except Exception as e:
                        Log.error("programmer error", e)
                    fields = domain.dimension.fields
                    if is_data(fields):
                        or_agg = []
                        for vv in v:
                            and_agg = []
                            for local_field, es_field in fields.items():
                                vvv = vv[local_field]
                                if vvv != None:
                                    and_agg.append({"term": {es_field: vvv}})
                            or_agg.append({"and": and_agg})
                        output.append({"or": or_agg})
                    elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]):
                        output.append({"terms": {fields[0]: v}})
                    elif domain.partitions:
                        output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
            return {"and": output}
        elif where["or"]:
            return {"or": [unwrap(_where_terms(master, vv, schema)) for vv in where["or"]]}
        elif where["and"]:
            return {"and": [unwrap(_where_terms(master, vv, schema)) for vv in where["and"]]}
        elif where["not"]:
            return {"not": unwrap(_where_terms(master, where["not"], schema))}
    return where
Esempio n. 6
0
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
    """
    if is_data(where):
        if where.term:
            # MAP TERM
            try:
                output = _map_term_using_schema(master, [], where.term, schema.edges)
                return output
            except Exception as e:
                Log.error("programmer problem?", e)
        elif where.terms:
            # MAP TERM
            output = FlatList()
            for k, v in where.terms.items():
                if not is_container(v):
                    Log.error("terms filter expects list of values")
                edge = schema.edges[k]
                if not edge:
                    output.append({"terms": {k: v}})
                else:
                    if is_text(edge):
                        # DIRECT FIELD REFERENCE
                        return {"terms": {edge: v}}
                    try:
                        domain = edge.getDomain()
                    except Exception as e:
                        Log.error("programmer error", e)
                    fields = domain.dimension.fields
                    if is_data(fields):
                        or_agg = []
                        for vv in v:
                            and_agg = []
                            for local_field, es_field in fields.items():
                                vvv = vv[local_field]
                                if vvv != None:
                                    and_agg.append({"term": {es_field: vvv}})
                            or_agg.append({"and": and_agg})
                        output.append({"or": or_agg})
                    elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]):
                        output.append({"terms": {fields[0]: v}})
                    elif domain.partitions:
                        output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
            return {"and": output}
        elif where["or"]:
            return {"or": [unwrap(_where_terms(master, vv, schema)) for vv in where["or"]]}
        elif where["and"]:
            return {"and": [unwrap(_where_terms(master, vv, schema)) for vv in where["and"]]}
        elif where["not"]:
            return {"not": unwrap(_where_terms(master, where["not"], schema))}
    return where
Esempio n. 7
0
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        desc = wrap(desc)

        self.type = "set"
        self.order = {}
        self.NULL = Null
        self.partitions = FlatList()

        if isinstance(self.key, set):
            Log.error("problem")

        if isinstance(desc.partitions[0], (int, float, text_type)):
            # ASSMUE PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
            self.key = "value"
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                part = {"name": p, "value": p, "dataIndex": i}
                self.partitions.append(part)
                self.map[p] = part
                self.order[p] = i
        elif desc.partitions and desc.dimension.fields and len(
                desc.dimension.fields) > 1:
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.dimension.fields)
        elif desc.partitions and is_container(desc.key):
            # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
        elif desc.partitions and is_data(desc.partitions[0][desc.key]):
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
            # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
            # self.map = UniqueIndex(keys=self.key)
        elif desc.key == None:
            Log.error("Domains must have keys")
        elif self.key:
            self.key = desc.key
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
        elif all(p.esfilter for p in self.partitions):
            # EVERY PART HAS AN esfilter DEFINED, SO USE THEM
            for i, p in enumerate(self.partitions):
                p.dataIndex = i

        else:
            Log.error("Can not hanldle")

        self.label = coalesce(self.label, "name")
Esempio n. 8
0
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        desc = wrap(desc)

        self.type = "set"
        self.order = {}
        self.NULL = Null
        self.partitions = FlatList()

        if isinstance(self.key, set):
            Log.error("problem")

        if isinstance(desc.partitions[0], (int, float, text_type)):
            # ASSMUE PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
            self.key = "value"
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                part = {"name": p, "value": p, "dataIndex": i}
                self.partitions.append(part)
                self.map[p] = part
                self.order[p] = i
        elif desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.dimension.fields)
        elif desc.partitions and is_container(desc.key):
            # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
        elif desc.partitions and is_data(desc.partitions[0][desc.key]):
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
            # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
            # self.map = UniqueIndex(keys=self.key)
        elif desc.key == None:
            Log.error("Domains must have keys")
        elif self.key:
            self.key = desc.key
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
        elif all(p.esfilter for p in self.partitions):
            # EVERY PART HAS AN esfilter DEFINED, SO USE THEM
            for i, p in enumerate(self.partitions):
                p.dataIndex = i

        else:
            Log.error("Can not hanldle")

        self.label = coalesce(self.label, "name")
Esempio n. 9
0
    def define(cls, expr):
        """
        GENERAL SUPPORT FOR BUILDING EXPRESSIONS FROM JSON EXPRESSIONS
        OVERRIDE THIS IF AN OPERATOR EXPECTS COMPLICATED PARAMETERS
        :param expr: Data representing a JSON Expression
        :return: parse tree
        """

        try:
            lang = cls.lang
            items = items_(expr)
            for item in items:
                op, term = item
                full_op = operators.get(op)
                if full_op:
                    class_ = lang.ops[full_op.get_id()]
                    clauses = {
                        k: jx_expression(v)
                        for k, v in expr.items() if k != op
                    }
                    break
            else:
                if not items:
                    return NULL
                raise Log.error("{{operator|quote}} is not a known operator",
                                operator=expr)

            if term == None:
                return class_([], **clauses)
            elif is_container(term):
                terms = [jx_expression(t) for t in term]
                return class_(terms, **clauses)
            elif is_data(term):
                items = items_(term)
                if class_.has_simple_form:
                    if len(items) == 1:
                        k, v = items[0]
                        return class_([Variable(k), Literal(v)], **clauses)
                    else:
                        return class_({k: Literal(v)
                                       for k, v in items}, **clauses)
                else:
                    return class_(_jx_expression(term, lang), **clauses)
            else:
                if op in ["literal", "date", "offset"]:
                    return class_(term, **clauses)
                else:
                    return class_(_jx_expression(term, lang), **clauses)
        except Exception as e:
            Log.error("programmer error expr = {{value|quote}}",
                      value=expr,
                      cause=e)
Esempio n. 10
0
def UNION(values, *others):
    if len(others) > 0:
        from mo_logs import Log
        Log.error("no longer accepting args, use a single list")

    output = set()
    for v in values:
        if values == None:
            continue
        if is_container(v):
            output.update(v)
            continue
        else:
            output.add(v)
    return output
Esempio n. 11
0
    def to_esfilter(self, schema):
        if is_op(self.lhs, Variable_) and is_literal(self.rhs):
            rhs = self.rhs.value
            lhs = self.lhs.var
            cols = schema.leaves(lhs)
            if not cols:
                Log.warning(
                    "{{col}} does not exist while processing {{expr}}",
                    col=lhs,
                    expr=self.__data__(),
                )

            if is_container(rhs):
                if len(rhs) == 1:
                    rhs = rhs[0]
                else:
                    types = Data()  # MAP JSON TYPE TO LIST OF LITERALS
                    for r in rhs:
                        types[python_type_to_json_type[r.__class__]] += [r]
                    if len(types) == 1:
                        jx_type, values = first(types.items())
                        for c in cols:
                            if jx_type == c.jx_type or (jx_type in NUMBER_TYPES
                                                        and c.jx_type
                                                        in NUMBER_TYPES):
                                return {"terms": {c.es_column: values}}
                        return FALSE.to_esfilter(schema)
                    else:
                        return (OrOp([
                            EqOp([self.lhs, values])
                            for t, values in types.items()
                        ]).partial_eval().to_esfilter(schema))

            for c in cols:
                if c.jx_type == BOOLEAN:
                    rhs = pull_functions[c.jx_type](rhs)
                rhs_type = python_type_to_json_type[rhs.__class__]
                if rhs_type == c.jx_type or (rhs_type in NUMBER_TYPES
                                             and c.jx_type in NUMBER_TYPES):
                    return {"term": {c.es_column: rhs}}
            return FALSE.to_esfilter(schema)
        else:
            return (ES52[CaseOp([
                WhenOp(self.lhs.missing(), **{"then": self.rhs.missing()}),
                WhenOp(self.rhs.missing(), **{"then": FALSE}),
                BasicEqOp([self.lhs, self.rhs]),
            ]).partial_eval()].to_esfilter(schema))
Esempio n. 12
0
    def to_esfilter(self, schema):
        if is_op(self.lhs, Variable_) and is_literal(self.rhs):
            rhs = self.rhs.value
            lhs = self.lhs.var
            cols = schema.leaves(lhs)

            if is_container(rhs):
                if len(rhs) == 1:
                    rhs = rhs[0]
                else:
                    types = Data()  # MAP JSON TYPE TO LIST OF LITERALS
                    for r in rhs:
                        types[python_type_to_json_type[rhs.__class__]] += [r]
                    if len(types) == 1:
                        jx_type, values = first(types.items())
                        for c in cols:
                            if jx_type == c.jx_type:
                                return {"terms": {c.es_column: values}}
                        return FALSE.to_esfilter(schema)
                    else:
                        return (OrOp([
                            EqOp([self.lhs, values])
                            for t, values in types.items()
                        ]).partial_eval().to_esfilter(schema))

            for c in cols:
                if c.jx_type == BOOLEAN:
                    rhs = pull_functions[c.jx_type](rhs)
                if python_type_to_json_type[rhs.__class__] == c.jx_type:
                    return {"term": {c.es_column: rhs}}
            return FALSE.to_esfilter(schema)
        else:
            return (ES52[CaseOp([
                WhenOp(self.lhs.missing(), **{"then": self.rhs.missing()}),
                WhenOp(self.rhs.missing(), **{"then": FALSE}),
                BasicEqOp([self.lhs, self.rhs]),
            ])].partial_eval().to_esfilter(schema))
 def define(cls, expr):
     expr = to_data(expr)
     term = expr.select
     terms = []
     if not is_container(term):
         raise Log.error("Expecting a list")
     for t in term:
         if is_text(t):
             if not is_variable_name(t):
                 Log.error(
                     "expecting {{value}} a simple dot-delimited path name",
                     value=t)
             terms.append({"name": t, "value": _jx_expression(t, cls.lang)})
         elif t.name == None:
             if t.value == None:
                 Log.error(
                     "expecting select parameters to have name and value properties"
                 )
             elif is_text(t.value):
                 if not is_variable_name(t):
                     Log.error(
                         "expecting {{value}} a simple dot-delimited path name",
                         value=t.value,
                     )
                 else:
                     terms.append({
                         "name":
                         t.value,
                         "value":
                         _jx_expression(t.value, cls.lang),
                     })
             else:
                 Log.error("expecting a name property")
         else:
             terms.append({"name": t.name, "value": jx_expression(t.value)})
     return cls.lang[SelectOp(terms)]
Esempio n. 14
0
def assertAlmostEqual(test, expected, digits=None, places=None, msg=None, delta=None):
    show_detail = True
    test = unwrap(test)
    expected = unwrap(expected)
    try:
        if test is None and expected is None:
            return
        elif test is expected:
            return
        elif is_text(expected):
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
        elif isinstance(test, UniqueIndex):
            if test ^ expected:
                Log.error("Sets do not match")
        elif is_data(expected) and is_data(test):
            for k, v2 in unwrap(expected).items():
                v1 = test.get(k)
                assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta)
        elif is_data(expected):
            for k, v2 in expected.items():
                if is_text(k):
                    v1 = mo_dots.get_attr(test, literal_field(k))
                else:
                    v1 = test[k]
                assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta)
        elif is_container(test) and isinstance(expected, set):
            test = set(wrap(t) for t in test)
            if len(test) != len(expected):
                Log.error(
                    "Sets do not match, element count different:\n{{test|json|indent}}\nexpecting{{expectedtest|json|indent}}",
                    test=test,
                    expected=expected
                )

            for e in expected:
                for t in test:
                    try:
                        assertAlmostEqual(t, e, msg=msg, digits=digits, places=places, delta=delta)
                        break
                    except Exception as _:
                        pass
                else:
                    Log.error("Sets do not match. {{value|json}} not found in {{test|json}}", value=e, test=test)

        elif isinstance(expected, types.FunctionType):
            return expected(test)
        elif hasattr(test, "__iter__") and hasattr(expected, "__iter__"):
            if test.__class__.__name__ == "ndarray":  # numpy
                test = test.tolist()
            elif test.__class__.__name__ == "DataFrame":  # pandas
                test = test[test.columns[0]].values.tolist()
            elif test.__class__.__name__ == "Series":  # pandas
                test = test.values.tolist()

            if not expected and test == None:
                return
            if expected == None:
                expected = []  # REPRESENT NOTHING
            for a, b in zip_longest(test, expected):
                assertAlmostEqual(a, b, msg=msg, digits=digits, places=places, delta=delta)
        else:
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
    except Exception as e:
        Log.error(
            "{{test|json|limit(10000)}} does not match expected {{expected|json|limit(10000)}}",
            test=test if show_detail else "[can not show]",
            expected=expected if show_detail else "[can not show]",
            cause=e
        )
Esempio n. 15
0
def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
    """
    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param parent: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return:
    """

    for d in frum:
        row_type = python_type_to_json_type[d.__class__]

        if row_type != "object":
            # EXPECTING PRIMITIVE VALUE
            full_name = parent
            column = columns[full_name]
            if not column:
                column = Column(
                    name=concat_field(table_name, full_name),
                    es_column=full_name,
                    es_index=".",
                    es_type=d.__class__.__name__,
                    jx_type=None,  # WILL BE SET BELOW
                    last_updated=Date.now(),
                    nested_path=nested_path,
                )
                columns.add(column)
            column.es_type = _merge_python_type(column.es_type, d.__class__)
            column.jx_type = python_type_to_json_type[column.es_type]
        else:
            for name, value in d.items():
                full_name = concat_field(parent, name)
                column = columns[full_name]
                if not column:
                    column = Column(
                        name=concat_field(table_name, full_name),
                        es_column=full_name,
                        es_index=".",
                        es_type=value.__class__.__name__,
                        jx_type=None,  # WILL BE SET BELOW
                        last_updated=Date.now(),
                        nested_path=nested_path,
                    )
                    columns.add(column)
                if is_container(value):  # GET TYPE OF MULTIVALUE
                    v = list(value)
                    if len(v) == 0:
                        this_type = none_type.__name__
                    elif len(v) == 1:
                        this_type = v[0].__class__.__name__
                    else:
                        this_type = reduce(
                            _merge_python_type, (vi.__class__.__name__ for vi in value)
                        )
                else:
                    this_type = value.__class__.__name__
                column.es_type = _merge_python_type(column.es_type, this_type)
                column.jx_type = python_type_to_json_type[column.es_type]

                if this_type in {"object", "dict", "Mapping", "Data"}:
                    _get_schema_from_list(
                        [value], table_name, full_name, nested_path, columns
                    )
                elif this_type in {"list", "FlatList"}:
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(
                        value, table_name, full_name, newpath, columns
                    )
Esempio n. 16
0
def _normalize_edge(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized edge
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param schema: for context
    :return: a normalized edge
    """
    if not _Column:
        _late_import()

    if not edge:
        Log.error("Edge has no value, or expression is empty")
    elif is_text(edge):
        if schema:
            leaves = unwraplist(list(schema.leaves(edge)))
            if not leaves or is_container(leaves):
                return [
                    Data(
                        name=edge,
                        value=jx_expression(edge, schema=schema),
                        allowNulls=True,
                        dim=dim_index,
                        domain=_normalize_domain(None, limit)
                    )
                ]
            elif isinstance(leaves, _Column):
                return [Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=_normalize_domain(domain=leaves, limit=limit, schema=schema)
                )]
            elif is_list(leaves.fields) and len(leaves.fields) == 1:
                return [Data(
                    name=leaves.name,
                    value=jx_expression(leaves.fields[0], schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )]
            else:
                return [Data(
                    name=leaves.name,
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )]
        else:
            return [
                Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=DefaultDomain()
                )
            ]
    else:
        edge = wrap(edge)
        if not edge.name and not is_text(edge.value):
            Log.error("You must name compound and complex edges: {{edge}}", edge=edge)

        if is_container(edge.value) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)

            return [Data(
                name=edge.name,
                value=jx_expression(edge.value, schema=schema),
                allowNulls=bool(coalesce(edge.allowNulls, True)),
                dim=dim_index,
                domain=domain
            )]

        domain = _normalize_domain(edge.domain, schema=schema)

        return [Data(
            name=coalesce(edge.name, edge.value),
            value=jx_expression(edge.value, schema=schema),
            range=_normalize_range(edge.range),
            allowNulls=bool(coalesce(edge.allowNulls, True)),
            dim=dim_index,
            domain=domain
        )]
Esempio n. 17
0
def _normalize_edge(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized edge
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param schema: for context
    :return: a normalized edge
    """
    if not _Column:
        _late_import()

    if not edge:
        Log.error("Edge has no value, or expression is empty")
    elif is_text(edge):
        if schema:
            leaves = unwraplist(list(schema.leaves(edge)))
            if not leaves or is_container(leaves):
                return [
                    Data(name=edge,
                         value=jx_expression(edge, schema=schema),
                         allowNulls=True,
                         dim=dim_index,
                         domain=_normalize_domain(None, limit))
                ]
            elif isinstance(leaves, _Column):
                return [
                    Data(name=edge,
                         value=jx_expression(edge, schema=schema),
                         allowNulls=True,
                         dim=dim_index,
                         domain=_normalize_domain(domain=leaves,
                                                  limit=limit,
                                                  schema=schema))
                ]
            elif is_list(leaves.fields) and len(leaves.fields) == 1:
                return [
                    Data(name=leaves.name,
                         value=jx_expression(leaves.fields[0], schema=schema),
                         allowNulls=True,
                         dim=dim_index,
                         domain=leaves.getDomain())
                ]
            else:
                return [
                    Data(name=leaves.name,
                         allowNulls=True,
                         dim=dim_index,
                         domain=leaves.getDomain())
                ]
        else:
            return [
                Data(name=edge,
                     value=jx_expression(edge, schema=schema),
                     allowNulls=True,
                     dim=dim_index,
                     domain=DefaultDomain())
            ]
    else:
        edge = wrap(edge)
        if not edge.name and not is_text(edge.value):
            Log.error("You must name compound and complex edges: {{edge}}",
                      edge=edge)

        if is_container(edge.value) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)

            return [
                Data(name=edge.name,
                     value=jx_expression(edge.value, schema=schema),
                     allowNulls=bool(coalesce(edge.allowNulls, True)),
                     dim=dim_index,
                     domain=domain)
            ]

        domain = _normalize_domain(edge.domain, schema=schema)

        return [
            Data(name=coalesce(edge.name, edge.value),
                 value=jx_expression(edge.value, schema=schema),
                 range=_normalize_range(edge.range),
                 allowNulls=bool(coalesce(edge.allowNulls, True)),
                 dim=dim_index,
                 domain=domain)
        ]
Esempio n. 18
0
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        desc = wrap(desc)

        self.type = "set"
        self.order = {}
        self.NULL = Null
        self.partitions = FlatList()
        self.primitive = True  # True IF DOMAIN IS A PRIMITIVE VALUE SET

        if isinstance(self.key, set):
            Log.error("problem")

        if not desc.key and (len(desc.partitions)==0 or isinstance(desc.partitions[0], (text_type, Number, tuple))):
            # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
            self.key = "value"
            self.map = {}
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                part = {"name": p, "value": p, "dataIndex": i}
                self.partitions.append(part)
                self.map[p] = part
                self.order[p] = i
                if isinstance(p, (int, float)):
                    text_part = text_type(float(p))  # ES CAN NOT HANDLE NUMERIC PARTS
                    self.map[text_part] = part
                    self.order[text_part] = i
            self.label = coalesce(self.label, "name")
            self.primitive = True
            return

        if desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.dimension.fields)
        elif desc.partitions and is_container(desc.key):
            # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
        elif desc.partitions and is_data(desc.partitions[0][desc.key]):
            # LOOKS LIKE OBJECTS
            # sorted = desc.partitions[desc.key]

            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
            self.order = {p[self.key]: p.dataIndex for p in desc.partitions}
            self.partitions = desc.partitions
        elif len(desc.partitions) == 0:
            # CREATE AN EMPTY DOMAIN
            self.key = "value"
            self.map = {}
            self.order[None] = 0
            self.label = coalesce(self.label, "name")
            return
        elif desc.key == None:
            if desc.partitions and all(desc.partitions.where) or all(desc.partitions.esfilter):
                if not all(desc.partitions.name):
                    Log.error("Expecting all partitions to have a name")
                self.key = "name"
                self.map = dict()
                self.map[None] = self.NULL
                self.order[None] = len(desc.partitions)
                for i, p in enumerate(desc.partitions):
                    self.partitions.append({
                        "where": jx_expression(coalesce(p.where, p.esfilter)),
                        "name": p.name,
                        "dataIndex": i
                    })
                    self.map[p.name] = p
                    self.order[p.name] = i
                return
            elif desc.partitions and len(set(desc.partitions.value)-{None}) == len(desc.partitions):
                # TRY A COMMON KEY CALLED "value".  IT APPEARS UNIQUE
                self.key = "value"
                self.map = dict()
                self.map[None] = self.NULL
                self.order[None] = len(desc.partitions)
                for i, p in enumerate(desc.partitions):
                    self.map[p[self.key]] = p
                    self.order[p[self.key]] = i
                self.primitive = False
            else:
                Log.error("Domains must have keys, or partitions")
        elif self.key:
            self.key = desc.key
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
            self.primitive = False
        else:
            Log.error("Can not hanldle")

        self.label = coalesce(self.label, "name")

        if hasattr(desc.partitions, "__iter__"):
            self.partitions = wrap(list(desc.partitions))
        else:
            Log.error("expecting a list of partitions")
Esempio n. 19
0
def _normalize(esfilter):
    """
    TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS
    """
    if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + convert.value2json(esfilter))
    isDiff = True

    while isDiff:
        isDiff = False

        if esfilter.bool.filter:
            terms = esfilter.bool.filter
            for (i0, t0), (i1,
                           t1) in itertools.product(enumerate(terms),
                                                    enumerate(terms)):
                if i0 == i1:
                    continue  # SAME, IGNORE
                # TERM FILTER ALREADY ASSUMES EXISTENCE
                with suppress_exception:
                    if (t0.exists.field != None
                            and t0.exists.field == t1.term.items()[0][0]):
                        terms[i0] = MATCH_ALL
                        continue

                # IDENTICAL CAN BE REMOVED
                with suppress_exception:
                    if t0 == t1:
                        terms[i0] = MATCH_ALL
                        continue

                # MERGE range FILTER WITH SAME FIELD
                if i0 > i1:
                    continue  # SAME, IGNORE
                with suppress_exception:
                    f0, tt0 = t0.range.items()[0]
                    f1, tt1 = t1.range.items()[0]
                    if f0 == f1:
                        set_default(terms[i0].range[literal_field(f1)], tt1)
                        terms[i1] = MATCH_ALL

            output = []
            for a in terms:
                if is_container(a):
                    from mo_logs import Log

                    Log.error("and clause is not allowed a list inside a list")
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_
                if a == MATCH_ALL:
                    isDiff = True
                    continue
                if a == MATCH_NONE:
                    return MATCH_NONE
                if a.bool.filter:
                    isDiff = True
                    a.isNormal = None
                    output.extend(a.bool.filter)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_ALL
            elif len(output) == 1:
                # output[0].isNormal = True
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = es_and(output)
            continue

        if esfilter.bool.should:
            output = []
            for a in esfilter.bool.should:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_

                if a.bool.should:
                    a.isNormal = None
                    isDiff = True
                    output.extend(a.bool.should)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_NONE
            elif len(output) == 1:
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap(es_or(output))
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                return MATCH_ALL

        if esfilter.terms:
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            output = es_or(
                                [es_missing(k), {
                                    "terms": {
                                        k: rest
                                    }
                                }])
                        else:
                            output = es_missing(k)
                        output.isNormal = True
                        return output
                    else:
                        esfilter.isNormal = True
                        return esfilter
            return MATCH_NONE

        if esfilter.bool.must_not:
            _sub = esfilter.bool.must_not
            sub = _normalize(_sub)
            if sub == MATCH_NONE:
                return MATCH_ALL
            elif sub == MATCH_ALL:
                return MATCH_NONE
            elif sub is not _sub:
                sub.isNormal = None
                return wrap({"bool": {"must_not": sub, "isNormal": True}})
            else:
                sub.isNormal = None

    esfilter.isNormal = True
    return esfilter
Esempio n. 20
0
def _get_schema_from_list(
    frum,  # The list
    table_name,  # Name of the table this list holds records for
    parent,  # parent path
    nested_path,  # each nested array, in reverse order
    columns,  # map from full name to column definition
    native_type_to_json_type  # dict from storage type name to json type name
):
    for d in frum:
        row_type = python_type_to_json_type[d.__class__]

        if row_type != "object":
            # EXPECTING PRIMITIVE VALUE
            full_name = parent
            column = columns[full_name]
            if not column:
                column = Column(
                    name=concat_field(table_name, full_name),
                    es_column=full_name,
                    es_index=".",
                    es_type=d.__class__.__name__,
                    jx_type=None,  # WILL BE SET BELOW
                    last_updated=Date.now(),
                    nested_path=nested_path,
                )
                columns.add(column)
            column.es_type = _merge_python_type(column.es_type, d.__class__)
            column.jx_type = native_type_to_json_type[column.es_type]
        else:
            for name, value in d.items():
                full_name = concat_field(parent, name)
                column = columns[full_name]
                if not column:
                    column = Column(
                        name=concat_field(table_name, full_name),
                        es_column=full_name,
                        es_index=".",
                        es_type=value.__class__.__name__,
                        jx_type=None,  # WILL BE SET BELOW
                        last_updated=Date.now(),
                        nested_path=nested_path,
                    )
                    columns.add(column)
                if is_container(value):  # GET TYPE OF MULTIVALUE
                    v = list(value)
                    if len(v) == 0:
                        this_type = none_type.__name__
                    elif len(v) == 1:
                        this_type = v[0].__class__.__name__
                    else:
                        this_type = reduce(_merge_python_type,
                                           (vi.__class__.__name__
                                            for vi in value))
                else:
                    this_type = value.__class__.__name__
                column.es_type = _merge_python_type(column.es_type, this_type)
                try:
                    column.jx_type = native_type_to_json_type[column.es_type]
                except Exception as e:
                    raise e

                if this_type in {"object", "dict", "Mapping", "Data"}:
                    _get_schema_from_list([value], table_name, full_name,
                                          nested_path, columns,
                                          native_type_to_json_type)
                elif this_type in {"list", "FlatList"}:
                    np = listwrap(nested_path)
                    newpath = unwraplist(
                        [join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, table_name, full_name,
                                          newpath, columns)
Esempio n. 21
0
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        desc = wrap(desc)

        self.type = "set"
        self.order = {}
        self.NULL = Null
        self.partitions = FlatList()
        self.primitive = True  # True IF DOMAIN IS A PRIMITIVE VALUE SET

        if isinstance(self.key, set):
            Log.error("problem")

        if not desc.key and (len(desc.partitions)==0 or isinstance(desc.partitions[0], (text, Number, tuple))):
            # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
            self.key = "value"
            self.map = {}
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                part = {"name": p, "value": p, "dataIndex": i}
                self.partitions.append(part)
                self.map[p] = part
                self.order[p] = i
                if isinstance(p, (int, float)):
                    text_part = text(float(p))  # ES CAN NOT HANDLE NUMERIC PARTS
                    self.map[text_part] = part
                    self.order[text_part] = i
            self.label = coalesce(self.label, "name")
            self.primitive = True
            return

        if desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.dimension.fields)
        elif desc.partitions and is_container(desc.key):
            # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
        elif desc.partitions and is_data(desc.partitions[0][desc.key]):
            # LOOKS LIKE OBJECTS
            # sorted = desc.partitions[desc.key]

            self.key = desc.key
            self.map = UniqueIndex(keys=desc.key)
            self.order = {p[self.key]: p.dataIndex for p in desc.partitions}
            self.partitions = desc.partitions
        elif len(desc.partitions) == 0:
            # CREATE AN EMPTY DOMAIN
            self.key = "value"
            self.map = {}
            self.order[None] = 0
            self.label = coalesce(self.label, "name")
            return
        elif desc.key == None:
            if desc.partitions and all(desc.partitions.where) or all(desc.partitions.esfilter):
                if not all(desc.partitions.name):
                    Log.error("Expecting all partitions to have a name")
                self.key = "name"
                self.map = dict()
                self.map[None] = self.NULL
                self.order[None] = len(desc.partitions)
                for i, p in enumerate(desc.partitions):
                    self.partitions.append({
                        "where": jx_expression(coalesce(p.where, p.esfilter)),
                        "name": p.name,
                        "dataIndex": i
                    })
                    self.map[p.name] = p
                    self.order[p.name] = i
                return
            elif desc.partitions and len(set(desc.partitions.value)-{None}) == len(desc.partitions):
                # TRY A COMMON KEY CALLED "value".  IT APPEARS UNIQUE
                self.key = "value"
                self.map = dict()
                self.map[None] = self.NULL
                self.order[None] = len(desc.partitions)
                for i, p in enumerate(desc.partitions):
                    self.map[p[self.key]] = p
                    self.order[p[self.key]] = i
                self.primitive = False
            else:
                Log.error("Domains must have keys, or partitions")
        elif self.key:
            self.key = desc.key
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
            self.primitive = False
        else:
            Log.error("Can not hanldle")

        self.label = coalesce(self.label, "name")

        if hasattr(desc.partitions, "__iter__"):
            self.partitions = wrap(list(desc.partitions))
        else:
            Log.error("expecting a list of partitions")