コード例 #1
0
    def map_to_sql(self, var=""):
        """
        BUILD A MAP FROM RELATIVE AND ABSOLUTE NAMES TO THE NON-STRUCT COLUMNS

        :param var: VARIABLE NAME; A LEADING ORIGIN PATH (self.nested_path[0]) IS STRIPPED
        :return: set_default() OF THE RELATIVE-NAME MAP AND THE ABSOLUTE-NAME MAP;
                 EACH MAPS A NAME TO A LIST OF COLUMNS
        """
        origin = self.nested_path[0]
        if startswith_field(var, origin) and origin != var:
            # MAKE var RELATIVE TO THE ORIGIN
            var = relative_field(var, origin)

        relative_map = {}  # KEYED BY NAME RELATIVE TO origin
        absolute_map = {}  # KEYED BY FULL NAME; ONLY COLUMNS NOT STORED AT origin
        for key, columns in self.namespace.items():
            for column in columns:
                if column.jx_type in STRUCT:
                    continue

                if startswith_field(get_property_name(key), var):
                    add_prefix = False
                elif origin == var:
                    # NOT UNDER var, BUT var IS THE ORIGIN: FILE UNDER A var-PREFIXED NAME
                    add_prefix = True
                else:
                    continue

                relative_name = relative_field(column.name, origin)
                if add_prefix:
                    relative_name = concat_field(var, relative_name)
                relative_map.setdefault(relative_name, []).append(column)

                if origin != column.nested_path[0]:
                    absolute_name = column.name
                    if add_prefix:
                        absolute_name = concat_field(var, absolute_name)
                    absolute_map.setdefault(absolute_name, []).append(column)

        return set_default(relative_map, absolute_map)
コード例 #2
0
ファイル: leaves_op.py プロジェクト: armenzg/smart-scheduling
 def to_bq(self, schema, not_null=False, boolean=False):
     """
     EXPAND THE VARIABLE INTO ONE {"name", "sql"} ENTRY PER MATCHING LEAF COLUMN

     :param schema: PROVIDES columns, nested_path AND get_column_name()
     :param not_null: NOT USED BY THIS METHOD
     :param boolean: NOT USED BY THIS METHOD
     :return: wrap()-ED LIST OF {"name": <NAME RELATIVE TO THE VARIABLE>, "sql": <SQL>}
     """
     if not is_op(self.term, Variable):
         Log.error("Can only handle Variable")
     term = self.term.var
     prefix_length = len(split_field(term))

     def is_selected(column):
         # COLUMN MUST BE UNDER term, AND EITHER A PRIMITIVE ON THE PATH TO THE
         # SCHEMA'S TABLE, OR ANY NON-OBJECT COLUMN OF THE SCHEMA'S OWN TABLE
         if not startswith_field(column.name, term):
             return False
         if column.jx_type not in (EXISTS, OBJECT, NESTED) and startswith_field(
             schema.nested_path[0], column.nested_path[0]
         ):
             return True
         return (
             column.jx_type not in (EXISTS, OBJECT)
             and schema.nested_path[0] == column.nested_path[0]
         )

     entries = []
     for column in schema.columns:
         if not is_selected(column):
             continue
         # DROP THE PART OF THE NAME ALREADY COVERED BY term
         relative_name = join_field(
             split_field(schema.get_column_name(column))[prefix_length:]
         )
         sql = Variable(schema.get_column_name(column)).to_bq(schema)[0].sql
         entries.append({"name": relative_name, "sql": sql})
     return wrap(entries)
コード例 #3
0
ファイル: nested_op.py プロジェクト: klahnakoski/jx-python
    def __and__(self, other):
        """
        COMBINE THIS NestedOp WITH other

        :param other: ANY EXPRESSION
        :return: A NestedOp WHEN other IS A NestedOp ON THE SAME, A DEEPER, OR A
                 SHALLOWER PATH; OTHERWISE AndOp([self, other])
        """
        if not is_op(other, NestedOp):
            return AndOp([self, other])

        if self.path == other.frum:
            # SAME PATH: MERGE THE TWO INTO ONE NestedOp
            merged_select = listwrap(self.select) + listwrap(other.select)
            merged_where = AndOp([self.where, other.where])
            return NestedOp(
                self.path,
                merged_select,
                merged_where,
                coalesce(self.sort, other.sort),
                coalesce(self.limit, other.limit),
            )

        if startswith_field(other.frum.var, self.path.var):
            # other IS DEEPER: NEST IT INSIDE
            # WE ACHIEVE INTERSECTION BY LIMITING OURSELF TO ONLY THE DEEP OBJECTS
            # WE ASSUME frum SELECTS WHOLE DOCUMENT, SO self.select IS POSSIBLE
            return NestedOp(other, self.select, self.where, self.sort, self.limit)

        if startswith_field(self.path.var, other.frum.var):
            # self IS DEEPER: NEST self INSIDE, USING THE CLAUSES OF other
            return NestedOp(self, other.select, other.where, other.sort, other.limit)

        return AndOp([self, other])
コード例 #4
0
def _indexer(columns, query_path):
    """
    INDEX columns BY NAME, AS SEEN FROM query_path

    :param columns: RE-ITERABLE OF COLUMNS; EACH HAS names, type, es_column AND nested_path
    :param query_path: THE KEY INTO EACH COLUMN'S names
    :return: (relative_lookup, lookup_leaves, lookup_variables); EACH IS A dict
             FROM NAME TO set OF COLUMNS.  WHEN query_path IS NOT ".", NAMES FROM
             THE "." PERSPECTIVE ARE ADDED WHERE NOT ALREADY PRESENT
    """
    all_names = set(unnest_path(n) for c in columns
                    for n in c.names.values()) | {"."}

    def register(lookup, name, column):
        # FILE column UNDER name, AND UNDER THE UNTYPED FORM OF name
        lookup.setdefault(name, set()).add(column)
        lookup.setdefault(untype_path(name), set()).add(column)

    lookup_leaves = {}  # ALL LEAF VARIABLES
    lookup_variables = {}  # ALL NOT-NESTED VARIABLES
    for full_name in all_names:
        for c in columns:
            nfp = unnest_path(c.names[query_path])
            if not startswith_field(nfp, full_name):
                continue

            # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
            hidden_id = c.es_column == "_id" and full_name != "_id"

            if c.type not in [EXISTS, OBJECT, NESTED] and not hidden_id:
                register(lookup_leaves, full_name, c)

            if (c.type not in [EXISTS, OBJECT]
                    and not hidden_id
                    and startswith_field(c.nested_path[0], query_path)):
                register(lookup_variables, full_name, c)

    relative_lookup = {}
    for c in columns:
        try:
            cname = c.names[query_path]
            relative_lookup.setdefault(cname, set()).add(c)
            relative_lookup.setdefault(untype_path(cname), set()).add(c)
        except Exception as e:
            Log.error("Should not happen", cause=e)

    if query_path != ".":
        # ADD ABSOLUTE NAMES TO THE NAMESPACE, WITHOUT REPLACING RELATIVE ONES
        absolute_lookup, more_leaves, more_variables = _indexer(columns, ".")
        pairs = (
            (relative_lookup, absolute_lookup),
            (lookup_leaves, more_leaves),
            (lookup_variables, more_variables),
        )
        for target, extra in pairs:
            for k, cs in extra.items():
                if k not in target:
                    target[k] = cs

    return relative_lookup, lookup_leaves, lookup_variables
コード例 #5
0
ファイル: language.py プロジェクト: mozilla/cia-tasks
 def __new__(cls, name, bases, dct):
     """
     CREATE THE CLASS, SET ITS lang TO None, AND GIVE IT AN ID WHEN ITS
     MODULE IS UNDER expression_module

     :return: THE NEW CLASS
     """
     new_class = type.__new__(cls, name, bases, dct)
     new_class.lang = None
     if not startswith_field(new_class.__module__, expression_module):
         return new_class
     # ALL OPS IN expression_module ARE GIVEN AN ID, NO OTHERS
     setattr(new_class, ID, next_id())
     return new_class
コード例 #6
0
    def _get_sql_schema(self, frum):
        """
        BUILD A SCHEMA WHERE EACH COLUMN COPY HAS `es_index` SET TO ITS SQL TABLE ALIAS

        :param frum: NOT USED BY THIS METHOD
        :return: unwrap()-ED MAP FROM EACH NAME PREFIX TO THE LIST OF NON-STRUCT
                 COLUMN COPIES FOUND UNDER THAT PREFIX
        """
        # WE MUST HAVE THE ALIAS NAMES FOR THE TABLES
        nest_to_alias = {}
        for i, (nested_path, sub_table) in enumerate(self.nested_tables.items()):
            nest_to_alias[nested_path] = "__" + unichr(ord('a') + i) + "__"

        def paths(field):
            # YIELD EVERY LEADING PORTION OF field (ZERO STEPS UP TO ALL STEPS)
            steps = split_field(field)
            for size in range(len(steps) + 1):
                yield join_field(steps[:size])

        all_prefixes = set(
            prefix for name in self.columns.keys() for prefix in paths(name)
        )

        columns = Data()
        for prefix in all_prefixes:
            for name, cols in self.columns.items():
                for col in cols:
                    if not startswith_field(name, prefix):
                        continue
                    if col.type in STRUCT:
                        continue
                    # COPY, SO THE ORIGINAL COLUMN KEEPS ITS es_index
                    aliased = copy(col)
                    aliased.es_index = nest_to_alias[aliased.nested_path[0]]
                    columns[literal_field(prefix)] += [aliased]
        columns._db = self.db
        return unwrap(columns)
コード例 #7
0
ファイル: meta.py プロジェクト: klahnakoski/pyLibrary
    def leaves(self, column_name):
        """
        :param column_name:
        :return: ALL COLUMNS THAT START WITH column_name, NOT INCLUDING DEEPER NESTED COLUMNS
                 (THE MATCHES FOR THE FIRST QUERY PATH THAT HAS ANY; EMPTY set IF NONE)
        """
        clean_name = unnest_path(column_name)
        match_as_is = clean_name != column_name
        if match_as_is:
            # column_name CHANGES WHEN UNNESTED: COMPARE AGAINST UNTOUCHED NAMES
            clean_name = column_name
        cleaner = (lambda x: x) if match_as_is else unnest_path

        columns = self.columns

        def is_leaf(c, path):
            # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
            if c.name == "_id" and clean_name != "_id":
                return False
            acceptable_type = (
                (c.jx_type == EXISTS and column_name.endswith("." + EXISTS_TYPE))
                or c.jx_type not in OBJECTS
                or (clean_name == '.' and c.cardinality == 0)
            )
            if not acceptable_type:
                return False
            return startswith_field(cleaner(relative_field(c.name, path)), clean_name)

        # TODO: '.' IMPLIES ALL FIELDS FROM ABSOLUTE PERPECTIVE, ALL OTHERS ARE A RELATIVE PERSPECTIVE
        # TODO: HOW TO REFER TO FIELDS THAT MAY BE SHADOWED BY A RELATIVE NAME?
        search_order = reversed(self.query_path) if clean_name == '.' else self.query_path
        for path in search_order:
            output = set(c for c in columns if is_leaf(c, path))
            if output:
                return output
        return set()
コード例 #8
0
    def leaves(self, column_name):
        """
        :param column_name:
        :return: ALL COLUMNS THAT START WITH column_name, NOT INCLUDING DEEPER NESTED COLUMNS
                 (THE MATCHES FOR THE FIRST QUERY PATH THAT HAS ANY; EMPTY set IF NONE)
        """
        clean_name = unnest_path(column_name)
        if clean_name == column_name:
            cleaner = unnest_path
        else:
            # column_name CHANGES WHEN UNNESTED: COMPARE AGAINST UNTOUCHED NAMES
            clean_name = column_name
            cleaner = lambda x: x

        columns = self.columns
        # TODO: '.' IMPLIES ALL FIELDS FROM ABSOLUTE PERPECTIVE, ALL OTHERS ARE A RELATIVE PERSPECTIVE
        # TODO: HOW TO REFER TO FIELDS THAT MAY BE SHADOWED BY A RELATIVE NAME?
        if clean_name == '.':
            candidate_paths = reversed(self.query_path)
        else:
            candidate_paths = self.query_path

        for path in candidate_paths:
            found = []
            for c in columns:
                # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
                if c.name == "_id" and clean_name != "_id":
                    continue
                if not (
                    (c.jx_type == EXISTS and column_name.endswith("." + EXISTS_TYPE))
                    or c.jx_type not in OBJECTS
                    or (clean_name == '.' and c.cardinality == 0)
                ):
                    continue
                if startswith_field(cleaner(relative_field(c.name, path)), clean_name):
                    found.append(c)
            if found:
                return set(found)
        return set()
コード例 #9
0
    def new_leaves(self, column_name):
        """
        :param column_name: NAME PREFIX TO MATCH (UNNESTED BEFORE MATCHING)
        :return: set OF ALL COLUMNS THAT START WITH column_name, INCLUDING DEEP COLUMNS
        """
        column_name = unnest_path(column_name)
        columns = self.columns
        all_paths = self.snowflake.sorted_query_paths

        output = {}  # MAP FROM QUERY PATH TO LIST OF MATCHING COLUMNS
        for c in columns:
            # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
            if c.name == "_id" and column_name != "_id":
                continue
            if c.jx_type in OBJECTS:
                continue
            if c.cardinality == 0:
                continue
            for path in all_paths:
                if not startswith_field(
                        unnest_path(relative_field(c.name, path)),
                        column_name):
                    continue
                existing = output.get(path)
                if not existing:
                    # FIRST MATCH FOR THIS PATH IS ALWAYS KEPT
                    output[path] = [c]
                    continue
                if len(path) > len(c.nested_path[0]):
                    continue
                if any("." + t + "." in c.es_column
                       for t in (STRING_TYPE, NUMBER_TYPE, BOOLEAN_TYPE)):
                    # ELASTICSEARCH field TYPES ARE NOT ALLOWED
                    continue
                # ONLY THE DEEPEST COLUMN WILL BE CHOSEN
                output[path].append(c)

        # THE VALUES OF output ARE LISTS, WHICH CAN NOT BE MEMBERS OF A set;
        # FLATTEN THEM SO THE RESULT IS A set OF COLUMNS
        return set(c for cs in output.values() for c in cs)
コード例 #10
0
ファイル: meta.py プロジェクト: klahnakoski/pyLibrary
    def new_leaves(self, column_name):
        """
        :param column_name: NAME PREFIX TO MATCH (UNNESTED BEFORE MATCHING)
        :return: set OF ALL COLUMNS THAT START WITH column_name, INCLUDING DEEP COLUMNS
        """
        column_name = unnest_path(column_name)
        columns = self.columns
        all_paths = self.snowflake.sorted_query_paths

        output = {}  # MAP FROM QUERY PATH TO LIST OF MATCHING COLUMNS
        for c in columns:
            # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
            if c.name == "_id" and column_name != "_id":
                continue
            if c.jx_type in OBJECTS:
                continue
            if c.cardinality == 0:
                continue
            for path in all_paths:
                if not startswith_field(unnest_path(relative_field(c.name, path)), column_name):
                    continue
                existing = output.get(path)
                if not existing:
                    # FIRST MATCH FOR THIS PATH IS ALWAYS KEPT
                    output[path] = [c]
                    continue
                if len(path) > len(c.nested_path[0]):
                    continue
                if any("." + t + "." in c.es_column for t in (STRING_TYPE, NUMBER_TYPE, BOOLEAN_TYPE)):
                    # ELASTICSEARCH field TYPES ARE NOT ALLOWED
                    continue
                # ONLY THE DEEPEST COLUMN WILL BE CHOSEN
                output[path].append(c)

        # set(output.values()) WOULD TRY TO HASH THE LISTS AND RAISE TypeError;
        # RETURN THE COLUMNS THEMSELVES
        return set(c for cs in output.values() for c in cs)
コード例 #11
0
ファイル: schema.py プロジェクト: klahnakoski/annotations
 def leaves(self, prefix):
     """
     FIND THE FACT TABLE'S COLUMNS UNDER prefix

     :param prefix: NAME, APPENDED TO self.nested_path TO GET THE FULL NAME
     :return: set OF COLUMNS THAT ARE NEITHER OBJECT NOR EXISTS, AND WHOSE NAME
              STARTS WITH THE FULL NAME (EXCLUDING GUID) OR EQUALS THE FULL NAME
     """
     full_name = concat_field(self.nested_path, prefix)
     fact_columns = self.snowflake.namespace.columns.find(self.snowflake.fact_name)

     output = set()
     for c in fact_columns:
         k = c.name
         in_scope = (startswith_field(k, full_name) and k != GUID) or k == full_name
         if in_scope and c.jx_type not in [OBJECT, EXISTS]:
             output.add(c)
     return output
コード例 #12
0
    def __getitem__(self, item):
        """
        SELECT BY NAME PREFIX (TEXT) OR BY ROW RANGE (slice)

        :param item: TEXT NAME PREFIX, OR slice OF ROW NUMBERS
        :return: A Table (self WHEN THE slice COVERS ALL ROWS); ANY OTHER TYPE OF
                 item FALLS THROUGH AND RETURNS None
        """
        if isinstance(item, text):
            # WALK DOWN THE SCHEMA, ONE STEP OF THE NAME AT A TIME
            sub_schema = self.schema
            for n in split_field(item):
                if n in sub_schema.more:
                    sub_schema = sub_schema.more.get(n)
                else:
                    sub_schema = sub_schema.values.get(n)

            def under_item(mapping):
                # KEEP ONLY THE ENTRIES WHOSE KEY STARTS WITH item
                return {
                    k: v
                    for k, v in mapping.items() if startswith_field(k, item)
                }

            return Table(under_item(self.values), under_item(self.reps),
                         under_item(self.defs), self.num_rows, sub_schema,
                         self.max_definition_level)

        if isinstance(item, slice):
            start = coalesce(item.start, 0)
            stop = coalesce(item.stop, self.num_rows)

            if start == 0 and stop == self.num_rows:
                return self

            # NOTE(review): self.reps IS USED AS A MAPPING (.items()) ELSEWHERE IN THIS
            # METHOD, BUT IS ITERATED DIRECTLY HERE - CONFIRM WHAT r IS MEANT TO BE
            first = last = counter = 0
            for i, r in enumerate(self.reps):
                if counter == start:
                    first = i
                elif counter == stop:
                    last = i
                    break
                counter += 1 if r == 0 else 0

            def cut(mapping):
                # APPLY THE SAME [first:last] WINDOW TO EVERY VALUE
                return {k: v[first:last] for k, v in mapping.items()}

            return Table(cut(self.values), cut(self.reps), cut(self.defs),
                         stop - start, self.schema)
コード例 #13
0
    def _edges_op(self, query, frum):
        """
        PREPARE THE SQL PIECES FOR A QUERY WITH EDGES

        THE STEPS SHOWN BELOW: COPY THE QUERY, ASSIGN AN ALIAS TO EVERY TABLE OF
        THE SNOWFLAKE, BUILD THE FROM CLAUSE (LEFT JOINS FROM SHALLOWEST TO DEEPEST
        NESTED TABLE ALONG THE PATH OF frum), TRANSLATE query.where, THEN WORK OUT
        THE SQL VALUES OF EACH EDGE

        :param query: THE QUERY; COPIED, SO THE CALLER'S OBJECT IS NOT MARKED UP
        :param frum: DOTTED NAME; FIRST STEP IS THE BASE TABLE, THE REST IS THE NESTED PATH
        """
        query = query.copy()  # WE WILL BE MARKING UP THE QUERY
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        outer_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        frum_path = split_field(frum)
        base_table = join_field(frum_path[0:1])
        path = join_field(frum_path[1:])
        # ONE ALIAS PER TABLE: __a__, __b__, ... IN THE ORDER self.sf.tables LISTS THEM
        nest_to_alias = {
            nested_path: quote_column("__" + unichr(ord('a') + i) + "__")
            for i, (nested_path, sub_table) in enumerate(self.sf.tables.items())
        }

        schema = self.sf.tables[relative_field(frum, self.sf.fact)].schema

        # ONLY THE TABLES WHOSE NESTED PATH IS A PREFIX OF path ARE JOINED
        tables = []
        for n, a in nest_to_alias.items():
            if startswith_field(path, n):
                tables.append({"nest": n, "alias": a})
        tables = jx.sort(tables, {"value": {"length": "nest"}})

        # FIRST TABLE STARTS THE FROM CLAUSE; EACH LATER TABLE IS LEFT-JOINED TO THE
        # ONE BEFORE IT (ITS PARENT COLUMN = PREVIOUS TABLE'S UID COLUMN)
        from_sql = quote_column(join_field([base_table] + split_field(tables[0].nest))) + tables[0].alias
        for previous, t in zip(tables, tables[1::]):
            from_sql += (
                SQL_LEFT_JOIN + quote_column(concat_field(base_table, t.nest)) + t.alias +
                SQL_ON + join_column(t.alias, quoted_PARENT) + " = " + join_column(previous.alias, quoted_UID)
            )

        main_filter = query.where.to_sql(schema, boolean=True)[0].sql.b

        # SHIFT THE COLUMN DEFINITIONS BASED ON THE NESTED QUERY DEPTH
        ons = []
        join_types = []
        wheres = []
        null_ons = [EXISTS_COLUMN + SQL_IS_NULL]
        groupby = []
        null_groupby = []
        orderby = []
        domains = []

        select_clause = [SQL_ONE + EXISTS_COLUMN] + [quote_column(c.es_column) for c in self.sf.tables['.'].columns]

        for edge_index, query_edge in enumerate(query.edges):
            edge_alias = quote_column("e" + text_type(edge_index))

            if query_edge.value:
                # EDGE IS AN EXPRESSION: ONE (TYPE, SQL) PAIR PER ENTRY OF ITS SQL
                edge_values = [p for c in query_edge.value.to_sql(schema).sql for p in c.items()]

            elif not query_edge.value and any(query_edge.domain.partitions.where):
                # EDGE IS A SET OF PARTITIONS WITH FILTERS: A CASE EXPRESSION THAT
                # GIVES THE INDEX OF THE FIRST PARTITION THAT MATCHES, ELSE NULL
                case = SQL_CASE
                for pp, p in enumerate(query_edge.domain.partitions):
                    w = p.where.to_sql(schema)[0].sql.b
                    t = quote_value(pp)
                    case += SQL_WHEN + w + SQL_THEN + t
                case += SQL_ELSE + SQL_NULL + SQL_END  # quote value with length of partitions
                edge_values = [("n", case)]

            elif query_edge.range:
                # EDGE IS A RANGE: THE SQL OF THE min FOLLOWED BY THE SQL OF THE max
                edge_values = query_edge.range.min.to_sql(schema)[0].sql.items() + query_edge.range.max.to_sql(schema)[
                    0].sql.items()
コード例 #14
0
    def _nest_column(self, column, new_path):
        """
        MOVE column, AND EVERY COLUMN UNDER IT, INTO A NEW NESTED SUB-TABLE

        THE MOVED COLUMNS ARE CHANGED IN PLACE (new_path IS PUT AT THE FRONT OF
        THEIR nested_path), THE SUB-TABLE IS CREATED AND REGISTERED IN
        self.nested_tables, AND ROWS OF THE EXISTING TABLE THAT HAVE DATA IN ANY
        MOVED COLUMN ARE COPIED INTO IT

        :param column: THE COLUMN THAT IS BECOMING NESTED
        :param new_path: THE NESTED PATH OF THE NEW SUB-TABLE
        :raises: VIA Log.error WHEN THE DESTINATION TABLE ALREADY HAS COLUMNS
        """
        destination_table = join_field([self.name] + split_field(new_path))
        existing_table = join_field([self.name] +
                                    split_field(column.nested_path[0]))

        # FIND THE INNER COLUMNS WE WILL BE MOVING
        new_columns = {}
        for cname, cols in self.columns.items():
            if startswith_field(cname, column.names[self.name]):
                new_columns[cname] = set()
                for col in cols:
                    new_columns[cname].add(col)
                    # THE COLUMN OBJECT ITSELF IS UPDATED, NOT A COPY
                    col.nested_path = [new_path] + col.nested_path

        # TODO: IF THERE ARE CHILD TABLES, WE MUST UPDATE THEIR RELATIONS TOO?

        # DEFINE A NEW TABLE?
        # LOAD THE COLUMNS
        command = "PRAGMA table_info(" + quote_table(destination_table) + ")"
        details = self.db.query(command)
        if details.data:
            # ANY ROW HERE MEANS THE DESTINATION TABLE IS ALREADY DEFINED
            raise Log.error("not expected, new nesting!")
        from jx_sqlite.query_table import QueryTable
        self.nested_tables[new_path] = sub_table = QueryTable(
            destination_table, self.db, exists=False)

        # ADD THE PARENT AND ORDER COLUMNS TO THE SUB-TABLE
        self.db.execute("ALTER TABLE " + quote_table(sub_table.name) +
                        " ADD COLUMN " + quoted_PARENT + " INTEGER")
        self.db.execute("ALTER TABLE " + quote_table(sub_table.name) +
                        " ADD COLUMN " + quote_table(ORDER) + " INTEGER")
        for cname, cols in new_columns.items():
            for c in cols:
                sub_table.add_column(c)

        # TEST IF THERE IS ANY DATA IN THE NEW NESTED ARRAY
        all_cols = [c for _, cols in sub_table.columns.items() for c in cols]
        if not all_cols:
            has_nested_data = "0"
        elif len(all_cols) == 1:
            has_nested_data = _quote_column(all_cols[0]) + " is NOT NULL"
        else:
            has_nested_data = "COALESCE(" + \
                              ",".join(_quote_column(c) for c in all_cols) + \
                              ") IS NOT NULL"

        # FILL TABLE WITH EXISTING COLUMN DATA
        # EACH COPIED ROW USES THE SOURCE ROW'S UID AS BOTH ITS UID AND ITS PARENT, WITH ORDER 0
        command = "INSERT INTO " + quote_table(destination_table) + "(\n" + \
                  ",\n".join(
                      [quoted_UID, quoted_PARENT, quote_table(ORDER)] +
                      [_quote_column(c) for _, cols in sub_table.columns.items() for c in cols]
                  ) + \
                  "\n)\n" + \
                  "\nSELECT\n" + ",".join(
            [quoted_UID, quoted_UID, "0"] +
            [_quote_column(c) for _, cols in sub_table.columns.items() for c in cols]
        ) + \
                  "\nFROM\n" + quote_table(existing_table) + \
                  "\nWHERE\n" + has_nested_data
        self.db.execute(command)
コード例 #15
0
ファイル: stream.py プロジェクト: rv404674/TUID
def needed(name, required):
    """
    FOR EACH ENTRY OF required, GIVE ITS PATH RELATIVE TO name WHEN IT IS
    UNDER name, OTHERWISE None

    :param name: THE PREFIX TO TEST AGAINST
    :param required: ITERABLE OF FIELD NAMES; FALSY ENTRIES MAP TO None
    :return: LIST WITH ONE ENTRY PER ENTRY OF required, IN THE SAME ORDER
    """
    output = []
    for r in required:
        if r and startswith_field(r, name):
            output.append(relative_field(r, name))
        else:
            output.append(None)
    return output
コード例 #16
0
 def place(parent_doc_details):
     """
     ATTACH nested_doc_details TO THE DEEPEST NODE (AT OR BELOW parent_doc_details)
     WHOSE PATH IS A PREFIX OF step

     :param parent_doc_details: dict WITH 'nested_path' (FIRST ENTRY IS COMPARED
            TO step) AND 'children' (LIST OF dicts OF THE SAME SHAPE)
     :return: True IF nested_doc_details WAS ATTACHED AT OR BELOW THIS NODE,
              False IF THIS NODE DOES NOT COVER step
     """
     if not startswith_field(step,
                             parent_doc_details['nested_path'][0]):
         return False

     # PREFER THE DEEPEST MATCH: LET EACH CHILD TRY FIRST
     for c in parent_doc_details['children']:
         if place(c):
             return True

     parent_doc_details['children'].append(
         nested_doc_details)
     # REPORT SUCCESS; WITHOUT THIS EVERY MATCHING ANCESTOR WOULD ALSO
     # ATTACH ITS OWN COPY OF nested_doc_details
     return True
コード例 #17
0
def needed(name, required):
    """
    MAP EACH ENTRY OF required TO ITS PATH RELATIVE TO name, OR TO None WHEN
    THE ENTRY IS FALSY OR NOT UNDER name

    :param name: THE PREFIX TO TEST AGAINST
    :param required: ITERABLE OF FIELD NAMES
    :return: LIST WITH ONE ENTRY PER ENTRY OF required, IN THE SAME ORDER
    """

    def relative_or_none(r):
        if not r:
            return None
        if not startswith_field(r, name):
            return None
        return relative_field(r, name)

    return [relative_or_none(r) for r in required]
コード例 #18
0
 def leaves(self, prefix):
     """
     FIND THE COLUMNS UNDER prefix

     :param prefix: A KEY OF self.namespace
     :return: Null WHEN prefix IS NOT IN THE NAMESPACE (OR HAS NO COLUMNS);
              OTHERWISE THE set OF COLUMNS, NEITHER OBJECT NOR EXISTS, FILED
              UNDER KEYS THAT START WITH THE FULL NAME (EXCLUDING GUID) OR EQUAL IT
     """
     head = self.namespace.get(prefix, None)
     if not head:
         return Null
     # THE FULL NAME IS TAKEN FROM THE FIRST COLUMN FILED UNDER prefix
     full_name = first(head).name

     output = set()
     for k, cs in self.namespace.items():
         in_scope = (startswith_field(k, full_name) and k != GUID) or k == full_name
         if not in_scope:
             continue
         output.update(c for c in cs if c.jx_type not in [OBJECT, EXISTS])
     return output
コード例 #19
0
ファイル: es_query.py プロジェクト: klahnakoski/pyLibrary
 def to_es(self, schema, query_path="."):
     """
     BUILD THE AGGREGATION, THEN MARK IT AS nested OR reverse_nested

     :param schema: PASSED THROUGH TO Aggs.to_es
     :param query_path: THE PATH THE SURROUNDING QUERY IS AT
     :return: OUTPUT OF Aggs.to_es WITH 'nested' ADDED WHEN self.path IS UNDER
              query_path, OR 'reverse_nested' ADDED OTHERWISE
     """
     output = Aggs.to_es(self, schema, self.path)
     if query_path == self.path:
         Log.error("this should have been cancelled out")
     elif not startswith_field(self.path, query_path):
         # GOING BACK UP; THE ROOT PATH "." IS SENT AS None
         parent_path = self.path if self.path != "." else None
         output["reverse_nested"] = {"path": parent_path}
     else:
         output['nested'] = {"path": self.path}
     return output
コード例 #20
0
 def __init__(self, frum, nests):
     """
     :param frum: STORED AS self.frum
     :param nests: SEQUENCE OF NESTS; WALKED IN REVERSE, EACH PATH MUST START
            WITH THE PATH BEFORE IT (BEGINNING FROM ".")
     """
     Expression.__init__(self, nests)
     self.frum = frum
     self.nests = nests

     shallower = "."
     for path in (n.path.var for n in reversed(nests)):
         if not startswith_field(path, shallower):
             Log.error("Expecting nests to be reverse nested order")
         shallower = path
コード例 #21
0
ファイル: table.py プロジェクト: klahnakoski/pyLibrary
    def __getitem__(self, item):
        """
        SELECT BY ROW RANGE (slice) OR BY NAME PREFIX (TEXT)

        :param item: slice OF ROW NUMBERS, OR TEXT NAME PREFIX
        :return: A Table (self WHEN THE slice COVERS ALL ROWS); ANY OTHER TYPE OF
                 item FALLS THROUGH AND RETURNS None
        """
        if isinstance(item, slice):
            start = coalesce(item.start, 0)
            stop = coalesce(item.stop, self.num_rows)

            if start == 0 and stop == self.num_rows:
                return self

            # NOTE(review): self.reps IS USED AS A MAPPING (.items()) ELSEWHERE IN THIS
            # METHOD, BUT IS ITERATED DIRECTLY HERE - CONFIRM WHAT r IS MEANT TO BE
            first, last, counter = 0, 0, 0
            for i, r in enumerate(self.reps):
                if counter == start:
                    first = i
                elif counter == stop:
                    last = i
                    break
                if r == 0:
                    counter += 1

            values = {k: v[first:last] for k, v in self.values.items()}
            reps = {k: v[first:last] for k, v in self.reps.items()}
            defs = {k: v[first:last] for k, v in self.defs.items()}
            return Table(values, reps, defs, stop - start, self.schema)

        if isinstance(item, text_type):
            # WALK DOWN THE SCHEMA, ONE STEP OF THE NAME AT A TIME
            sub_schema = self.schema
            for n in split_field(item):
                holder = sub_schema.more if n in sub_schema.more else sub_schema.values
                sub_schema = holder.get(n)

            # KEEP ONLY THE ENTRIES WHOSE KEY STARTS WITH item
            selected = [
                {k: v for k, v in source.items() if startswith_field(k, item)}
                for source in (self.values, self.reps, self.defs)
            ]
            return Table(
                selected[0],
                selected[1],
                selected[2],
                self.num_rows,
                sub_schema,
                self.max_definition_level
            )
コード例 #22
0
 def execute_query(self, query):
     """
     SEND A COPY OF query TO THE INDEX, OR TO ITS METADATA

     :param query: MAPPING WITH A "from" ENTRY
     :return: RESULT OF self._index.query() WHEN "from" IS UNDER THE INDEX NAME,
              OR OF self._index.query_metadata() WHEN "from" IS "meta.columns"
     ANY FAILURE, INCLUDING AN UNKNOWN "from", IS REPORTED AS "Failed query"
     """
     try:
         source = query["from"]
         if startswith_field(source, self._index.name):
             return self._index.query(deepcopy(query))
         if source == "meta.columns":
             return self._index.query_metadata(deepcopy(query))
         Log.error("Do not know how to handle")
     except Exception as e:
         Log.error("Failed query", e)
コード例 #23
0
ファイル: es_query.py プロジェクト: klahnakoski/auth0-api
 def to_es(self, schema, query_path="."):
     """
     BUILD THE AGGREGATION, THEN MARK IT AS nested OR reverse_nested

     :param schema: PASSED THROUGH TO Aggs.to_es
     :param query_path: THE PATH THE SURROUNDING QUERY IS AT
     :return: OUTPUT OF Aggs.to_es WITH 'nested' ADDED WHEN self.path IS UNDER
              query_path, OR 'reverse_nested' ADDED OTHERWISE
     """
     output = Aggs.to_es(self, schema, self.path)
     if query_path == self.path:
         Log.error("this should have been cancelled out")
         return output

     if startswith_field(self.path, query_path):
         output['nested'] = {"path": self.path}
         return output

     # GOING BACK UP; THE ROOT PATH "." IS SENT AS None
     if self.path == ".":
         target = None
     else:
         target = self.path
     output["reverse_nested"] = {"path": target}
     return output
コード例 #24
0
    def __init__(self, db):
        """
        READ EVERY TABLE AND COLUMN FROM THE DATABASE

        :param db: DATABASE; MUST PROVIDE query() RETURNING header AND data
        """
        self.db = db
        self._snowflakes = {}  # MAP FROM BASE TABLE TO LIST OF NESTED TABLES
        self._columns = ColumnList()

        # FIND ALL TABLES
        result = self.db.query(
            "SELECT * FROM sqlite_master WHERE type='table' ORDER BY name")
        rows = []
        for d in result.data:
            rows.append({k: d[i] for i, k in enumerate(result.header)})
        tables = wrap(rows)

        # USED ONLY WHEN THE COLUMN NAME CARRIES NO TYPE OF ITS OWN
        sqlite_to_json_type = {
            "TEXT": "string",
            "REAL": "number",
            "INTEGER": "integer"
        }

        last_nested_path = []
        for table in tables:
            if table.name.startswith("__"):
                continue
            base_table, nested_path = tail_field(table.name)

            # FIND COMMON NESTED PATH SUFFIX
            common = []
            for i, p in enumerate(last_nested_path):
                if startswith_field(nested_path, p):
                    common = last_nested_path[i:]
                    break
            last_nested_path = common

            full_nested_path = [nested_path] + last_nested_path
            # NOTE(review): THE DEFAULT IS A LIST OF PATHS, YET TableDesc OBJECTS ARE
            # APPENDED TO IT - CONFIRM THE DEFAULT SHOULD NOT BE AN EMPTY LIST
            nested_tables = self._snowflakes.setdefault(
                base_table, [nested_path] + last_nested_path)
            nested_tables.append(
                jx_base.TableDesc(name=table.name,
                                  nested_path=full_nested_path))

            # LOAD THE COLUMNS
            command = "PRAGMA table_info" + sql_iso(quote_column(table.name))
            details = self.db.query(command)

            for cid, name, dtype, notnull, dfft_value, pk in details.data:
                if name.startswith("__"):
                    continue
                cname, ctype = untyped_column(name)
                jx_type = coalesce(ctype, sqlite_to_json_type.get(dtype))
                column = Column(
                    name=cname,  # I THINK COLUMNS HAVE THIER FULL PATH
                    jx_type=jx_type,
                    nested_path=full_nested_path,
                    es_type=dtype,
                    es_column=name,
                    es_index=table.name)
                self._columns.add(column)
            last_nested_path = full_nested_path
コード例 #25
0
ファイル: snowflake.py プロジェクト: vpathak2019/jx-sqlite
    def map_to_sql(self, var=""):
        """
        RETURN A MAP FROM THE RELATIVE AND ABSOLUTE NAME SPACE TO COLUMNS

        :param var: VARIABLE NAME; A LEADING ORIGIN PATH (self.nested_path[0]) IS STRIPPED
        :return: set_default() OF THE RELATIVE-NAME MAP AND THE ABSOLUTE-NAME MAP;
                 EACH MAPS A NAME TO A LIST OF NON-STRUCT COLUMNS
        """
        origin = self.nested_path[0]
        if startswith_field(var, origin) and origin != var:
            var = relative_field(var, origin)
        fact_dict = {}  # KEYED BY ABSOLUTE NAME; ONLY COLUMNS NOT STORED AT origin
        origin_dict = {}  # KEYED BY NAME RELATIVE TO origin
        for k, cs in self.map.items():
            for c in cs:
                if c.type in STRUCT:
                    continue

                if startswith_field(get_property_name(k), var):
                    origin_dict.setdefault(c.names[origin], []).append(c)

                    if origin != c.nested_path[0]:
                        fact_dict.setdefault(c.names["."], []).append(c)
                elif origin == var:
                    origin_dict.setdefault(
                        concat_field(var, c.names[origin]), []).append(c)

                    if origin != c.nested_path[0]:
                        # LOOK UP AND STORE UNDER THE SAME (PREFIXED) KEY; TESTING
                        # THE UNPREFIXED NAME COULD RAISE KeyError OR DROP COLUMNS
                        fact_dict.setdefault(
                            concat_field(var, c.names["."]), []).append(c)

        return set_default(origin_dict, fact_dict)
コード例 #26
0
    def _load_from_database(self):
        """
        READ EVERY TABLE AND COLUMN FROM THE DATABASE; RECORD THE NESTED PATHS
        IN self._snowflakes AND ADD EACH COLUMN WITH self.add()
        """
        # FIND ALL TABLES
        table_query = sql_query({
            "from": "sqlite_master",
            "where": {"eq": {"type": "table"}},
            "orderby": "name"
        })
        result = self.db.query(table_query)
        tables = wrap([dict(zip(result.header, row)) for row in result.data])

        last_nested_path = ["."]
        for table in tables:
            if table.name.startswith("__"):
                continue
            base_table, nested_path = tail_field(table.name)

            # FIND COMMON NESTED PATH SUFFIX
            common = []
            if nested_path != ".":
                for i, p in enumerate(last_nested_path):
                    if startswith_field(nested_path, p):
                        common = last_nested_path[i:]
                        break
            last_nested_path = common

            full_nested_path = [nested_path] + last_nested_path
            self._snowflakes[literal_field(base_table)] += [full_nested_path]

            # LOAD THE COLUMNS
            details = self.db.about(table.name)

            for cid, name, dtype, notnull, dfft_value, pk in details:
                if name.startswith("__"):
                    continue
                cname, ctype = untyped_column(name)
                # TYPE COMES FROM THE COLUMN NAME; IS_NULL WHEN IT IS NOT RECOGNISED
                jx_type = coalesce(sql_type_to_json_type.get(ctype), IS_NULL)
                column = Column(name=cname,
                                jx_type=jx_type,
                                nested_path=full_nested_path,
                                es_type=dtype,
                                es_column=name,
                                es_index=table.name,
                                last_updated=Date.now())
                self.add(column)
            last_nested_path = full_nested_path
コード例 #27
0
ファイル: snowflake.py プロジェクト: nknick99/MySQL-to-S3
    def read_db(self):
        """
        PULL SCHEMA FROM DATABASE, BUILD THE MODEL
        :return: True IF AT LEAST ONE TABLE (NOT STARTING WITH "__") WAS FOUND
        """

        # FIND ALL TABLES
        result = self.db.query(
            "SELECT * FROM sqlite_master WHERE type='table' ORDER BY name")
        rows = []
        for d in result.data:
            rows.append({k: d[i] for i, k in enumerate(result.header)})
        tables = wrap(rows)

        # USED ONLY WHEN THE COLUMN NAME CARRIES NO TYPE OF ITS OWN
        sqlite_to_json_type = {
            "TEXT": "string",
            "REAL": "number",
            "INTEGER": "integer"
        }

        tables_found = False
        for table in tables:
            if table.name.startswith("__"):
                continue
            tables_found = True

            # EVERY TABLE WHOSE NAME IS A PREFIX OF THIS ONE, IN REVERSE TABLE
            # ORDER, WITH THE FIRST STEP OF ITS NAME REMOVED
            nested_path = []
            for tab in jx.reverse(tables):
                if startswith_field(table.name, tab.name):
                    nested_path.append(join_field(split_field(tab.name)[1:]))
            self.add_table_to_schema(nested_path)

            # LOAD THE COLUMNS
            command = "PRAGMA table_info" + sql_iso(quote_column(table.name))
            details = self.db.query(command)

            for cid, name, dtype, notnull, dfft_value, pk in details.data:
                if name.startswith("__"):
                    continue
                cname, ctype = untyped_column(name)

                # THE COLUMN'S NAME AS SEEN FROM EACH LEVEL OF THE NESTED PATH
                names = {}
                for np in nested_path:
                    names[np] = relative_field(cname, np)

                column = Column(names=names,
                                type=coalesce(ctype,
                                              sqlite_to_json_type.get(dtype)),
                                nested_path=nested_path,
                                es_column=name,
                                es_index=table.name)

                self.add_column_to_schema(column)

        return tables_found
コード例 #28
0
 def __init__(self, frum, nests):
     """
     A SEQUENCE OF NESTED (INNER) JOINS FOR A QUERY

     :param frum: THE TABLE OF DOCUMENTS
     :param nests: LIST OF INNER JOINS (deepest first); WALKED IN REVERSE, EACH
            PATH MUST START WITH THE PATH BEFORE IT (BEGINNING FROM ".")
     """
     Expression.__init__(self, nests)
     self.frum = frum
     self.nests = nests

     previous_path = "."
     for nest in reversed(nests):
         current_path = nest.path.var
         if startswith_field(current_path, previous_path):
             previous_path = current_path
             continue
         Log.error("Expecting nests to be reverse nested order")
         previous_path = current_path
コード例 #29
0
ファイル: utils.py プロジェクト: klahnakoski/pyLibrary
def split_nested_inner_variables(where, focal_path, var_to_columns):
    """
    Expand a filter so each variable is replaced by every column it maps to.

    Some variables are both nested and inner, so the expression is multiplied
    out: one copy per (expression, column) combination, and the copies are
    OR-ed together.

    :param where: the expression to expand
    :param focal_path: path compared (via startswith_field) against each
                       column's deepest nested path to pick the replacement
    :param var_to_columns: mapping from variable name to its columns; a
                           variable with no columns is replaced by NULL
    :return: OrOp over all expanded expressions
    """
    expanded = [where]

    # WE DO THIS EXPANSION TO CAPTURE A VARIABLE OVER DIFFERENT NESTED LEVELS
    # EXPAND VARS TO COLUMNS, MULTIPLY THE EXPRESSIONS
    for var_name, columns in var_to_columns.items():
        next_round = []
        if columns:
            for column in columns:
                deepest = column.nested_path[0]
                for expr in expanded:
                    if not startswith_field(focal_path, deepest):
                        # column lives outside the focal path: go through a nested op
                        replacement = NestedOp(
                            path=Variable(deepest),
                            select=Variable(column.es_column),
                            where=Variable(column.es_column).exists(),
                        )
                    else:
                        replacement = Variable(
                            column.es_column,
                            type=column.jx_type,
                            multi=column.multi,
                        )
                    next_round.append(expr.map({var_name: replacement}))
        else:
            for expr in expanded:
                next_round.append(expr.map({var_name: NULL}))
        expanded = next_round

        # Rebinds the local name only; the enclosing loop keeps iterating the
        # mapping it started with.
        var_to_columns = {
            col.es_column: [col]
            for group in var_to_columns.values() for col in group
        }

    return OrOp(expanded)
コード例 #30
0
ファイル: meta.py プロジェクト: klahnakoski/tuid_experiment
 def leaves(self, column_name):
     """
     Find the non-object columns whose name starts with column_name.

     The entries of self.query_path are tried in order; the matches for the
     first path that yields any are returned, so names relative to later
     paths are only consulted when earlier ones match nothing.

     :param column_name: name prefix to look for (passed through unnest_path first)
     :return: list of matching columns, or an empty list when no path matches
     """
     column_name = unnest_path(column_name)

     # These two tests do not depend on the query path, so apply them once
     # instead of repeating them for every path.
     candidates = [
         c for c in self.columns
         if (c.names['.'] != "_id" or column_name == "_id")
         and c.jx_type not in OBJECTS
     ]

     for path in self.query_path:
         output = [
             c for c in candidates
             if startswith_field(unnest_path(c.names[path]), column_name)
         ]
         if output:
             return output
     return []
コード例 #31
0
    def _load_from_database(self):
        """
        Register the tables and columns found in the SQLite catalog.

        Tables are read from sqlite_master ordered by name. Tables and columns
        whose name starts with "__" are skipped. Every remaining table adds a
        nested path to self._snowflakes, and every remaining column is passed
        to self.add() as a Column.
        """
        # FIND ALL TABLES
        catalog = self.db.query(
            "SELECT * FROM sqlite_master WHERE type='table' ORDER BY name")
        tables = wrap([dict(zip(catalog.header, row)) for row in catalog.data])

        previous_path = []
        for table in tables:
            if table.name.startswith("__"):
                continue
            base_table, nested_path = tail_field(table.name)

            # FIND COMMON NESTED PATH SUFFIX
            # (stays empty when nothing in the previous table's path matches)
            shared_suffix = []
            for depth, ancestor in enumerate(previous_path):
                if startswith_field(nested_path, ancestor):
                    shared_suffix = previous_path[depth:]
                    break

            full_nested_path = [nested_path] + shared_suffix
            self._snowflakes[literal_field(base_table)] += [full_nested_path]

            # LOAD THE COLUMNS
            details = self.db.query(
                "PRAGMA table_info" + sql_iso(quote_column(table.name)))

            for _cid, name, dtype, _notnull, _default, _pk in details.data:
                if name.startswith("__"):
                    continue
                cname, ctype = untyped_column(name)
                column = Column(
                    name=cname,
                    jx_type=coalesce(sql_type_to_json_type.get(ctype), IS_NULL),
                    nested_path=full_nested_path,
                    es_type=dtype,
                    es_column=name,
                    es_index=table.name,
                    last_updated=Date.now(),
                )
                self.add(column)

            previous_path = full_nested_path
コード例 #32
0
ファイル: meta.py プロジェクト: rv404674/TUID
 def leaves(self, column_name):
     """
     Find the non-object columns whose name starts with column_name.

     The entries of self.query_path are tried in order; the matches for the
     first path that yields any are returned, so names relative to later
     paths are only consulted when earlier ones match nothing.

     :param column_name: name prefix to look for (passed through unnest_path first)
     :return: list of matching columns, or an empty list when no path matches
     """
     column_name = unnest_path(column_name)

     # These two tests do not depend on the query path, so apply them once
     # instead of repeating them for every path.
     candidates = [
         c
         for c in self.columns
         if (
             (c.names['.'] != "_id" or column_name == "_id") and
             c.jx_type not in OBJECTS
         )
     ]

     for path in self.query_path:
         output = [
             c
             for c in candidates
             if startswith_field(unnest_path(c.names[path]), column_name)
         ]
         if output:
             return output
     return []
コード例 #33
0
ファイル: expressions.py プロジェクト: vpathak2019/jx-sqlite
def to_sql(self, schema, not_null=False, boolean=False):
    """
    List the database columns found under this expression's variable.

    :param schema: supplies map_to_sql() and get_column_name()
    :param not_null: not read by this implementation
    :param boolean: not read by this implementation
    :return: wrapped list of {"name": ..., "sql": ...} entries, one per column
    """
    if not isinstance(self.term, Variable):
        Log.error("Can only handle Variable")
    term = self.term.var
    depth = len(split_field(term))

    found = []
    for _, columns in schema.map_to_sql(term).items():
        for column in columns:
            full_name = schema.get_column_name(column)
            # Names under `term` are reported relative to it; others verbatim.
            if startswith_field(full_name, term):
                short_name = join_field(split_field(full_name)[depth:])
            else:
                short_name = full_name
            found.append({
                "name": short_name,
                "sql": Variable(full_name).to_sql(schema)[0].sql
            })

    return wrap(found)
コード例 #34
0
    def query(self, query):
        """
        Run a JSON query expression against this table and format the result.

        Pure set operations (no groupby, no edges, no aggregates) are handed to
        self._set_op() and its result is returned as-is. Everything else is
        turned into SQL by self._groupby_op() / self._edges_op(), executed, and
        then shaped according to query.format.

        :param query:  JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
        :return: a QueryTable when format is "container"; otherwise a Data
                 whose meta.format is "cube", "table", "list" or "value"
        """
        if not startswith_field(query['from'], self.sf.fact):
            Log.error("Expecting table, or some nested table")
        frum, query['from'] = query['from'], self
        table = self.sf.tables[relative_field(frum, self.sf.fact)]
        schema = table.schema
        query = QueryOp.wrap(query, table=table, schema=schema)
        new_table = "temp_" + unique_name()

        if query.format == "container":
            create_table = "CREATE TABLE " + quote_column(new_table) + " AS "
        else:
            create_table = ""

        if query.groupby and query.format != "cube":
            op, index_to_columns = self._groupby_op(query, frum)
            command = create_table + op
        elif query.groupby:
            # cube format with groupby: temporarily treat the groupby as edges
            query.edges, query.groupby = query.groupby, query.edges
            op, index_to_columns = self._edges_op(query, frum)
            command = create_table + op
            query.edges, query.groupby = query.groupby, query.edges
        elif query.edges or any(a != "none" for a in listwrap(query.select).aggregate):
            op, index_to_columns = self._edges_op(query, frum)
            command = create_table + op
        else:
            op = self._set_op(query, frum)
            return op

        result = self.db.query(command)

        if query.format == "container":
            output = QueryTable(new_table, db=self.db, uid=self.uid, exists=True)
        elif query.format == "cube" or (not query.format and query.edges):
            column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
            for c in index_to_columns.values():
                column_names[c.push_column] = c.push_column_name

            if len(query.edges) == 0 and len(query.groupby) == 0:
                # NO DIMENSIONS: A SINGLE ROW OF AGGREGATES
                data = {n: Data() for n in column_names}
                for s in index_to_columns.values():
                    data[s.push_name][s.push_child] = unwrap(s.pull(result.data[0]))
                if isinstance(query.select, list):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(
                    data=unwrap(data),
                    select=select,
                    meta={"format": "cube"}
                )

            if not result.data:
                # NO ROWS: RETURN AN EMPTY CUBE WITH THE DECLARED EDGES
                edges = []
                dims = []
                for i, e in enumerate(query.edges + query.groupby):
                    allowNulls = coalesce(e.allowNulls, True)

                    if e.domain.type == "set" and e.domain.partitions:
                        domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                    elif e.domain.type == "range":
                        domain = e.domain
                    elif isinstance(e.value, TupleOp):
                        pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name],
                                        "push_child").pull
                        parts = [tuple(p(d) for p in pulls) for d in result.data]
                        domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                    else:
                        domain = SimpleSetDomain(partitions=[])

                    dims.append(1 if allowNulls else 0)
                    edges.append(Data(
                        name=e.name,
                        allowNulls=allowNulls,
                        domain=domain
                    ))

                data = {}
                for si, s in enumerate(listwrap(query.select)):
                    if s.aggregate == "count":
                        data[s.name] = Matrix(dims=dims, zeros=0)
                    else:
                        data[s.name] = Matrix(dims=dims)

                if isinstance(query.select, list):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(
                    meta={"format": "cube"},
                    edges=edges,
                    select=select,
                    data={k: v.cube for k, v in data.items()}
                )

            columns = None  # result.data TRANSPOSED, BUILT ON FIRST USE

            edges = []
            dims = []
            for g in query.groupby:
                g.is_groupby = True

            for i, e in enumerate(query.edges + query.groupby):
                allowNulls = coalesce(e.allowNulls, True)

                if e.domain.type == "set" and e.domain.partitions:
                    domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                elif e.domain.type == "range":
                    domain = e.domain
                elif e.domain.type == "time":
                    domain = wrap(mo_json.scrub(e.domain))
                elif e.domain.type == "duration":
                    domain = wrap(mo_json.scrub(e.domain))
                elif isinstance(e.value, TupleOp):
                    pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name], "push_child").pull
                    parts = [tuple(p(d) for p in pulls) for d in result.data]
                    domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                else:
                    if not columns:
                        # MATERIALIZE: zip() IS A LAZY ITERATOR ON PYTHON 3 AND
                        # CAN NOT BE INDEXED BY columns[i] BELOW
                        columns = list(zip(*result.data))
                    parts = set(columns[i])
                    if e.is_groupby and None in parts:
                        allowNulls = True
                    parts -= {None}

                    if query.sort[i].sort == -1:
                        domain = SimpleSetDomain(partitions=wrap(sorted(parts, reverse=True)))
                    else:
                        domain = SimpleSetDomain(partitions=jx.sort(parts))

                dims.append(len(domain.partitions) + (1 if allowNulls else 0))
                edges.append(Data(
                    name=e.name,
                    allowNulls=allowNulls,
                    domain=domain
                ))

            data_cubes = {}
            for si, s in enumerate(listwrap(query.select)):
                if s.aggregate == "count":
                    data_cubes[s.name] = Matrix(dims=dims, zeros=0)
                else:
                    data_cubes[s.name] = Matrix(dims=dims)

            r2c = index_to_coordinate(dims)  # WORKS BECAUSE THE DATABASE SORTED THE EDGES TO CONFORM
            for rownum, row in enumerate(result.data):
                coord = r2c(rownum)

                for i, s in enumerate(index_to_columns.values()):
                    if s.is_edge:
                        continue
                    if s.push_child == ".":
                        data_cubes[s.push_name][coord] = s.pull(row)
                    else:
                        data_cubes[s.push_name][coord][s.push_child] = s.pull(row)

            if query.select == None:
                select = Null
            elif isinstance(query.select, list):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(
                meta={"format": "cube"},
                edges=edges,
                select=select,
                data={k: v.cube for k, v in data_cubes.items()}
            )
        elif query.format == "table" or (not query.format and query.groupby):
            column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
            for c in index_to_columns.values():
                column_names[c.push_column] = c.push_column_name
            data = []
            for d in result.data:
                row = [None for _ in column_names]
                for s in index_to_columns.values():
                    if s.push_child == ".":
                        row[s.push_column] = s.pull(d)
                    elif s.num_push_columns:
                        # TUPLE VALUE: FILL ONE SLOT OF A FIXED-LENGTH LIST
                        tuple_value = row[s.push_column]
                        if tuple_value == None:
                            tuple_value = row[s.push_column] = [None] * s.num_push_columns
                        tuple_value[s.push_child] = s.pull(d)
                    elif row[s.push_column] == None:
                        row[s.push_column] = Data()
                        row[s.push_column][s.push_child] = s.pull(d)
                    else:
                        row[s.push_column][s.push_child] = s.pull(d)
                data.append(tuple(unwrap(r) for r in row))

            output = Data(
                meta={"format": "table"},
                header=column_names,
                data=data
            )
        elif query.format == "list" or (not query.edges and not query.groupby):
            if not query.edges and not query.groupby and any(listwrap(query.select).aggregate):
                # AGGREGATES WITHOUT DIMENSIONS: A SINGLE "value" RESULT
                if isinstance(query.select, list):
                    data = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            if data[c.push_name] == None:
                                data[c.push_name] = c.pull(result.data[0])
                            elif isinstance(data[c.push_name], list):
                                data[c.push_name].append(c.pull(result.data[0]))
                            else:
                                data[c.push_name] = [data[c.push_name], c.pull(result.data[0])]
                        else:
                            data[c.push_name][c.push_child] = c.pull(result.data[0])

                    output = Data(
                        meta={"format": "value"},
                        data=data
                    )
                else:
                    data = Data()
                    for s in index_to_columns.values():
                        if not data[s.push_child]:
                            data[s.push_child] = s.pull(result.data[0])
                        else:
                            data[s.push_child] += [s.pull(result.data[0])]
                    output = Data(
                        meta={"format": "value"},
                        data=unwrap(data)
                    )
            else:
                data = []
                for rownum in result.data:
                    row = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            row[c.push_name] = c.pull(rownum)
                        elif c.num_push_columns:
                            tuple_value = row[c.push_name]
                            if not tuple_value:
                                tuple_value = row[c.push_name] = [None] * c.num_push_columns
                            tuple_value[c.push_child] = c.pull(rownum)
                        else:
                            row[c.push_name][c.push_child] = c.pull(rownum)

                    data.append(row)

                output = Data(
                    meta={"format": "list"},
                    data=data
                )
        else:
            Log.error("unknown format {{format}}", format=query.format)

        return output
コード例 #35
0
ファイル: __init__.py プロジェクト: klahnakoski/pyLibrary
 def _none_to_column(schema, path, rep_level, def_level):
     """
     Append the given levels to every leaf of all_schema found under `path`.

     :param schema: not read by this function
     :param path: prefix (per startswith_field) selecting the affected leaves
     :param rep_level: value appended to reps[leaf]
     :param def_level: value appended to defs[leaf]
     """
     for leaf in all_schema.leaves:
         if not startswith_field(leaf, path):
             continue
         reps[leaf].append(rep_level)
         defs[leaf].append(def_level)
コード例 #36
0
ファイル: __init__.py プロジェクト: klahnakoski/pyLibrary
    def __init__(
        self,
        host,
        index,
        type=None,
        name=None,
        port=9200,
        read_only=True,
        timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
        wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
        typed=None,
        kwargs=None
    ):
        """
        Set up a container over an Elasticsearch index (or alias).

        Only `index`, `name`, `read_only`, `typed` and `kwargs` are read
        directly in this body; the remaining parameters are not referenced
        here (`kwargs` is what gets passed on to the elasticsearch objects).

        :param index: name of the index/alias; also the default for `name`
        :param name: name of this container (defaults to `index`)
        :param read_only: when true an Alias is opened, otherwise the index is
                          fetched from a Cluster
        :param typed: expected typed mode; None means "detect from the columns"
        :param kwargs: settings, stored as self.settings
        """
        Container.__init__(self)
        if not container.config.default:
            container.config.default = {
                "type": "elasticsearch",
                "settings": unwrap(kwargs)
            }
        self.settings = kwargs
        self.name = name = coalesce(name, index)
        if read_only:
            self.es = elasticsearch.Alias(alias=index, kwargs=kwargs)
        else:
            self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs)

        self._namespace = ElasticsearchMetadata(kwargs=kwargs)
        self.settings.type = self.es.settings.type
        self.edges = Data()
        self.worker = None

        absolute_columns = self.snowflake.columns  # ABSOLUTE COLUMNS
        is_typed = any(c.es_column == EXISTS_TYPE for c in absolute_columns)

        if typed == None:
            # SWITCH ON TYPED MODE
            self.typed = is_typed
        else:
            if is_typed != typed:
                Log.error("Expecting given typed {{typed}} to match {{is_typed}}", typed=typed, is_typed=is_typed)
            self.typed = typed

        # NOTE: this tests the `typed` argument, not self.typed
        if not typed:
            # ADD EXISTENCE COLUMNS
            parent_of = {".": None}  # MAP FROM path TO parent TO MAKE A TREE

            def nested_path_of(v):
                # follow parent links until a falsy parent is reached
                chain = []
                while v:
                    chain.append(v)
                    v = parent_of[v]
                return chain

            all_steps = sort_using_key(set(step for path in self.snowflake.query_paths for step in path), key=lambda p: len(split_field(p)))
            for step in sorted(all_steps):
                if step in parent_of:
                    continue
                # pick the known path that prefixes `step` and extends the best so far
                best = '.'
                for candidate in parent_of.keys():
                    if startswith_field(step, candidate) and startswith_field(candidate, best):
                        best = candidate
                parent_of[step] = best

            for p in parent_of.keys():
                nested_path = nested_path_of(parent_of[p]) or ['.']
                existence_column = Column(
                    name=p,
                    es_column=p,
                    es_index=self.name,
                    es_type=OBJECT,
                    jx_type=EXISTS,
                    nested_path=nested_path,
                    last_updated=Date.now()
                )
                self.namespace.meta.columns.add(existence_column)
コード例 #37
0
        def _flatten(data,
                     uid,
                     parent_id,
                     order,
                     full_path,
                     nested_path,
                     row=None,
                     guid=None):
            """
            Pull a (sub)document apart into rows, recording schema changes as
            new properties or deeper nesting are discovered.

            Works through the enclosing scope's doc_collection, abs_schema,
            nested_tables and required_changes, all of which are mutated.

            :param data: the data we are pulling apart
            :param uid: the uid we are giving this doc
            :param parent_id: the parent id of this (sub)doc
            :param order: the number of siblings before this one
            :param full_path: path to this (sub)doc
            :param nested_path: list of paths, deepest first
            :param row: we will be filling this
            :param guid: stored under GUID when a new row is created here
            :return:
            """
            table = concat_field(self.sf.fact, nested_path[0])
            insertion = doc_collection[nested_path[0]]
            if not row:
                # no row handed in: start a fresh one for this (sub)doc
                row = {GUID: guid, UID: uid, PARENT: parent_id, ORDER: order}
                insertion.rows.append(row)

            if not isinstance(data, Mapping):
                # treat a bare value as a single property named "."
                data = {".": data}
            for k, v in data.items():
                insertion = doc_collection[nested_path[0]]
                cname = concat_field(full_path, literal_field(k))
                value_type = get_type(v)
                if value_type is None:
                    continue

                # look for an already-known column of a compatible type
                if value_type in STRUCT:
                    c = unwraplist(
                        [cc for cc in abs_schema[cname] if cc.type in STRUCT])
                else:
                    c = unwraplist([
                        cc for cc in abs_schema[cname] if cc.type == value_type
                    ])

                if not c:
                    # WHAT IS THE NESTING LEVEL FOR THIS PATH?
                    # NOTE(review): the value computed here is not read before
                    # deeper_nested_path is reassigned further down - confirm intent
                    deeper_nested_path = "."
                    for path, _ in nested_tables.items():
                        if startswith_field(
                                cname,
                                path) and len(deeper_nested_path) < len(path):
                            deeper_nested_path = path

                    c = Column(names={".": cname},
                               type=value_type,
                               es_column=typed_column(cname, value_type),
                               es_index=table,
                               nested_path=nested_path)
                    abs_schema.add(cname, c)
                    if value_type == "nested":
                        nested_tables[cname] = "fake table"

                    required_changes.append({"add": c})

                    # INSIDE IF BLOCK BECAUSE WE DO NOT WANT IT TO ADD WHAT WE columns.get() ALREADY
                    insertion.active_columns.add(c)
                elif c.type == "nested" and value_type == "object":
                    # known as nested, but a single object arrived: wrap it in a list
                    value_type = "nested"
                    v = [v]
                elif len(c.nested_path) < len(nested_path):
                    # the known column is shallower than this doc: replace it
                    # with a deeper column and copy the existing values over
                    from_doc = doc_collection.get(c.nested_path[0], None)
                    column = c.es_column
                    from_doc.active_columns.remove(c)
                    abs_schema.remove(cname, c)
                    required_changes.append({"nest": (c, nested_path[0])})
                    deep_c = Column(names={".": cname},
                                    type=value_type,
                                    es_column=typed_column(cname, value_type),
                                    es_index=table,
                                    nested_path=nested_path)
                    abs_schema.add(cname, deep_c)
                    insertion.active_columns.add(deep_c)

                    for r in from_doc.rows:
                        r1 = unwrap(r)
                        if column in r1:
                            row1 = {
                                UID: self.next_uid(),
                                PARENT: r1["__id__"],
                                ORDER: 0,
                                column: r1[column]
                            }
                            insertion.rows.append(row1)

                elif len(c.nested_path) > len(nested_path):
                    # the known column is deeper than this doc: write into a
                    # new row of the deeper collection instead
                    insertion = doc_collection[c.nested_path[0]]
                    row = {UID: self.next_uid(), PARENT: uid, ORDER: order}
                    insertion.rows.append(row)

                # BE SURE TO NEST VALUES, IF NEEDED
                if value_type == "nested":
                    row[c.es_column] = "."
                    deeper_nested_path = [cname] + nested_path
                    insertion = doc_collection.get(cname, None)
                    if not insertion:
                        insertion = doc_collection[cname] = Data(
                            active_columns=set(), rows=[])
                    # each element becomes its own child doc, one level deeper
                    for i, r in enumerate(v):
                        child_uid = self.next_uid()
                        _flatten(r, child_uid, uid, i, cname,
                                 deeper_nested_path)
                elif value_type == "object":
                    row[c.es_column] = "."
                    # objects stay at the same nesting level and share this row
                    _flatten(v,
                             uid,
                             parent_id,
                             order,
                             cname,
                             nested_path,
                             row=row)
                elif c.type:
                    row[c.es_column] = v
コード例 #38
0
ファイル: schema.py プロジェクト: rv404674/TUID
def _indexer(columns, query_path):
    """
    Build the name-to-columns lookups for `columns` as seen from `query_path`.

    When query_path is not ".", the lookups for "." are computed as well and
    used to fill in any names missing from the relative ones.

    :param columns: columns to index; c.names[query_path] is read for each
    :param query_path: the path whose relative names are indexed
    :return: tuple (relative_lookup, lookup_leaves, lookup_variables); each is
             a dict mapping a name to a set of columns
    """
    all_names = set(unnest_path(n) for c in columns for n in c.names.values()) | {"."}

    lookup_leaves = {}  # ALL LEAF VARIABLES
    for full_name in all_names:
        for c in columns:
            nfp = unnest_path(c.names[query_path])
            if not startswith_field(nfp, full_name):
                continue
            if c.es_type in [EXISTS, OBJECT, NESTED]:
                continue
            if not (c.es_column != "_id" or full_name == "_id"):
                continue
            # register under both the typed and the untyped name
            lookup_leaves.setdefault(full_name, set()).add(c)
            lookup_leaves.setdefault(untype_path(full_name), set()).add(c)

    lookup_variables = {}  # ALL NOT-NESTED VARIABLES
    for full_name in all_names:
        for c in columns:
            nfp = unnest_path(c.names[query_path])
            if not startswith_field(nfp, full_name):
                continue
            if c.es_type in [EXISTS, OBJECT]:
                continue
            if not (c.es_column != "_id" or full_name == "_id"):
                continue
            if not startswith_field(c.nested_path[0], query_path):
                continue
            lookup_variables.setdefault(full_name, set()).add(c)
            lookup_variables.setdefault(untype_path(full_name), set()).add(c)

    relative_lookup = {}
    for c in columns:
        try:
            cname = c.names[query_path]
            relative_lookup.setdefault(cname, set()).add(c)
            relative_lookup.setdefault(untype_path(cname), set()).add(c)
        except Exception as e:
            Log.error("Should not happen", cause=e)

    if query_path != ".":
        # ADD ABSOLUTE NAMES TO THE NAMESPACE
        absolute_lookup, more_leaves, more_variables = _indexer(columns, ".")
        backfills = (
            (relative_lookup, absolute_lookup),
            (lookup_leaves, more_leaves),
            (lookup_variables, more_variables),
        )
        for target, extra in backfills:
            for k, cs in extra.items():
                # relative names win; absolute names only fill the gaps
                if k not in target:
                    target[k] = cs

    return relative_lookup, lookup_leaves, lookup_variables
コード例 #39
0
ファイル: deep.py プロジェクト: klahnakoski/annotations
def es_deepop(es, query):
    """
    Run a query over nested documents and format the result.

    Builds the ES query from es_query_template() for the first entry of the
    schema's query_path, works out how each select clause is pulled from the
    hits, posts the query (plus a second query when the filter has no nested
    part), and passes the inner hits to the formatter for query.format.

    :param es: passed to es_post() as the target of the requests
    :param query: the query; frum.schema, where, select, sort, limit and
                  format are read from it
    :return: the formatter's output, with meta.timing.es, meta.content_type
             and meta.es_query filled in
    """
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es_query_template(query_path)

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(query.where, schema)
    for f, w in zip_longest(es_filters, wheres):
        script = ES52[AndOp(w)].partial_eval().to_esfilter(schema)
        set_default(f, script)

    if not wheres[1]:
        # INCLUDE DOCS WITH NO NESTED DOCS
        more_filter = {
            "bool": {
                "filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)],
                "must_not": {
                    "nested": {
                        "path": query_path,
                        "query": MATCH_ALL
                    }
                }
            }
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)

    map_to_es_columns = schema.map_to_es()
    query_for_es = query.map(map_to_es_columns)
    es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)

    es_query.stored_fields = []

    is_list = is_list_(query.select)
    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    # put_index IS THE OUTPUT COLUMN POSITION HANDED TO EACH SELECT'S "put"
    put_index = 0
    for select in selects:
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
            leaves = schema.leaves(select.value.term.var)
            col_names = set()
            for c in leaves:
                if c.nested_path[0] == ".":
                    if c.jx_type == NESTED:
                        continue
                    es_query.stored_fields += [c.es_column]
                c_name = untype_path(relative_field(c.name, query_path))
                col_names.add(c_name)
                new_select.append({
                    "name": concat_field(select.name, c_name),
                    "nested_path": c.nested_path[0],
                    "put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."},
                    "pull": get_pull_function(c)
                })
                put_index += 1

            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
            for n in new_select:
                if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                    n.put.name = n.name = n.name.lstrip(".")
                    col_names.add(n.name)
        elif is_op(select.value, Variable):
            net_columns = schema.leaves(select.value.var)
            if not net_columns:
                # NO SUCH COLUMN: THE SELECT STILL GETS A SLOT, FILLED WITH NULL
                new_select.append({
                    "name": select.name,
                    "nested_path": ".",
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": NULL
                })
            else:
                for n in net_columns:
                    pull = get_pull_function(n)
                    if n.nested_path[0] == ".":
                        if n.jx_type == NESTED:
                            continue
                        es_query.stored_fields += [n.es_column]

                    # WE MUST FIGURE OUT WHICH NAMESPACE s.value.var IS USING SO WE CAN EXTRACT THE child
                    for np in n.nested_path:
                        c_name = untype_path(relative_field(n.name, np))
                        if startswith_field(c_name, select.value.var):
                            # PREFER THE MOST-RELATIVE NAME
                            child = relative_field(c_name, select.value.var)
                            break
                    else:
                        # NO NESTED PATH GIVES A NAME UNDER THE SELECTED VARIABLE: SKIP THIS COLUMN
                        continue

                    new_select.append({
                        "name": select.name,
                        "pull": pull,
                        "nested_path": n.nested_path[0],
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": child
                        }
                    })
            # ALL COLUMNS OF ONE VARIABLE SHARE THE SAME put_index
            put_index += 1
        else:
            expr = select.value
            for v in expr.vars():
                for c in schema[v.var]:
                    if c.nested_path[0] == ".":
                        es_query.stored_fields += [c.es_column]
                    # else:
                    #     Log.error("deep field not expected")

            # THE EXPRESSION IS EVALUATED PER HIT (SEE inners()) AND STORED UNDER pull_name
            pull_name = EXPRESSION_PREFIX + select.name
            map_to_local = MapToLocal(schema)
            pull = jx_expression_to_function(pull_name)
            post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local))

            new_select.append({
                "name": select.name if is_list else ".",
                "pull": pull,
                "value": expr.__data__(),
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            put_index += 1

    es_query.stored_fields = sorted(es_query.stored_fields)

    # <COMPLICATED> ES needs two calls to get all documents
    more = []
    def get_more(please_stop):
        # SECOND REQUEST, USING more_filter; THE RESPONSE IS COLLECTED IN more
        more.append(es_post(
            es,
            Data(
                query=more_filter,
                stored_fields=es_query.stored_fields
            ),
            query.limit
        ))
    if more_filter:
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    # EACH HIT IS RETURNED MULTIPLE TIMES, ONCE FOR EACH INNER HIT, WITH THE INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            # WAIT FOR THE SECOND REQUEST, THEN YIELD ITS HITS AS-IS
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    # </COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
コード例 #40
0
ファイル: deep.py プロジェクト: rv404674/TUID
def es_deepop(es, query):
    """
    RUN A QUERY OVER THE NESTED DOCUMENTS FOUND AT schema.query_path[0]

    THE where CLAUSE IS SPLIT BY DEPTH AND MERGED INTO THE ES QUERY TEMPLATE,
    EACH select CLAUSE IS TURNED INTO ONE OR MORE PULL/PUT INSTRUCTIONS, AND
    THE HITS ARE HANDED TO THE FORMATTER CHOSEN BY query.format

    :param es: HANDED TO es_post() FOR EVERY REQUEST
    :param query: THE QUERY; frum.schema, where, select, sort, limit AND format ARE READ
    :return: FORMATTER OUTPUT, WITH meta.timing.es, meta.content_type AND meta.es_query SET
    """
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es_query_template(query_path)

    # SPLIT WHERE CLAUSE BY DEPTH, AND MERGE EACH PART INTO THE FILTER OF THE SAME DEPTH
    wheres = split_expression_by_depth(query.where, schema)
    for depth, es_filter in enumerate(es_filters):
        depth_filter = AndOp("and", wheres[depth]).partial_eval().to_esfilter(schema)
        set_default(es_filter, depth_filter)

    more_filter = None
    if not wheres[1]:
        # WITHOUT NESTED CONDITIONS, WE MUST ALSO RETURN DOCS WITH NO NESTED RECORDS
        more_filter = {
            "and": [
                es_filters[0],
                {"missing": {"field": untype_path(query_path) + "." + EXISTS_TYPE}}
            ]
        }

    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)

    # THE SORT IS EXPRESSED IN ES COLUMN NAMES, SO MAP THE QUERY FIRST
    query_for_es = query.map(schema.map_to_es())
    es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)

    es_query.fields = []

    is_list = isinstance(query.select, list)
    new_select = FlatList()

    # put_index IS THE POSITION OF THE CURRENT SELECT CLAUSE'S VALUE IN THE OUTPUT
    put_index = 0
    for select in listwrap(query.select):
        value = select.value

        if isinstance(value, LeavesOp) and isinstance(value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS; EACH LEAF GETS ITS OWN INDEX
            col_names = set()
            for leaf in schema.leaves(value.term.var):
                is_shallow = leaf.nested_path[0] == "."
                if is_shallow and leaf.jx_type == NESTED:
                    continue
                if is_shallow:
                    es_query.fields += [leaf.es_column]

                leaf_name = untype_path(leaf.names[query_path])
                col_names.add(leaf_name)
                new_select.append({
                    "name": concat_field(select.name, leaf_name),
                    "nested_path": leaf.nested_path[0],
                    "put": {
                        "name": concat_field(select.name, literal_field(leaf_name)),
                        "index": put_index,
                        "child": "."
                    },
                    "pull": get_pull_function(leaf)
                })
                put_index += 1

            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
            # (THIS PASS COVERS EVERYTHING SELECTED SO FAR, NOT ONLY THE LEAVES JUST ADDED)
            for existing in new_select:
                if not existing.name.startswith(".."):
                    continue
                if existing.name.lstrip(".") in col_names:
                    continue
                existing.put.name = existing.name = existing.name.lstrip(".")
                col_names.add(existing.name)

        elif isinstance(value, Variable):
            var_name = value.var
            net_columns = schema.leaves(var_name)
            if net_columns:
                for column in net_columns:
                    pull = get_pull_function(column)
                    if column.nested_path[0] == ".":
                        if column.jx_type == NESTED:
                            continue
                        es_query.fields += [column.es_column]

                    # WE MUST FIGURE OUT WHICH NAMESPACE THE VARIABLE IS USING SO WE CAN EXTRACT THE child
                    for path in column.nested_path:
                        candidate = untype_path(column.names[path])
                        if startswith_field(candidate, var_name):
                            child = relative_field(candidate, var_name)
                            break
                    else:
                        # NO NAMESPACE MATCHED; USE THE NAME RELATIVE TO THE COLUMN'S OWN (FIRST) NESTED PATH
                        child = relative_field(
                            untype_path(column.names[column.nested_path[0]]),
                            var_name
                        )

                    new_select.append({
                        "name": select.name,
                        "pull": pull,
                        "nested_path": column.nested_path[0],
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": child
                        }
                    })
            else:
                # NO COLUMNS FOR THIS VARIABLE: THE SELECT STILL GETS A SLOT, PULLING NULL
                new_select.append({
                    "name": select.name,
                    "nested_path": ".",
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": NULL
                })
            # ALL COLUMNS OF ONE VARIABLE SHARE THE SAME OUTPUT INDEX
            put_index += 1

        else:
            # GENERAL EXPRESSION: REQUEST THE SHALLOW FIELDS IT USES, THEN EVALUATE IT ON EACH HIT
            expr = value
            for used in expr.vars():
                for column in schema[used.var]:
                    # NOTE: DEEP FIELDS ARE SILENTLY NOT REQUESTED HERE
                    if column.nested_path[0] == ".":
                        es_query.fields += [column.es_column]

            pull_name = EXPRESSION_PREFIX + select.name
            map_to_local = MapToLocal(schema)
            pull = jx_expression_to_function(pull_name)
            post_expressions[pull_name] = compile_expression(
                expr.map(map_to_local).to_python()
            )

            new_select.append({
                "name": select.name if is_list else ".",
                "pull": pull,
                "value": expr.__data__(),
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            put_index += 1

    # <COMPLICATED> ES needs two calls to get all documents
    more = []

    def get_more(please_stop):
        # SECOND REQUEST: THE DOCS MATCHING more_filter; THE RESPONSE IS PARKED IN more
        request = Data(
            query={"filtered": {"filter": more_filter}},
            fields=es_query.fields
        )
        more.append(es_post(es, request, query.limit))

    if more_filter:
        # START THE SECOND REQUEST BEFORE MAKING THE MAIN ONE
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    # EACH HIT IS RETURNED MULTIPLE TIMES, ONCE FOR EACH INNER HIT, WITH INNER HIT INCLUDED
    def inners():
        for hit in data.hits.hits:
            for inner in hit.inner_hits[literal_field(query_path)].hits.hits:
                hit._inner = inner._source
                for expression_name, evaluate in post_expressions.items():
                    hit[expression_name] = evaluate(hit)
                yield hit
        if more_filter:
            # WAIT FOR THE SECOND REQUEST, THEN EMIT ITS HITS AS-IS
            Thread.join(need_more)
            for hit in more[0].hits.hits:
                yield hit
    # </COMPLICATED>

    try:
        formatter, _groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)