Esempio n. 1
0
    def where(self, filter):
        """
        WILL NOT PULL WHOLE OBJECT, JUST TOP-LEVEL PROPERTIES
        :param filter:  jx_expression filter
        :return: list of objects that match
        """
        select = []
        column_names = []
        for cname, cs in self.columns.items():
            cs = [
                c for c in cs
                if c.type not in STRUCT and len(c.nested_path) == 1
            ]
            if len(cs) == 0:
                continue
            column_names.append(cname)
            if len(cs) == 1:
                select.append(
                    quote_table(c.es_column) + " " + quote_table(c.name))
            else:
                select.append("coalesce(" +
                              ",".join(quote_table(c.es_column) for c in cs) +
                              ") " + quote_table(c.name))

        result = self.db.query(" SELECT " + "\n,".join(select) + " FROM " +
                               quote_table(self.sf.fact) + " WHERE " +
                               jx_expression(filter).to_sql())
        return wrap([{c: v
                      for c, v in zip(column_names, r)} for r in result.data])
Esempio n. 2
0
    def _groupby_op(self, query, frum):
        columns = self._get_sql_schema(frum)
        index_to_column = {}
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path,
                    sub_table) in enumerate(self.nested_tables.items())
        }

        selects = []
        groupby = []
        for i, e in enumerate(query.groupby):
            column_number = len(selects)
            sql_type, sql = e.value.to_sql(self)[0].sql.items()[0]
            groupby.append(sql)
            selects.append(sql + " AS " + e.name)

            index_to_column[column_number] = Data(
                is_edge=True,
                push_name=e.name,
                push_column=column_number,
                push_child=".",
                pull=get_column(column_number),
                sql=sql,
                type=sql_type_to_json_type[sql_type])

        for s in listwrap(query.select):
            column_number = len(selects)
            sql_type, sql = s.value.to_sql(self)[0].sql.items()[0]

            if s.value == "." and s.aggregate == "count":
                selects.append("COUNT(1) AS " + quote_table(s.name))
            else:
                selects.append(sql_aggs[s.aggregate] + "(" + sql + ") AS " +
                               quote_table(s.name))

            index_to_column[column_number] = Data(
                push_name=s.name,
                push_column=column_number,
                push_child=".",
                pull=get_column(column_number),
                sql=sql,
                type=sql_type_to_json_type[sql_type])

        for w in query.window:
            selects.append(self._window_op(self, query, w))

        where = query.where.to_sql(self)[0].sql.b

        command = "SELECT\n" + (",\n".join(selects)) + \
                  "\nFROM\n" + quote_table(self.name) + " " + nest_to_alias["."] + \
                  "\nWHERE\n" + where + \
                  "\nGROUP BY\n" + ",\n".join(groupby)

        return command, index_to_column
Esempio n. 3
0
    def _window_op(self, query, window):
        # http://www2.sqlite.org/cvstrac/wiki?p=UnsupportedSqlAnalyticalFunctions
        if window.value == "rownum":
            return "ROW_NUMBER()-1 OVER (" + \
                   " PARTITION BY " + (", ".join(window.edges.values)) + \
                   " ORDER BY " + (", ".join(window.edges.sort)) + \
                   ") AS " + quote_table(window.name)

        range_min = text_type(coalesce(window.range.min, "UNBOUNDED"))
        range_max = text_type(coalesce(window.range.max, "UNBOUNDED"))

        return sql_aggs[window.aggregate] + "(" + window.value.to_sql() + ") OVER (" + \
               " PARTITION BY " + (", ".join(window.edges.values)) + \
               " ORDER BY " + (", ".join(window.edges.sort)) + \
               " ROWS BETWEEN " + range_min + " PRECEDING AND " + range_max + " FOLLOWING " + \
               ") AS " + quote_table(window.name)
Esempio n. 4
0
    def create_fact(self, uid=UID):
        """
        MAKE NEW TABLE WITH GIVEN guid
        :param uid: name, or list of names, for the GUID
        :return: None
        """
        self.add_table_to_schema(["."])

        uid = listwrap(uid)
        new_columns = []
        for u in uid:
            if u == UID:
                pass
            else:
                c = Column(names={".": u},
                           type="string",
                           es_column=typed_column(u, "string"),
                           es_index=self.fact)
                self.add_column_to_schema(c)
                new_columns.append(c)

        command = (
            "CREATE TABLE " + quote_table(self.fact) + "(" +
            (",".join([quoted_GUID + " TEXT "] + [quoted_UID + " INTEGER"] + [
                quote_column(c.es_column) + " " + sql_types[c.type]
                for c in self.tables["."].schema.columns
            ])) + ", PRIMARY KEY (" +
            (", ".join([quoted_GUID] + [quoted_UID] + [
                quote_column(c.es_column)
                for c in self.tables["."].schema.columns
            ])) + "))")

        self.db.execute(command)
Esempio n. 5
0
    def _nest_column(self, column, new_path):
        destination_table = join_field([self.name] + split_field(new_path))
        existing_table = join_field([self.name] +
                                    split_field(column.nested_path[0]))

        # FIND THE INNER COLUMNS WE WILL BE MOVING
        new_columns = {}
        for cname, cols in self.columns.items():
            if startswith_field(cname, column.names[self.name]):
                new_columns[cname] = set()
                for col in cols:
                    new_columns[cname].add(col)
                    col.nested_path = [new_path] + col.nested_path

        # TODO: IF THERE ARE CHILD TABLES, WE MUST UPDATE THEIR RELATIONS TOO?

        # DEFINE A NEW TABLE?
        # LOAD THE COLUMNS
        command = "PRAGMA table_info(" + quote_table(destination_table) + ")"
        details = self.db.query(command)
        if details.data:
            raise Log.error("not expected, new nesting!")
        from jx_sqlite.query_table import QueryTable
        self.nested_tables[new_path] = sub_table = QueryTable(
            destination_table, self.db, exists=False)

        self.db.execute("ALTER TABLE " + quote_table(sub_table.name) +
                        " ADD COLUMN " + quoted_PARENT + " INTEGER")
        self.db.execute("ALTER TABLE " + quote_table(sub_table.name) +
                        " ADD COLUMN " + quote_table(ORDER) + " INTEGER")
        for cname, cols in new_columns.items():
            for c in cols:
                sub_table.add_column(c)

        # TEST IF THERE IS ANY DATA IN THE NEW NESTED ARRAY
        all_cols = [c for _, cols in sub_table.columns.items() for c in cols]
        if not all_cols:
            has_nested_data = "0"
        elif len(all_cols) == 1:
            has_nested_data = _quote_column(all_cols[0]) + " is NOT NULL"
        else:
            has_nested_data = "COALESCE(" + \
                              ",".join(_quote_column(c) for c in all_cols) + \
                              ") IS NOT NULL"

        # FILL TABLE WITH EXISTING COLUMN DATA
        command = "INSERT INTO " + quote_table(destination_table) + "(\n" + \
                  ",\n".join(
                      [quoted_UID, quoted_PARENT, quote_table(ORDER)] +
                      [_quote_column(c) for _, cols in sub_table.columns.items() for c in cols]
                  ) + \
                  "\n)\n" + \
                  "\nSELECT\n" + ",".join(
            [quoted_UID, quoted_UID, "0"] +
            [_quote_column(c) for _, cols in sub_table.columns.items() for c in cols]
        ) + \
                  "\nFROM\n" + quote_table(existing_table) + \
                  "\nWHERE\n" + has_nested_data
        self.db.execute(command)
Esempio n. 6
0
    def __getitem__(self, item):
        cs = self.columns.get(item, None)
        if not cs:
            return [Null]

        command = " UNION ALL ".join("SELECT " + _quote_column(c) + " FROM " +
                                     quote_table(c.es_index) for c in cs)

        output = self.db.query(command)
        return [o[0] for o in output]
Esempio n. 7
0
    def _add_column(self, column):
        cname = column.names["."]
        if column.type == "nested":
            # WE ARE ALSO NESTING
            self._nest_column(column, cname)

        table = concat_field(self.fact, column.nested_path[0])

        self.db.execute("ALTER TABLE " + quote_table(table) + " ADD COLUMN " +
                        quote_column(column.es_column) + " " +
                        sql_types[column.type])

        self.add_column_to_schema(column)
Esempio n. 8
0
 def __iter__(self):
     columns = [
         c for c, cs in self.columns.items() for c in cs
         if c.type not in STRUCT
     ]
     command = "SELECT " + \
               ",\n".join(_quote_column(c) for c in columns) + \
               " FROM " + quote_table(self.name)
     rows = self.db.query(command)
     for r in rows:
         output = Data()
         for (k, t), v in zip(columns, r):
             output[k] = v
         yield output
Esempio n. 9
0
    def read_db(self):
        """
        PULL SCHEMA FROM DATABASE, BUILD THE MODEL
        :return: None
        """

        # FIND ALL TABLES
        result = self.db.query(
            "SELECT * FROM sqlite_master WHERE type='table' ORDER BY name")
        tables = wrap([{k: d[i]
                        for i, k in enumerate(result.header)}
                       for d in result.data])
        tables_found = False
        for table in tables:
            if table.name.startswith("__"):
                continue
            tables_found = True
            nested_path = [
                join_field(split_field(tab.name)[1:])
                for tab in jx.reverse(tables)
                if startswith_field(table.name, tab.name)
            ]
            self.add_table_to_schema(nested_path)

            # LOAD THE COLUMNS
            command = "PRAGMA table_info(" + quote_table(table.name) + ")"
            details = self.db.query(command)

            for cid, name, dtype, notnull, dfft_value, pk in details.data:
                if name.startswith("__"):
                    continue
                cname, ctype = untyped_column(name)
                column = Column(names={
                    np: relative_field(cname, np)
                    for np in nested_path
                },
                                type=coalesce(
                                    ctype, {
                                        "TEXT": "string",
                                        "REAL": "number",
                                        "INTEGER": "integer"
                                    }.get(dtype)),
                                nested_path=nested_path,
                                es_column=name,
                                es_index=table.name)

                self.add_column_to_schema(column)

        return tables_found
Esempio n. 10
0
    def add_column(self, column):
        """
        ADD COLUMN, IF IT DOES NOT EXIST ALREADY
        """
        if column.name not in self.columns:
            self.columns[column.name] = {column}
        elif column.type not in [c.type for c in self.columns[column.name]]:
            self.columns[column.name].add(column)

        if column.type == "nested":
            nested_table_name = concat_field(self.name, column.name)
            # MAKE THE TABLE
            from jx_sqlite.query_table import QueryTable

            table = QueryTable(nested_table_name, self.db, exists=False)
            self.nested_tables[column.name] = table
        else:
            self.db.execute("ALTER TABLE " + quote_table(self.name) +
                            " ADD COLUMN " + _quote_column(column) + " " +
                            column.type)
Esempio n. 11
0
    def change_schema(self, required_changes):
        required_changes = wrap(required_changes)
        for required_change in required_changes:
            if required_change.add:
                column = required_change.add
                if column.type == "nested":
                    # WE ARE ALSO NESTING
                    self._nest_column(column, column.names[self.name])

                table = join_field([self.name] +
                                   split_field(column.nested_path[0]))

                self.db.execute("ALTER TABLE " + quote_table(table) +
                                " ADD COLUMN " + _quote_column(column) + " " +
                                sql_types[column.type])

                self.columns.add(column)

            elif required_change.nest:
                column = required_change.nest
                new_path = required_change.new_path
                self._nest_column(column, new_path)
Esempio n. 12
0
    def _insert(self, collection):
        for nested_path, details in collection.items():
            active_columns = wrap(list(details.active_columns))
            rows = details.rows
            table_name = concat_field(self.sf.fact, nested_path)

            if table_name == self.sf.fact:
                # DO NOT REQUIRE PARENT OR ORDER COLUMNS
                meta_columns = [GUID, UID]
            else:
                meta_columns = [UID, PARENT, ORDER]

            all_columns = meta_columns + active_columns.es_column

            prefix = "INSERT INTO " + quote_table(table_name) + \
                     "(" + ",".join(map(quote_table, all_columns)) + ")"

            # BUILD THE RECORDS
            records = " UNION ALL ".join(
                "\nSELECT " +
                ",".join(quote_value(row.get(c)) for c in all_columns)
                for row in unwrap(rows))

            self.db.execute(prefix + records)
Esempio n. 13
0
    def update(self, command):
        """
        :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
        """
        command = wrap(command)

        # REJECT DEEP UPDATES
        touched_columns = command.set.keys() | set(listwrap(command['clear']))
        for c in self.get_leaves():
            if c.name in touched_columns and c.nested_path and len(
                    c.name) > len(c.nested_path[0]):
                Log.error("Deep update not supported")

        # ADD NEW COLUMNS
        where = jx_expression(command.where)
        _vars = where.vars()
        _map = {
            v: c.es_column
            for v in _vars for c in self.columns.get(v, Null)
            if c.type not in STRUCT
        }
        where_sql = where.map(_map).to_sql()
        new_columns = set(command.set.keys()) - set(self.columns.keys())
        for new_column_name in new_columns:
            nested_value = command.set[new_column_name]
            ctype = get_type(nested_value)
            column = Column(names={".": new_column_name},
                            type=ctype,
                            es_index=self.sf.fact,
                            es_column=typed_column(new_column_name, ctype))
            self.add_column(column)

        # UPDATE THE NESTED VALUES
        for nested_column_name, nested_value in command.set.items():
            if get_type(nested_value) == "nested":
                nested_table_name = concat_field(self.sf.fact,
                                                 nested_column_name)
                nested_table = nested_tables[nested_column_name]
                self_primary_key = ",".join(
                    quote_table(c.es_column) for u in self.uid
                    for c in self.columns[u])
                extra_key_name = UID_PREFIX + "id" + text_type(len(self.uid))
                extra_key = [e
                             for e in nested_table.columns[extra_key_name]][0]

                sql_command = "DELETE FROM " + quote_table(nested_table.name) + \
                              "\nWHERE EXISTS (" + \
                              "\nSELECT 1 " + \
                              "\nFROM " + quote_table(nested_table.name) + " n" + \
                              "\nJOIN (" + \
                              "\nSELECT " + self_primary_key + \
                              "\nFROM " + quote_table(self.sf.fact) + \
                              "\nWHERE " + where_sql + \
                              "\n) t ON " + \
                              " AND ".join(
                                  "t." + quote_table(c.es_column) + " = n." + quote_table(c.es_column)
                                  for u in self.uid
                                  for c in self.columns[u]
                              ) + \
                              ")"
                self.db.execute(sql_command)

                # INSERT NEW RECORDS
                if not nested_value:
                    continue

                doc_collection = {}
                for d in listwrap(nested_value):
                    nested_table.flatten(d,
                                         Data(),
                                         doc_collection,
                                         path=nested_column_name)

                prefix = "INSERT INTO " + quote_table(nested_table.name) + \
                         "(" + \
                         self_primary_key + "," + \
                         quote_column(extra_key) + "," + \
                         ",".join(
                             quote_table(c.es_column)
                             for c in doc_collection.get(".", Null).active_columns
                         ) + ")"

                # BUILD THE PARENT TABLES
                parent = "\nSELECT " + \
                         self_primary_key + \
                         "\nFROM " + quote_table(self.sf.fact) + \
                         "\nWHERE " + jx_expression(command.where).to_sql()

                # BUILD THE RECORDS
                children = " UNION ALL ".join(
                    "\nSELECT " + quote_value(i) + " " +
                    quote_table(extra_key.es_column) + "," + ",".join(
                        quote_value(row[c.name]) + " " +
                        quote_table(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns)
                    for i, row in enumerate(
                        doc_collection.get(".", Null).rows))

                sql_command = prefix + \
                              "\nSELECT " + \
                              ",".join(
                                  "p." + quote_table(c.es_column)
                                  for u in self.uid for c in self.columns[u]
                              ) + "," + \
                              "c." + quote_column(extra_key) + "," + \
                              ",".join(
                                  "c." + quote_table(c.es_column)
                                  for c in doc_collection.get(".", Null).active_columns
                              ) + \
                              "\nFROM (" + parent + ") p " + \
                              "\nJOIN (" + children + \
                              "\n) c on 1=1"

                self.db.execute(sql_command)

                # THE CHILD COLUMNS COULD HAVE EXPANDED
                # ADD COLUMNS TO SELF
                for n, cs in nested_table.columns.items():
                    for c in cs:
                        column = Column(names={".": c.name},
                                        type=c.type,
                                        es_index=c.es_index,
                                        es_column=c.es_column,
                                        nested_path=[nested_column_name] +
                                        c.nested_path)
                        if c.name not in self.columns:
                            self.columns[column.name] = {column}
                        elif c.type not in [
                                c.type for c in self.columns[c.name]
                        ]:
                            self.columns[column.name].add(column)

        command = (
            "UPDATE " + quote_table(self.sf.fact) + " SET " + ",\n".join([
                quote_column(c) + "=" + quote_value(get_if_type(v, c.type))
                for k, v in command.set.items() if get_type(v) != "nested"
                for c in self.columns[k]
                if c.type != "nested" and len(c.nested_path) == 1
            ] + [
                quote_column(c) + "=NULL"
                for k in listwrap(command['clear']) if k in self.columns
                for c in self.columns[k]
                if c.type != "nested" and len(c.nested_path) == 1
            ]) + " WHERE " + where_sql)

        self.db.execute(command)
Esempio n. 14
0
    def _make_sql_for_one_nest_in_set_op(
            self,
            primary_nested_path,
            selects,  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE
            where_clause,
            active_columns,
            index_to_sql_select  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
    ):
        """
        FOR EACH NESTED LEVEL, WE MAKE A QUERY THAT PULLS THE VALUES/COLUMNS REQUIRED
        WE `UNION ALL` THEM WHEN DONE
        :param primary_nested_path:
        :param selects:
        :param where_clause:
        :param active_columns:
        :param index_to_sql_select:
        :return: SQL FOR ONE NESTED LEVEL
        """

        parent_alias = "a"
        from_clause = ""
        select_clause = []
        children_sql = []
        done = []

        # STATEMENT FOR EACH NESTED PATH
        for i, (nested_path,
                sub_table) in enumerate(self.nested_tables.items()):
            if any(startswith_field(nested_path, d) for d in done):
                continue

            alias = "__" + unichr(ord('a') + i) + "__"

            if primary_nested_path == nested_path:
                select_clause = []
                # ADD SELECT CLAUSE HERE
                for select_index, s in enumerate(selects):
                    sql_select = index_to_sql_select.get(select_index)
                    if not sql_select:
                        select_clause.append(selects[select_index])
                        continue

                    if startswith_field(sql_select.nested_path[0],
                                        nested_path):
                        select_clause.append(sql_select.sql + " AS " +
                                             _make_column_name(select_index))
                    else:
                        # DO NOT INCLUDE DEEP STUFF AT THIS LEVEL
                        select_clause.append("NULL AS " +
                                             _make_column_name(select_index))

                if nested_path == ".":
                    from_clause += "\nFROM " + quote_table(
                        self.name) + " " + alias + "\n"
                else:
                    from_clause += "\nLEFT JOIN " + quote_table(sub_table.name) + " " + alias + "\n" \
                                                                                                " ON " + alias + "." + quoted_PARENT + " = " + parent_alias + "." + quoted_UID + "\n"
                    where_clause = "(" + where_clause + ") AND " + alias + "." + quote_table(
                        ORDER) + " > 0\n"

            elif startswith_field(primary_nested_path, nested_path):
                # PARENT TABLE
                # NO NEED TO INCLUDE COLUMNS, BUT WILL INCLUDE ID AND ORDER
                if nested_path == ".":
                    from_clause += "\nFROM " + quote_table(
                        self.name) + " " + alias + "\n"
                else:
                    parent_alias = alias = unichr(ord('a') + i - 1)
                    from_clause += "\nLEFT JOIN " + quote_table(sub_table.name) + " " + alias + \
                                   " ON " + alias + "." + quoted_PARENT + " = " + parent_alias + "." + quoted_UID
                    where_clause = "(" + where_clause + ") AND " + parent_alias + "." + quote_table(
                        ORDER) + " > 0\n"

            elif startswith_field(nested_path, primary_nested_path):
                # CHILD TABLE
                # GET FIRST ROW FOR EACH NESTED TABLE
                from_clause += "\nLEFT JOIN " + quote_table(sub_table.name) + " " + alias + \
                               " ON " + alias + "." + quoted_PARENT + " = " + parent_alias + "." + quoted_UID + \
                               " AND " + alias + "." + quote_table(ORDER) + " = 0\n"

                # IMMEDIATE CHILDREN ONLY
                done.append(nested_path)
                # NESTED TABLES WILL USE RECURSION
                children_sql.append(
                    self._make_sql_for_one_nest_in_set_op(
                        nested_path,
                        selects,  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE
                        where_clause,
                        active_columns,
                        index_to_sql_select  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
                    ))
            else:
                # SIBLING PATHS ARE IGNORED
                continue

            parent_alias = alias

        sql = "\nUNION ALL\n".join([
            "SELECT\n" + ",\n".join(select_clause) + from_clause +
            "\nWHERE\n" + where_clause
        ] + children_sql)

        return sql
Esempio n. 15
0
    def _set_op(self, query, frum):
        # GET LIST OF COLUMNS
        frum_path = split_field(frum)
        primary_nested_path = join_field(frum_path[1:])
        vars_ = UNION([s.value.vars() for s in listwrap(query.select)])
        schema = self.sf.tables[primary_nested_path].schema

        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path, sub_table) in enumerate(self.sf.tables.items())
        }

        active_columns = {".": []}
        for cname, cols in schema.items():
            if any(startswith_field(cname, v) for v in vars_):
                for c in cols:
                    if c.type in STRUCT:
                        continue
                    nest = c.nested_path[0]
                    active = active_columns.get(nest)
                    if not active:
                        active = active_columns[nest] = []
                    active.append(c)

        for nested_path, s in self.sf.tables.items():
            for cname, cols in s.schema.items():
                if not any(startswith_field(cname, c.names[c.nested_path[0]]) for n, cc in active_columns.items() for c in cc):
                    for c in cols:
                        if c.type in STRUCT:
                            continue
                        nest = c.nested_path[0]
                        active = active_columns.get(nest)
                        if not active:
                            active = active_columns[nest] = []
                        active.append(c)

        # ANY VARS MENTIONED WITH NO COLUMNS?
        for v in vars_:
            if not any(startswith_field(cname, v) for cname in schema.keys()):
                active_columns["."].append(Column(
                    names={".": v},
                    type="null",
                    es_column=".",
                    es_index=".",
                    nested_path=["."]
                ))

        # EVERY COLUMN, AND THE INDEX IT TAKES UP
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        index_to_uid = {}  # FROM NESTED PATH TO THE INDEX OF UID
        sql_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path, sub_table) in enumerate(self.sf.tables.items())
            }

        sorts = []
        if query.sort:
            for s in query.sort:
                col = s.value.to_sql(schema)[0]
                for t, sql in col.sql.items():
                    json_type = sql_type_to_json_type[t]
                    if json_type in STRUCT:
                        continue
                    column_number = len(sql_selects)
                    # SQL HAS ABS TABLE REFERENCE
                    column_alias = _make_column_name(column_number)
                    sql_selects.append(sql + " AS " + column_alias)
                    if s.sort == -1:
                        sorts.append(column_alias + " IS NOT NULL")
                        sorts.append(column_alias + " DESC")
                    else:
                        sorts.append(column_alias + " IS NULL")
                        sorts.append(column_alias)

        selects = []
        primary_doc_details = Data()
        # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH
        # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED
        for nested_path, sub_table in self.sf.tables.items():
            nested_doc_details = {
                "sub_table": sub_table,
                "children": [],
                "index_to_column": {},
                "nested_path": [nested_path]  # fake the real nested path, we only look at [0] anyway
            }

            # INSERT INTO TREE
            if not primary_doc_details:
                primary_doc_details = nested_doc_details
            else:
                def place(parent_doc_details):
                    if startswith_field(nested_path, parent_doc_details['nested_path'][0]):
                        for c in parent_doc_details['children']:
                            if place(c):
                                return True
                        parent_doc_details['children'].append(nested_doc_details)

                place(primary_doc_details)

            alias = nested_doc_details['alias'] = nest_to_alias[nested_path]

            if nested_path=="." and quoted_GUID in vars_:
                column_number = index_to_uid[nested_path] = nested_doc_details['id_coord'] = len(sql_selects)
                sql_select = alias + "." + quoted_GUID
                sql_selects.append(sql_select + " AS " + _make_column_name(column_number))
                index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                    push_name="_id",
                    push_column_name="_id",
                    push_column=0,
                    push_child=".",
                    sql=sql_select,
                    pull=get_column(column_number),
                    type="string",
                    column_alias=_make_column_name(column_number),
                    nested_path=[nested_path]           # fake the real nested path, we only look at [0] anyway
                )
                query.select = [s for s in listwrap(query.select) if s.name!="_id"]


            # WE ALWAYS ADD THE UID AND ORDER
            column_number = index_to_uid[nested_path] = nested_doc_details['id_coord'] = len(sql_selects)
            sql_select = alias + "." + quoted_UID
            sql_selects.append(sql_select + " AS " + _make_column_name(column_number))
            if nested_path !=".":
                index_to_column[column_number]=ColumnMapping(
                    sql=sql_select,
                    type="number",
                    nested_path=[nested_path],            # fake the real nested path, we only look at [0] anyway
                    column_alias=_make_column_name(column_number)

                )
                column_number = len(sql_selects)
                sql_select = alias + "." + quote_table(ORDER)
                sql_selects.append(sql_select + " AS " + _make_column_name(column_number))
                index_to_column[column_number]=ColumnMapping(
                    sql=sql_select,
                    type="number",
                    nested_path=[nested_path],            # fake the real nested path, we only look at [0] anyway
                    column_alias=_make_column_name(column_number)

                )

            # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM
            if nested_path not in active_columns:
                continue

            if len(active_columns[nested_path]) != 0:
                # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE
                si = 0
                for s in listwrap(query.select):
                    try:
                        column_number = len(sql_selects)
                        s.pull = get_column(column_number)
                        db_columns = s.value.to_sql(schema)

                        if isinstance(s.value, LeavesOp):
                            for column in db_columns:
                                if isinstance(column.nested_path, list):
                                    column.nested_path=column.nested_path[0]
                                if column.nested_path and column.nested_path!=nested_path:
                                    continue
                                for t, unsorted_sql in column.sql.items():
                                    json_type = sql_type_to_json_type[t]
                                    if json_type in STRUCT:
                                        continue
                                    column_number = len(sql_selects)
                                    # SQL HAS ABS TABLE REFERENCE
                                    column_alias = _make_column_name(column_number)
                                    if concat_field(alias, unsorted_sql) in selects and len(unsorted_sql.split())==1:
                                        continue
                                    selects.append(concat_field(alias, unsorted_sql))
                                    sql_selects.append(alias + "." + unsorted_sql + " AS " + column_alias)
                                    index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                                        push_name=literal_field(get_property_name(concat_field(s.name, column.name))),
                                        push_column_name=get_property_name(concat_field(s.name, column.name)),
                                        push_column=si,
                                        push_child=".",
                                        pull=get_column(column_number),
                                        sql=unsorted_sql,
                                        type=json_type,
                                        column_alias=column_alias,
                                        nested_path=[nested_path]           # fake the real nested path, we only look at [0] anyway
                                    )
                                    si += 1
                        else:
                            for column in db_columns:
                                if isinstance(column.nested_path, list):
                                    column.nested_path=column.nested_path[0]
                                if column.nested_path and column.nested_path!=nested_path:
                                    continue
                                for t, unsorted_sql in column.sql.items():
                                    json_type = sql_type_to_json_type[t]
                                    if json_type in STRUCT:
                                        continue
                                    column_number = len(sql_selects)
                                    # SQL HAS ABS TABLE REFERENCE
                                    column_alias = _make_column_name(column_number)
                                    if concat_field(alias, unsorted_sql) in selects and len(unsorted_sql.split())==1:
                                        continue
                                    selects.append(concat_field(alias, unsorted_sql))
                                    sql_selects.append(alias + "." + unsorted_sql + " AS " + column_alias)
                                    index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                                        push_name=s.name,
                                        push_column_name=s.name,
                                        push_column=si,
                                        push_child=column.name,
                                        pull=get_column(column_number),
                                        sql=unsorted_sql,
                                        type=json_type,
                                        column_alias=column_alias,
                                        nested_path=[nested_path]
                                        # fake the real nested path, we only look at [0] anyway
                                    )
                    finally:
                        si += 1
            elif startswith_field(nested_path, primary_nested_path):
                # ADD REQUIRED COLUMNS, FOR DEEP STUFF
                for ci, c in enumerate(active_columns[nested_path]):
                    if c.type in STRUCT:
                        continue

                    column_number = len(sql_selects)
                    nested_path = c.nested_path
                    unsorted_sql = nest_to_alias[nested_path[0]] + "." + quote_table(c.es_column)
                    column_alias = _make_column_name(column_number)
                    if concat_field(alias, unsorted_sql) in selects and len(unsorted_sql.split())==1:
                        continue
                    selects.append(concat_field(alias, unsorted_sql))
                    sql_selects.append(alias + "." + unsorted_sql + " AS " + column_alias)
                    index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                        push_name=s.name,
                        push_column_name=s.name,
                        push_column=si,
                        push_child=relative_field(c.names["."], s.name),
                        pull=get_column(column_number),
                        sql=unsorted_sql,
                        type=c.type,
                        column_alias=column_alias,
                        nested_path=nested_path
                    )

        where_clause = query.where.to_sql(schema, boolean=True)[0].sql.b
        unsorted_sql = self._make_sql_for_one_nest_in_set_op(
            ".",
            sql_selects,
            where_clause,
            active_columns,
            index_to_column
        )

        for n, _ in self.sf.tables.items():
            sorts.append(COLUMN + text_type(index_to_uid[n]))

        ordered_sql = (
            "SELECT * FROM (\n" +
            unsorted_sql +
            "\n)" +
            "\nORDER BY\n" + ",\n".join(sorts) +
            "\nLIMIT " + quote_value(query.limit)
        )
        self.db.create_new_functions()  #creating new functions: regexp
        result = self.db.query(ordered_sql)

        def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id, parent_id_coord):
            """
            :param rows: REVERSED STACK OF ROWS (WITH push() AND pop())
            :param row: CURRENT ROW BEING EXTRACTED
            :param nested_doc_details: {
                    "nested_path": wrap_nested_path(nested_path),
                    "index_to_column": map from column number to column details
                    "children": all possible direct decedents' nested_doc_details
                 }
            :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop)
            :param parent_id_coord: the column number for the parent id (so we ca extract from each row)
            :return: the nested property (usually an array)
            """
            previous_doc_id = None
            doc = Null
            output = []
            id_coord = nested_doc_details['id_coord']

            while True:
                doc_id = row[id_coord]

                if doc_id == None or (parent_id_coord is not None and row[parent_id_coord] != parent_doc_id):
                    rows.append(row)  # UNDO PREVIOUS POP (RECORD IS NOT A NESTED RECORD OF parent_doc)
                    return output

                if doc_id != previous_doc_id:
                    previous_doc_id = doc_id
                    doc = Null
                    curr_nested_path = nested_doc_details['nested_path'][0]
                    index_to_column = nested_doc_details['index_to_column'].items()
                    if index_to_column:
                        for i, c in index_to_column:
                            value = row[i]
                            if value == None:
                                continue
                            if value == '':
                                continue

                            if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                                # ASSIGN INNER PROPERTIES
                                relative_path=join_field([c.push_name]+split_field(c.push_child))
                            else:           # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                                relative_path=c.push_child

                            if relative_path == ".":
                                doc = value
                            elif doc is Null:
                                doc = Data()
                                doc[relative_path] = value
                            else:
                                doc[relative_path] = value

                for child_details in nested_doc_details['children']:
                    # EACH NESTED TABLE MUST BE ASSEMBLED INTO A LIST OF OBJECTS
                    child_id = row[child_details['id_coord']]
                    if child_id is not None:
                        nested_value = _accumulate_nested(rows, row, child_details, doc_id, id_coord)
                        if nested_value:
                            push_name = child_details['nested_path'][0]
                            if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                                # ASSIGN INNER PROPERTIES
                                relative_path=relative_field(push_name, curr_nested_path)
                            else:           # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                                relative_path="."

                            if relative_path == "." and doc is Null:
                                doc = nested_value
                            elif relative_path == ".":
                                doc[push_name] = unwraplist([v[push_name] for v in nested_value])
                            elif doc is Null:
                                doc = Data()
                                doc[relative_path] = unwraplist(nested_value)
                            else:
                                doc[relative_path] = unwraplist(nested_value)

                output.append(doc)

                try:
                    row = rows.pop()
                except IndexError:
                    return output

        cols = tuple([i for i in index_to_column.values() if i.push_name != None])
        rows = list(reversed(unwrap(result.data)))
        if rows:
            row = rows.pop()
            data = _accumulate_nested(rows, row, primary_doc_details, None, None)
        else:
            data = result.data

        if query.format == "cube":
            for f, _ in self.sf.tables.items():
                if frum.endswith(f) or (test_dots(cols) and isinstance(query.select, list)):
                    num_rows = len(result.data)
                    num_cols = MAX([c.push_column for c in cols]) + 1 if len(cols) else 0
                    map_index_to_name = {c.push_column: c.push_column_name for c in cols}
                    temp_data = [[None]*num_rows for _ in range(num_cols)]
                    for rownum, d in enumerate(result.data):
                        for c in cols:
                            if c.push_child == ".":
                                temp_data[c.push_column][rownum] = c.pull(d)
                            else:
                                column = temp_data[c.push_column][rownum]
                                if column is None:
                                    column = temp_data[c.push_column][rownum] = Data()
                                column[c.push_child] = c.pull(d)
                    output = Data(
                        meta={"format": "cube"},
                        data={n: temp_data[c] for c, n in map_index_to_name.items()},
                        edges=[{
                            "name": "rownum",
                            "domain": {
                                "type": "rownum",
                                "min": 0,
                                "max": num_rows,
                                "interval": 1
                            }
                        }]
                    )
                    return output

            if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                num_rows = len(data)
                map_index_to_name = {c.push_column: c.push_column_name for c in cols}
                temp_data = Data()
                for rownum, d in enumerate(data):
                    for k, v in d.items():
                        if temp_data[k] == None:
                            temp_data[k] = [None] * num_rows
                        temp_data[k][rownum] = v
                return Data(
                    meta={"format": "cube"},
                    data={n: temp_data[literal_field(n)] for c, n in map_index_to_name.items()},
                    edges=[{
                        "name": "rownum",
                        "domain": {
                            "type": "rownum",
                            "min": 0,
                            "max": num_rows,
                            "interval": 1
                        }
                    }]
                )
            else:
                num_rows = len(data)
                map_index_to_name = {c.push_column: c.push_column_name for c in cols}
                temp_data = [data]

                return Data(
                    meta={"format": "cube"},
                    data={n: temp_data[c] for c, n in map_index_to_name.items()},
                    edges=[{
                        "name": "rownum",
                        "domain": {
                            "type": "rownum",
                            "min": 0,
                            "max": num_rows,
                            "interval": 1
                        }
                    }]
                )

        elif query.format == "table":
            for f, _ in self.sf.tables.items():
                if  frum.endswith(f):
                    num_column = MAX([c.push_column for c in cols])+1
                    header = [None]*num_column
                    for c in cols:
                        header[c.push_column] = c.push_column_name

                    output_data = []
                    for d in result.data:
                        row = [None] * num_column
                        for c in cols:
                            set_column(row, c.push_column, c.push_child, c.pull(d))
                        output_data.append(row)

                    return Data(
                        meta={"format": "table"},
                        header=header,
                        data=output_data
                    )
            if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                num_rows = len(data)
                column_names= [None]*(max(c.push_column for c in cols) + 1)
                for c in cols:
                    column_names[c.push_column] = c.push_column_name

                temp_data = []
                for rownum, d in enumerate(data):
                    row =[None] * len(column_names)
                    for i, (k, v) in enumerate(sorted(d.items())):
                        for c in cols:
                            if k==c.push_name:
                                row[c.push_column] = v
                    temp_data.append(row)

                return Data(
                    meta={"format": "table"},
                    header=column_names,
                    data=temp_data
                )
            else:
                column_names = listwrap(query.select).name
                return Data(
                    meta={"format": "table"},
                    header=column_names,
                    data=[[d] for d in data]
                )

        else:
            for f, _ in self.sf.tables.items():
                if frum.endswith(f) or (test_dots(cols) and isinstance(query.select, list)):
                    data = []
                    for d in result.data:
                        row = Data()
                        for c in cols:
                            if c.push_child == ".":
                                row[c.push_name] = c.pull(d)
                            elif c.num_push_columns:
                                tuple_value = row[c.push_name]
                                if not tuple_value:
                                    tuple_value = row[c.push_name] = [None] * c.num_push_columns
                                tuple_value[c.push_child] = c.pull(d)
                            elif not isinstance(query.select, list):   # select is value type
                                row[c.push_child]=c.pull(d)
                            else:
                                row[c.push_name][c.push_child] = c.pull(d)

                        data.append(row)

                    return Data(
                        meta={"format": "list"},
                        data=data
                    )

            if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                temp_data=[]
                for rownum, d in enumerate(data):
                    row = {}
                    for k, v in d.items():
                        for c in cols:
                            if c.push_name==c.push_column_name==k:
                                    row[c.push_column_name] = v
                            elif c.push_name==k and c.push_column_name!=k:
                                    row[c.push_column_name] = v
                    temp_data.append(row)
                return Data(
                    meta={"format": "list"},
                    data=temp_data
                )
            else:
                return Data(
                    meta={"format": "list"},
                    data=data
                )
Esempio n. 16
0
    def _set_op(self, query, frum):
        # GET LIST OF COLUMNS
        primary_nested_path = join_field(split_field(frum)[1:])
        vars_ = UNION([s.value.vars() for s in listwrap(query.select)])

        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path,
                    sub_table) in enumerate(self.nested_tables.items())
        }

        active_columns = {".": []}
        for cname, cols in self.columns.items():
            if any(startswith_field(cname, v) for v in vars_):
                for c in cols:
                    if c.type in STRUCT:
                        continue
                    nest = c.nested_path[0]
                    active = active_columns.get(nest)
                    if not active:
                        active = active_columns[nest] = []
                    active.append(c)
        # ANY VARS MENTIONED WITH NO COLUMNS?
        for v in vars_:
            if not any(
                    startswith_field(cname, v)
                    for cname in self.columns.keys()):
                active_columns["."].append(
                    Column(names={self.name: v},
                           type="null",
                           es_column=".",
                           es_index=".",
                           nested_path=["."]))

        # EVERY COLUMN, AND THE INDEX IT TAKES UP
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        index_to_uid = {}  # FROM NESTED PATH TO THE INDEX OF UID
        sql_selects = [
        ]  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path,
                    sub_table) in enumerate(self.nested_tables.items())
        }

        sorts = []
        if query.sort:
            for s in query.sort:
                col = s.value.to_sql(self)[0]
                for t, sql in col.sql.items():
                    json_type = sql_type_to_json_type[t]
                    if json_type in STRUCT:
                        continue
                    column_number = len(sql_selects)
                    # SQL HAS ABS TABLE REFERENCE
                    column_alias = _make_column_name(column_number)
                    sql_selects.append(sql + " AS " + column_alias)
                    if s.sort == -1:
                        sorts.append(column_alias + " IS NOT NULL")
                        sorts.append(column_alias + " DESC")
                    else:
                        sorts.append(column_alias + " IS NULL")
                        sorts.append(column_alias)

        primary_doc_details = Data()
        # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH
        # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED
        for nested_path, sub_table in self.nested_tables.items():
            nested_doc_details = {
                "sub_table": sub_table,
                "children": [],
                "index_to_column": {},
                "nested_path":
                [nested_path
                 ]  # fake the real nested path, we only look at [0] anyway
            }

            # INSERT INTO TREE
            if not primary_doc_details:
                primary_doc_details = nested_doc_details
            else:

                def place(parent_doc_details):
                    if startswith_field(nested_path,
                                        parent_doc_details['nested_path'][0]):
                        for c in parent_doc_details['children']:
                            if place(c):
                                return True
                        parent_doc_details['children'].append(
                            nested_doc_details)

                place(primary_doc_details)

            alias = nested_doc_details['alias'] = nest_to_alias[nested_path]

            # WE ALWAYS ADD THE UID AND ORDER
            column_number = index_to_uid[nested_path] = nested_doc_details[
                'id_coord'] = len(sql_selects)
            sql_select = alias + "." + quoted_UID
            sql_selects.append(sql_select + " AS " +
                               _make_column_name(column_number))
            if nested_path != ".":
                sql_select = alias + "." + quote_table(ORDER)
                sql_selects.append(sql_select + " AS " +
                                   _make_column_name(column_number))

            # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM
            if nested_path not in active_columns:
                continue

            if primary_nested_path == nested_path:
                # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE
                si = 0
                for s in listwrap(query.select):
                    try:
                        column_number = len(sql_selects)
                        s.pull = get_column(column_number)
                        db_columns = s.value.to_sql(self)

                        if isinstance(s.value, LeavesOp):
                            for column in db_columns:
                                for t, unsorted_sql in column.sql.items():
                                    json_type = sql_type_to_json_type[t]
                                    if json_type in STRUCT:
                                        continue
                                    column_number = len(sql_selects)
                                    # SQL HAS ABS TABLE REFERENCE
                                    column_alias = _make_column_name(
                                        column_number)
                                    sql_selects.append(unsorted_sql + " AS " +
                                                       column_alias)
                                    index_to_column[
                                        column_number] = nested_doc_details[
                                            'index_to_column'][
                                                column_number] = Data(
                                                    push_name=concat_field(
                                                        s.name, column.name),
                                                    push_column=si,
                                                    push_child=".",
                                                    pull=get_column(
                                                        column_number),
                                                    sql=unsorted_sql,
                                                    type=json_type,
                                                    nested_path=[nested_path]
                                                    # fake the real nested path, we only look at [0] anyway
                                                )
                                    si += 1
                        else:
                            for column in db_columns:
                                for t, unsorted_sql in column.sql.items():
                                    json_type = sql_type_to_json_type[t]
                                    if json_type in STRUCT:
                                        continue
                                    column_number = len(sql_selects)
                                    # SQL HAS ABS TABLE REFERENCE
                                    column_alias = _make_column_name(
                                        column_number)
                                    sql_selects.append(unsorted_sql + " AS " +
                                                       column_alias)
                                    index_to_column[
                                        column_number] = nested_doc_details[
                                            'index_to_column'][
                                                column_number] = Data(
                                                    push_name=s.name,
                                                    push_column=si,
                                                    push_child=column.name,
                                                    pull=get_column(
                                                        column_number),
                                                    sql=unsorted_sql,
                                                    type=json_type,
                                                    nested_path=[nested_path]
                                                    # fake the real nested path, we only look at [0] anyway
                                                )
                    finally:
                        si += 1
            elif startswith_field(nested_path, primary_nested_path):
                # ADD REQUIRED COLUMNS, FOR DEEP STUFF
                for ci, c in enumerate(active_columns[nested_path]):
                    if c.type in STRUCT:
                        continue

                    column_number = len(sql_selects)
                    nested_path = c.nested_path
                    unsorted_sql = nest_to_alias[
                        nested_path[0]] + "." + quote_table(c.es_column)
                    column_alias = _make_column_name(column_number)
                    sql_selects.append(unsorted_sql + " AS " + column_alias)
                    index_to_column[column_number] = nested_doc_details[
                        'index_to_column'][column_number] = Data(
                            push_name=s.name,
                            push_column=si,
                            push_child=relative_field(c.name, s.name),
                            pull=get_column(column_number),
                            sql=unsorted_sql,
                            type=c.type,
                            nested_path=nested_path)

        where_clause = query.where.to_sql(self, boolean=True)[0].sql.b

        unsorted_sql = self._make_sql_for_one_nest_in_set_op(
            ".", sql_selects, where_clause, active_columns, index_to_column)

        for n, _ in self.nested_tables.items():
            sorts.append(COLUMN + unicode(index_to_uid[n]))

        ordered_sql = ("SELECT * FROM (\n" + unsorted_sql + "\n)" +
                       "\nORDER BY\n" + ",\n".join(sorts) + "\nLIMIT " +
                       quote_value(query.limit))
        result = self.db.query(ordered_sql)

        def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id,
                               parent_id_coord):
            """
            :param rows: REVERSED STACK OF ROWS (WITH push() AND pop())
            :param row: CURRENT ROW BEING EXTRACTED
            :param nested_doc_details: {
                    "nested_path": wrap_nested_path(nested_path),
                    "index_to_column": map from column number to column details
                    "children": all possible direct decedents' nested_doc_details
                 }
            :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop)
            :param parent_id_coord: the column number for the parent id (so we ca extract from each row)
            :return: the nested property (usually an array)
            """
            previous_doc_id = None
            doc = Data()
            output = []
            id_coord = nested_doc_details['id_coord']

            while True:
                doc_id = row[id_coord]

                if doc_id == None or (parent_id_coord is not None and
                                      row[parent_id_coord] != parent_doc_id):
                    rows.append(row)  # UNDO
                    output = unwraplist(output)
                    return output if output else None

                if doc_id != previous_doc_id:
                    previous_doc_id = doc_id
                    doc = Data()
                    curr_nested_path = nested_doc_details['nested_path'][0]
                    if isinstance(query.select, list) or isinstance(
                            query.select.value, LeavesOp):
                        # ASSIGN INNER PROPERTIES
                        for i, c in nested_doc_details[
                                'index_to_column'].items():
                            value = row[i]
                            if value == None:
                                continue
                            if value == '':
                                continue

                            relative_path = relative_field(
                                concat_field(c.push_name, c.push_child),
                                curr_nested_path)
                            if relative_path == ".":
                                doc = value
                            else:
                                doc[relative_path] = value
                    else:
                        # ASSIGN INNER PROPERTIES
                        for i, c in nested_doc_details[
                                'index_to_column'].items():
                            value = row[i]
                            if value is not None:
                                relative_path = relative_field(
                                    c.push_child, curr_nested_path)
                                if relative_path == ".":
                                    doc = value
                                else:
                                    doc[relative_path] = value
                    output.append(doc)

                # ASSIGN NESTED ARRAYS
                for child_details in nested_doc_details['children']:
                    child_id = row[child_details['id_coord']]
                    if child_id is not None:
                        nested_value = _accumulate_nested(
                            rows, row, child_details, doc_id, id_coord)
                        if nested_value is not None:
                            path = child_details['nested_path'][0]
                            doc[path] = nested_value

                try:
                    row = rows.pop()
                except IndexError:
                    output = unwraplist(output)
                    return output if output else None

        cols = tuple(index_to_column.values())

        if query.format == "cube":
            num_rows = len(result.data)
            num_cols = MAX([c.push_column
                            for c in cols]) + 1 if len(cols) else 0
            map_index_to_name = {c.push_column: c.push_name for c in cols}
            temp_data = [[None] * num_rows for _ in range(num_cols)]
            for rownum, d in enumerate(result.data):
                for c in cols:
                    if c.push_child == ".":
                        temp_data[c.push_column][rownum] = c.pull(d)
                    else:
                        column = temp_data[c.push_column][rownum]
                        if column is None:
                            column = temp_data[c.push_column][rownum] = {}
                        column[c.push_child] = c.pull(d)

            output = Data(
                meta={"format": "cube"},
                data={n: temp_data[c]
                      for c, n in map_index_to_name.items()},
                edges=[{
                    "name": "rownum",
                    "domain": {
                        "type": "rownum",
                        "min": 0,
                        "max": num_rows,
                        "interval": 1
                    }
                }])
            return output
        elif query.format == "table":
            num_column = MAX([c.push_column for c in cols]) + 1
            header = [None] * num_column
            for c in cols:
                # header[c.push_column] = c.push_name
                sf = split_field(c.push_name)
                if len(sf) == 0:
                    header[c.push_column] = "."
                elif len(sf) == 1:
                    header[c.push_column] = sf[0]
                else:
                    # TABLES ONLY USE THE FIRST-LEVEL PROPERTY NAMES
                    # PUSH ALL DEEPER NAMES TO CHILD
                    header[c.push_column] = sf[0]
                    c.push_child = join_field(sf[1:] +
                                              split_field(c.push_child))

            output_data = []
            for d in result.data:
                row = [None] * num_column
                for c in cols:
                    set_column(row, c.push_column, c.push_child, c.pull(d))
                output_data.append(row)
            return Data(meta={"format": "table"},
                        header=header,
                        data=output_data)
        else:
            rows = list(reversed(unwrap(result.data)))
            row = rows.pop()
            output = Data(meta={"format": "list"},
                          data=listwrap(
                              _accumulate_nested(rows, row,
                                                 primary_doc_details, None,
                                                 None)))
            return output
Esempio n. 17
0
    def _groupby_op(self, query, frum):
        schema = self.sf.tables[join_field(split_field(frum)[1:])].schema
        index_to_column = {}
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path,
                    sub_table) in enumerate(self.sf.tables.items())
        }
        frum_path = split_field(frum)
        base_table = join_field(frum_path[0:1])
        path = join_field(frum_path[1:])
        tables = []
        for n, a in nest_to_alias.items():
            if startswith_field(path, n):
                tables.append({"nest": n, "alias": a})
        tables = jx.sort(tables, {"value": {"length": "nest"}})

        from_sql = join_field(
            [base_table] + split_field(tables[0].nest)) + " " + tables[0].alias
        previous = tables[0]
        for t in tables[1::]:
            from_sql += ("\nLEFT JOIN\n" +
                         quote_table(concat_field(base_table, t.nest)) + " " +
                         t.alias + " ON " + t.alias + "." + PARENT + " = " +
                         previous.alias + "." + UID)

        selects = []
        groupby = []
        for i, e in enumerate(query.groupby):
            for s in e.value.to_sql(schema):
                column_number = len(selects)
                sql_type, sql = s.sql.items()[0]
                if sql == 'NULL' and not e.value.var in schema.keys():
                    Log.error("No such column {{var}}", var=e.value.var)

                column_alias = _make_column_name(column_number)
                groupby.append(sql)
                selects.append(sql + " AS " + column_alias)
                if s.nested_path == ".":
                    select_name = s.name
                else:
                    select_name = "."
                index_to_column[column_number] = ColumnMapping(
                    is_edge=True,
                    push_name=e.name,
                    push_column_name=e.name.replace("\\.", "."),
                    push_column=i,
                    push_child=select_name,
                    pull=get_column(column_number),
                    sql=sql,
                    column_alias=column_alias,
                    type=sql_type_to_json_type[sql_type])

        for i, s in enumerate(listwrap(query.select)):
            column_number = len(selects)
            sql_type, sql = s.value.to_sql(schema)[0].sql.items()[0]
            if sql == 'NULL' and not s.value.var in schema.keys():
                Log.error("No such column {{var}}", var=s.value.var)

            if s.value == "." and s.aggregate == "count":
                selects.append("COUNT(1) AS " + quote_table(s.name))
            else:
                selects.append(sql_aggs[s.aggregate] + "(" + sql + ") AS " +
                               quote_table(s.name))

            index_to_column[column_number] = ColumnMapping(
                push_name=s.name,
                push_column_name=s.name,
                push_column=i + len(query.groupby),
                push_child=".",
                pull=get_column(column_number),
                sql=sql,
                column_alias=quote_table(s.name),
                type=sql_type_to_json_type[sql_type])

        for w in query.window:
            selects.append(self._window_op(self, query, w))

        where = query.where.to_sql(schema)[0].sql.b

        command = "SELECT\n" + (",\n".join(selects)) + \
                  "\nFROM\n" + from_sql + \
                  "\nWHERE\n" + where + \
                  "\nGROUP BY\n" + ",\n".join(groupby)

        if query.sort:
            command += "\nORDER BY " + ",\n".join(
                "(" + sql[t] + ") IS NULL" + ",\n" + sql[t] +
                (" DESC" if s.sort == -1 else "")
                for s, sql in [(s, s.value.to_sql(schema)[0].sql)
                               for s in query.sort] for t in "bns" if sql[t])

        return command, index_to_column
Esempio n. 18
0
    def query(self, query):
        """
        :param query:  JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
        :return:
        """
        if not startswith_field(query['from'], self.sf.fact):
            Log.error("Expecting table, or some nested table")
        frum, query['from'] = query['from'], self
        schema = self.sf.tables["."].schema
        if not query.groupby:
            query = QueryOp.wrap(query, schema)
        # TYPE CONFLICTS MUST NOW BE RESOLVED DURING
        # TYPE-SPECIFIC QUERY NORMALIZATION
        # vars_ = query.vars(exclude_select=True)
        # type_map = {
        #     v: c.es_column
        #     for v in vars_
        #     if v in self.columns and len([c for c in self.columns[v] if c.type != "nested"]) == 1
        #     for c in self.columns[v]
        #     if c.type != "nested"
        # }
        #
        # sql_query = query.map(type_map)
        query = query

        new_table = "temp_" + unique_name()

        if query.format == "container":
            create_table = "CREATE TABLE " + quote_table(new_table) + " AS "
        else:
            create_table = ""

        if query.groupby and query.format != "cube":
            query = QueryOp.wrap(query, schema)
            op, index_to_columns = self._groupby_op(query, frum)
            command = create_table + op
        elif query.groupby:
            query.edges, query.groupby = query.groupby, query.edges
            query = QueryOp.wrap(query, schema)
            op, index_to_columns = self._edges_op(query, frum)
            command = create_table + op
            query.edges, query.groupby = query.groupby, query.edges
        elif query.edges or any(a != "none"
                                for a in listwrap(query.select).aggregate):
            op, index_to_columns = self._edges_op(query, frum)
            command = create_table + op
        else:
            op = self._set_op(query, frum)
            return op

        result = self.db.query(command)

        if query.format == "container":
            output = QueryTable(new_table,
                                db=self.db,
                                uid=self.uid,
                                exists=True)
        elif query.format == "cube" or (not query.format and query.edges):
            column_names = [None
                            ] * (max(c.push_column
                                     for c in index_to_columns.values()) + 1)
            for c in index_to_columns.values():
                column_names[c.push_column] = c.push_column_name

            if len(query.edges) == 0 and len(query.groupby) == 0:
                data = {n: Data() for n in column_names}
                for s in index_to_columns.values():
                    data[s.push_name][s.push_child] = unwrap(
                        s.pull(result.data[0]))
                if isinstance(query.select, list):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(data=unwrap(data),
                            select=select,
                            meta={"format": "cube"})

            if not result.data:
                edges = []
                dims = []
                for i, e in enumerate(query.edges + query.groupby):
                    allowNulls = coalesce(e.allowNulls, True)

                    if e.domain.type == "set" and e.domain.partitions:
                        domain = SimpleSetDomain(
                            partitions=e.domain.partitions.name)
                    elif e.domain.type == "range":
                        domain = e.domain
                    elif isinstance(e.value, TupleOp):
                        pulls = jx.sort([
                            c for c in index_to_columns.values()
                            if c.push_name == e.name
                        ], "push_child").pull
                        parts = [
                            tuple(p(d) for p in pulls) for d in result.data
                        ]
                        domain = SimpleSetDomain(
                            partitions=jx.sort(set(parts)))
                    else:
                        domain = SimpleSetDomain(partitions=[])

                    dims.append(1 if allowNulls else 0)
                    edges.append(
                        Data(name=e.name, allowNulls=allowNulls,
                             domain=domain))

                data = {}
                for si, s in enumerate(listwrap(query.select)):
                    if s.aggregate == "count":
                        data[s.name] = Matrix(dims=dims, zeros=0)
                    else:
                        data[s.name] = Matrix(dims=dims)

                if isinstance(query.select, list):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(meta={"format": "cube"},
                            edges=edges,
                            select=select,
                            data={k: v.cube
                                  for k, v in data.items()})

            columns = None

            edges = []
            dims = []
            for g in query.groupby:
                g.is_groupby = True

            for i, e in enumerate(query.edges + query.groupby):
                allowNulls = coalesce(e.allowNulls, True)

                if e.domain.type == "set" and e.domain.partitions:
                    domain = SimpleSetDomain(
                        partitions=e.domain.partitions.name)
                elif e.domain.type == "range":
                    domain = e.domain
                elif e.domain.type == "time":
                    domain = wrap(mo_json.scrub(e.domain))
                elif e.domain.type == "duration":
                    domain = wrap(mo_json.scrub(e.domain))
                elif isinstance(e.value, TupleOp):
                    pulls = jx.sort([
                        c for c in index_to_columns.values()
                        if c.push_name == e.name
                    ], "push_child").pull
                    parts = [tuple(p(d) for p in pulls) for d in result.data]
                    domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                else:
                    if not columns:
                        columns = zip(*result.data)
                    parts = set(columns[i])
                    if e.is_groupby and None in parts:
                        allowNulls = True
                    parts -= {None}

                    if query.sort[i].sort == -1:
                        domain = SimpleSetDomain(
                            partitions=wrap(sorted(parts, reverse=True)))
                    else:
                        domain = SimpleSetDomain(partitions=jx.sort(parts))

                dims.append(len(domain.partitions) + (1 if allowNulls else 0))
                edges.append(
                    Data(name=e.name, allowNulls=allowNulls, domain=domain))

            data_cubes = {}
            for si, s in enumerate(listwrap(query.select)):
                if s.aggregate == "count":
                    data_cubes[s.name] = Matrix(dims=dims, zeros=0)
                else:
                    data_cubes[s.name] = Matrix(dims=dims)

            r2c = index_to_coordinate(
                dims)  # WORKS BECAUSE THE DATABASE SORTED THE EDGES TO CONFORM
            for rownum, row in enumerate(result.data):
                coord = r2c(rownum)

                for i, s in enumerate(index_to_columns.values()):
                    if s.is_edge:
                        continue
                    if s.push_child == ".":
                        data_cubes[s.push_name][coord] = s.pull(row)
                    else:
                        data_cubes[s.push_name][coord][s.push_child] = s.pull(
                            row)

            if isinstance(query.select, list):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(meta={"format": "cube"},
                        edges=edges,
                        select=select,
                        data={k: v.cube
                              for k, v in data_cubes.items()})
        elif query.format == "table" or (not query.format and query.groupby):
            column_names = [None
                            ] * (max(c.push_column
                                     for c in index_to_columns.values()) + 1)
            for c in index_to_columns.values():
                column_names[c.push_column] = c.push_column_name
            data = []
            for d in result.data:
                row = [None for _ in column_names]
                for s in index_to_columns.values():
                    if s.push_child == ".":
                        row[s.push_column] = s.pull(d)
                    elif s.num_push_columns:
                        tuple_value = row[s.push_column]
                        if tuple_value == None:
                            tuple_value = row[
                                s.push_column] = [None] * s.num_push_columns
                        tuple_value[s.push_child] = s.pull(d)
                    elif row[s.push_column] == None:
                        row[s.push_column] = Data()
                        row[s.push_column][s.push_child] = s.pull(d)
                    else:
                        row[s.push_column][s.push_child] = s.pull(d)
                data.append(tuple(unwrap(r) for r in row))

            output = Data(meta={"format": "table"},
                          header=column_names,
                          data=data)
        elif query.format == "list" or (not query.edges and not query.groupby):
            if not query.edges and not query.groupby and any(
                    listwrap(query.select).aggregate):
                if isinstance(query.select, list):
                    data = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            if data[c.push_name] == None:
                                data[c.push_name] = c.pull(result.data[0])
                            elif isinstance(data[c.push_name], list):
                                data[c.push_name].append(c.pull(
                                    result.data[0]))
                            else:
                                data[c.push_name] = [
                                    data[c.push_name],
                                    c.pull(result.data[0])
                                ]
                        else:
                            data[c.push_name][c.push_child] = c.pull(
                                result.data[0])

                    output = Data(meta={"format": "value"}, data=data)
                else:
                    data = Data()
                    for s in index_to_columns.values():
                        if data[s.push_child] == None:
                            data[s.push_child] = s.pull(result.data[0])
                        else:
                            data[s.push_child] += [s.pull(result.data[0])]
                    output = Data(meta={"format": "value"}, data=unwrap(data))
            else:
                data = []
                for rownum in result.data:
                    row = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            row[c.push_name] = c.pull(rownum)
                        elif c.num_push_columns:
                            tuple_value = row[c.push_name]
                            if not tuple_value:
                                tuple_value = row[
                                    c.push_name] = [None] * c.num_push_columns
                            tuple_value[c.push_child] = c.pull(rownum)
                        else:
                            row[c.push_name][c.push_child] = c.pull(rownum)

                    data.append(row)

                output = Data(meta={"format": "list"}, data=data)
        else:
            Log.error("unknown format {{format}}", format=query.format)

        return output
Esempio n. 19
0
 def delete(self, where):
     filter = where.to_sql()
     self.db.execute("DELETE FROM " + quote_table(self.sf.fact) +
                     " WHERE " + filter)
Esempio n. 20
0
         on_clause = edge_alias + "." + domain_name + " < " + edge_values[1][1] + " AND " + \
                     edge_values[0][1] + " < (" + edge_alias + "." + domain_name + " + " + text_type(
             d.interval) + ")"
         not_on_clause = None
     else:
         Log.error("do not know how to handle")
         # select_clause.extend(v[0] + " " + k for k, v in zip(domain_names, edge_values))
 elif len(edge_names) > 1:
     domain_names = [
         "d" + text_type(edge_index) + "c" + text_type(i)
         for i, _ in enumerate(edge_names)
     ]
     query_edge.allowNulls = False
     domain_columns = [
         c for c in self.sf.columns
         if quote_table(c.es_column) in vals
     ]
     if not domain_columns:
         domain_nested_path = "."
         Log.note("expecting a known column")
     else:
         domain_nested_path = domain_columns[0].nested_path
     domain_table = quote_table(
         concat_field(self.sf.fact, domain_nested_path[0]))
     domain = ("\nSELECT " +
               ",\n".join(g + " AS " + n
                          for n, g in zip(domain_names, vals)) +
               "\nFROM\n" + domain_table + " " +
               nest_to_alias["."] + "\nGROUP BY\n" +
               ",\n".join(vals))
     limit = Math.min(query.limit, query_edge.domain.limit)
Esempio n. 21
0
    def _edges_op(self, query, frum):
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        outer_selects = [
        ]  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        frum_path = split_field(frum)
        base_table = join_field(frum_path[0:1])
        path = join_field(frum_path[1:])
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path,
                    sub_table) in enumerate(self.sf.tables.items())
        }

        schema = self.sf.tables[relative_field(frum, self.sf.fact)].schema

        tables = []
        for n, a in nest_to_alias.items():
            if startswith_field(path, n):
                tables.append({"nest": n, "alias": a})
        tables = jx.sort(tables, {"value": {"length": "nest"}})

        from_sql = join_field(
            [base_table] + split_field(tables[0].nest)) + " " + tables[0].alias
        previous = tables[0]
        for t in tables[1::]:
            from_sql += ("\nLEFT JOIN\n" +
                         quote_table(concat_field(base_table, t.nest)) + " " +
                         t.alias + " ON " + t.alias + "." + PARENT + " = " +
                         previous.alias + "." + UID)

        # SHIFT THE COLUMN DEFINITIONS BASED ON THE NESTED QUERY DEPTH
        ons = []
        join_types = []
        wheres = []
        not_ons = ["__exists__ IS NULL"]
        groupby = []
        not_groupby = []
        orderby = []
        domains = []
        select_clause = [
            "1 __exists__"  # USED TO DISTINGUISH BETWEEN NULL-BECAUSE-LEFT-JOIN OR NULL-BECAUSE-NULL-VALUE
        ]

        for edge_index, query_edge in enumerate(query.edges):
            edge_alias = "e" + text_type(edge_index)

            if query_edge.value:
                edge_values = [
                    p for c in query_edge.value.to_sql(schema).sql
                    for p in c.items()
                ]

            elif not query_edge.value and any(
                    query_edge.domain.partitions.where):
                case = "CASE "
                for pp, p in enumerate(query_edge.domain.partitions):
                    w = p.where.to_sql(schema)[0].sql.b
                    t = quote_value(pp)
                    case += " WHEN " + w + " THEN " + t
                case += " ELSE NULL END "  # quote value with length of partitions
                edge_values = [("n", case)]

            elif query_edge.range:
                edge_values = query_edge.range.min.to_sql(schema)[0].sql.items(
                ) + query_edge.range.max.to_sql(schema)[0].sql.items()
Esempio n. 22
0
    def __init__(self, name, db=None, uid=GUID, exists=False, kwargs=None):
        """
        :param name: NAME FOR THIS TABLE
        :param db: THE DB TO USE
        :param uid: THE UNIQUE INDEX FOR THIS TABLE
        :return: HANDLE FOR TABLE IN db
        """
        global _config
        Container.__init__(self, frum=None)
        if db:
            self.db = db
        else:
            self.db = db = Sqlite()

        if not _config:
            from pyLibrary.queries.containers import config as _config
            if not _config.default:
                _config.default = {"type": "sqlite", "settings": {"db": db}}

        self.name = name
        self.uid = listwrap(uid)
        self._next_uid = 1
        self._make_digits_table()

        self.uid_accessor = jx.get(self.uid)
        self.nested_tables = OrderedDict(
        )  # MAP FROM NESTED PATH TO Table OBJECT, PARENTS PROCEED CHILDREN
        self.nested_tables["."] = self
        self.columns = Index(
            keys=[join_field(["names", self.name])]
        )  # MAP FROM DOCUMENT ABS PROPERTY NAME TO THE SET OF SQL COLUMNS IT REPRESENTS (ONE FOR EACH REALIZED DATATYPE)

        if not exists:
            for u in self.uid:
                if u == GUID:
                    pass
                else:
                    c = Column(names={name: u},
                               type="string",
                               es_column=typed_column(u, "string"),
                               es_index=name)
                    self.add_column_to_schema(self.nested_tables, c)

            command = ("CREATE TABLE " + quote_table(name) + "(" +
                       (",".join([quoted_UID + " INTEGER"] + [
                           _quote_column(c) + " " + sql_types[c.type]
                           for u, cs in self.columns.items() for c in cs
                       ])) + ", PRIMARY KEY (" + (", ".join([quoted_UID] + [
                           _quote_column(c) for u in self.uid
                           for c in self.columns[u]
                       ])) + "))")

            self.db.execute(command)
        else:
            # LOAD THE COLUMNS
            command = "PRAGMA table_info(" + quote_table(name) + ")"
            details = self.db.query(command)

            for r in details:
                cname = untyped_column(r[1])
                ctype = r[2].lower()
                column = Column(names={name: cname},
                                type=ctype,
                                nested_path=['.'],
                                es_column=typed_column(cname, ctype),
                                es_index=name)

                self.add_column_to_schema(self.columns, column)
Esempio n. 23
0
    def _nest_column(self, column, new_path):
        destination_table = concat_field(self.fact, new_path)
        existing_table = concat_field(self.fact, column.nested_path[0])

        # FIND THE INNER COLUMNS WE WILL BE MOVING
        moving_columns = []
        for c in self.columns:
            if destination_table != column.es_index and column.es_column == c.es_column:
                moving_columns.append(c)
                c.nested_path = [new_path] + c.nested_path

        # TODO: IF THERE ARE CHILD TABLES, WE MUST UPDATE THEIR RELATIONS TOO?

        # DEFINE A NEW TABLE?
        # LOAD THE COLUMNS
        command = "PRAGMA table_info(" + quote_table(destination_table) + ")"
        details = self.db.query(command)
        if not details.data:
            command = ("CREATE TABLE " + quote_table(destination_table) + "(" +
                       (",".join([
                           quoted_UID + " INTEGER", quoted_PARENT + " INTEGER",
                           quoted_ORDER + " INTEGER"
                       ])) + ", PRIMARY KEY (" + quoted_UID + ")" +
                       ", FOREIGN KEY (" + quoted_PARENT + ") REFERENCES " +
                       quote_table(existing_table) + "(" + quoted_UID + ")"
                       ")")
            self.db.execute(command)
            self.add_table_to_schema([new_path])

        # TEST IF THERE IS ANY DATA IN THE NEW NESTED ARRAY
        if not moving_columns:
            return

        column.es_index = destination_table
        self.db.execute("ALTER TABLE " + quote_table(destination_table) +
                        " ADD COLUMN " + quote_column(column.es_column) + " " +
                        sql_types[column.type])

        # Deleting parent columns
        for col in moving_columns:
            column = col.es_column
            tmp_table = "tmp_" + existing_table
            columns = self.db.query("select * from " +
                                    quote_table(existing_table) +
                                    " LIMIT 0").header
            self.db.execute("ALTER TABLE " + quote_table(existing_table) +
                            " RENAME TO " + quote_table(tmp_table))
            self.db.execute(
                "CREATE TABLE " + quote_table(existing_table) + " AS SELECT " +
                (", ".join([quote_table(c) for c in columns if c != column])) +
                " FROM " + quote_table(tmp_table))
            self.db.execute("DROP TABLE " + quote_table(tmp_table))
Esempio n. 24
0
 def __del__(self):
     self.db.execute("DROP TABLE " + quote_table(self.name))
Esempio n. 25
0
 def __len__(self):
     counter = self.db.query("SELECT COUNT(*) FROM " +
                             quote_table(self.sf.fact))[0][0]
     return counter
Esempio n. 26
0
 def __nonzero__(self):
     counter = self.db.query("SELECT COUNT(*) FROM " +
                             quote_table(self.sf.fact))[0][0]
     return bool(counter)
Esempio n. 27
0
                     edge_values[0][1] + " < (" + edge_alias + "." + domain_name + " + " + unicode(
             d.interval) + ")"
         not_on_clause = None
     else:
         Log.error("do not know how to handle")
         # select_clause.extend(v[0] + " " + k for k, v in zip(domain_names, edge_values))
 elif len(edge_names) > 1:
     domain_names = [
         "d" + unicode(edge_index) + "c" + unicode(i)
         for i, _ in enumerate(edge_names)
     ]
     query_edge.allowNulls = False
     domain = ("\nSELECT " +
               ",\n".join(g + " AS " + n
                          for n, g in zip(domain_names, vals)) +
               "\nFROM\n" + quote_table(self.name) + " " +
               nest_to_alias["."] + "\nGROUP BY\n" +
               ",\n".join(vals))
     limit = Math.min(query.limit, query_edge.domain.limit)
     domain += ("\nORDER BY COUNT(1) DESC" + "\nLIMIT " +
                unicode(limit))
     where = None
     join_type = "LEFT JOIN" if query_edge.allowNulls else "JOIN"
     on_clause = " AND ".join(
         "((" + edge_alias + "." + k + " IS NULL AND " + v +
         " IS NULL) OR " + edge_alias + "." + k + " = " + v + ")"
         for k, v in zip(domain_names, vals))
     not_on_clause = None
 elif isinstance(query_edge.domain, DefaultDomain):
     domain_names = [
         "d" + unicode(edge_index) + "c" + unicode(i)