Beispiel #1
0
    def _aggop(self, query):
        """
        BUILD THE SQL FOR A QUERY THAT COLLAPSES TO A SINGLE ROW OF AGGREGATES

        :param query: QUERY WHOSE select (ONE COLUMN, OR A LIST OF COLUMNS)
                      CARRIES AN aggregate FOUND IN aggregates
        :return: PAIR (sql, post); post ACCEPTS THE sql, RUNS IT ON self.db
                 AND PICKS THE ANSWER OUT OF THE RESULT
        """
        # SAME TEMPLATE SERVES BOTH SHAPES OF select
        template = """
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
            """

        if not isinstance(query.select, list):
            # RETURN SINGLE VALUE
            single = query.select
            if single.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", select=single)

            expression = aggregates[single.aggregate].replace("{{code}}", single.value)
            aliased = sql_alias(expression, quote_column(single.name))

            sql = expand_template(template, {
                "selects": SQL(aliased),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where)
            })

            def post(sql):
                # FIRST CELL OF THE FIRST COLUMN
                return self.db.column_query(sql)[0][0]

            return sql, post  # RETURN SINGLE VALUE

        # RETURN SINGLE OBJECT WITH AGGREGATES
        # VALIDATE EVERY COLUMN BEFORE BUILDING ANYTHING
        for column in query.select:
            if column.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", select=column)

        selects = FlatList()
        for column in query.select:
            expression = aggregates[column.aggregate].replace("{{code}}", column.value)
            selects.append(sql_alias(expression, quote_column(column.name)))

        # NOTE(review): THIS BRANCH READS query.filter, THE SINGLE-VALUE BRANCH
        # READS query.where - CONFIRM THE DIFFERENCE IS INTENDED
        sql = expand_template(template, {
            "selects": SQL(",\n".join(selects)),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.filter)
        })

        def first_row(sql):
            return self.db.column(sql)[0]

        return sql, first_row  # RETURNING SINGLE OBJECT WITH AGGREGATE VALUES
Beispiel #2
0
    def _aggop(self, query):
        """
        COMPOSE THE SQL FOR A QUERY THAT RETURNS ONE ROW OF AGGREGATES

        :param query: QUERY WHOSE select IS EITHER ONE COLUMN OR A LIST OF
                      COLUMNS, EACH WITH AN aggregate FOUND IN aggregates
        :return: PAIR (sql, post); post TAKES THE sql, EXECUTES IT THROUGH
                 self.db AND EXTRACTS THE RESULT
        """
        quote = self.db.quote_column

        # ONE TEMPLATE FOR BOTH THE SINGLE-VALUE AND THE MULTI-COLUMN CASE
        template = """
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
            """

        if not isinstance(query.select, list):
            # RETURN SINGLE VALUE
            lone = query.select
            if lone.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", select=lone)

            expression = aggregates[lone.aggregate].replace("{{code}}", lone.value)
            aliased = sql_alias(expression, quote(lone.name))

            sql = expand_template(template, {
                "selects": SQL(aliased),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where)
            })

            def post(sql):
                # FIRST CELL OF THE FIRST COLUMN
                return self.db.column_query(sql)[0][0]

            return sql, post  # RETURN SINGLE VALUE

        # RETURN SINGLE OBJECT WITH AGGREGATES
        # CHECK ALL COLUMNS FIRST, SO NOTHING IS BUILT FOR A BAD QUERY
        for column in query.select:
            if column.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", select=column)

        selects = FlatList()
        for column in query.select:
            expression = aggregates[column.aggregate].replace("{{code}}", column.value)
            selects.append(sql_alias(expression, quote(column.name)))

        # NOTE(review): query.filter IS READ HERE, BUT query.where IN THE
        # SINGLE-VALUE BRANCH - CONFIRM THIS IS DELIBERATE
        sql = expand_template(template, {
            "selects": SQL(",\n".join(selects)),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.filter)
        })

        def first_row(sql):
            return self.db.column(sql)[0]

        return sql, first_row  # RETURNING SINGLE OBJECT WITH AGGREGATE VALUES
Beispiel #3
0
    def _build_list_sql(self, db, first, batch_size):
        """
        COMPOSE THE SELECT THAT PULLS ONE BATCH OF ROWS FROM THE FACT TABLE

        :param db: USED TO QUOTE LITERAL VALUES (db.quote_value)
        :param first: ONE VALUE PER EXTRACT FIELD, COMPARED AGAINST THAT FIELD
                      WITH THE OPERATOR GIVEN BY ineq(); FALSEY MEANS NO FILTER
        :param batch_size: VALUE FOR THE LIMIT CLAUSE
        :return: THE SQL, ORDERED BY THE EXTRACT FIELDS
        """
        # TODO: ENSURE THE LAST COLUMN IS THE id
        fields = self._extract.field
        types = self._extract.type

        if not first:
            where = SQL_TRUE
        else:
            dim = len(fields)

            def comparisons(depth):
                # ONE COMPARISON FOR EACH FIELD UP TO, AND INCLUDING, fields[depth]
                # (SLICE FORM WITH THE TRAILING COLON IS KEPT EXACTLY AS IT WAS)
                prefix = zip(fields[0:depth + 1:], first, types[0:depth + 1:])
                for position, (name, value, kind) in enumerate(prefix):
                    yield (
                        quote_column(name) + ineq(depth, position, dim) +
                        db.quote_value(Date(value) if kind == "time" else value)
                    )

            # ONE PARENTHESISED AND-GROUP PER DEPTH, ALL OR-ED TOGETHER
            where = SQL_OR.join(
                sql_iso(sql_and(comparisons(depth))) for depth in range(dim)
            )

        # time COLUMNS ARE CAST TO DATETIME(6), EVERYTHING ELSE IS SELECTED AS-IS
        selects = [
            "CAST" + sql_iso(sql_alias(quote_column(name), SQL("DATETIME(6)")))
            if kind == "time" else quote_column(name)
            for kind, name in zip(types, fields)
        ]

        return (
            SQL_SELECT + sql_list(selects) +
            SQL_FROM + self.settings.snowflake.fact_table +
            SQL_WHERE + where +
            SQL_ORDERBY + sql_list(quote_column(name) for name in self._extract.field) +
            SQL_LIMIT + db.quote_value(batch_size)
        )
Beispiel #4
0
 def quote_column(self, column_name, table=None):
     """
     QUOTE A COLUMN REFERENCE FOR USE IN SQL

     :param column_name: TEXT NAME, LIST OF NAMES, OR AN OBJECT OF THE
                         {"name":name, "value":value} FORM
     :param table: OPTIONAL TABLE, COMBINED WITH THE NAME(S) BY join_column()
     :return: SQL FOR THE COLUMN (A LIST BECOMES AN sql_list, THE
              name/value FORM BECOMES AN ALIASED EXPRESSION)
     """
     if column_name == None:
         Log.error("missing column_name")
         return None

     if isinstance(column_name, text_type):
         qualified = join_column(table, column_name) if table else column_name
         # MY SQL QUOTE OF COLUMN NAMES: EVERY DOT SEPARATES TWO QUOTED PARTS
         return SQL("`" + qualified.replace(".", "`.`") + "`")

     if isinstance(column_name, list):
         if not table:
             return sql_list(self.quote_column(c) for c in column_name)
         return sql_list(join_column(table, c) for c in column_name)

     # ASSUME {"name":name, "value":value} FORM
     expression = column_name.value
     alias = self.quote_column(column_name.name)
     return SQL(sql_alias(expression, alias))
Beispiel #5
0
def quote_column(column_name, table=None):
    """
    QUOTE A COLUMN REFERENCE FOR USE IN SQL

    :param column_name: TEXT OR BINARY NAME, LIST OF NAMES, OR AN OBJECT OF
                        THE {"name":name, "value":value} FORM
    :param table: OPTIONAL TABLE; WHEN GIVEN, join_column() BUILDS THE REFERENCE
    :return: SQL FOR THE COLUMN (A LIST BECOMES AN sql_list, THE
             name/value FORM BECOMES AN ALIASED EXPRESSION)
    """
    if column_name == None:
        Log.error("missing column_name")
        return None

    if is_text(column_name):
        if not table:
            # MYSQL QUOTE OF COLUMN NAMES: EACH STEP OF THE PATH IS QUOTED
            steps = split_field(column_name)
            return SQL("`" + '`.`'.join(steps) + "`")
        return join_column(table, column_name)

    if is_binary(column_name):
        # DECODE, THEN TREAT AS TEXT
        return quote_column(column_name.decode('utf8'), table)

    if is_list(column_name):
        if not table:
            return sql_list(quote_column(c) for c in column_name)
        return sql_list(join_column(table, c) for c in column_name)

    # ASSUME {"name":name, "value":value} FORM
    expression = column_name.value
    alias = quote_column(column_name.name)
    return SQL(sql_alias(expression, alias))
    def get_sql(self, get_ids):
        """
        WRAP THE COMPOSED SELECTS IN ONE `UNION ALL` QUERY, ORDERED BY THE
        COLUMNS MARKED FOR SORTING

        :param get_ids: PASSED THROUGH TO self._compose_sql()
        :return: SQL SELECTING EVERYTHING FROM THE UNION (ALIASED `a`)
        """
        sql = self._compose_sql(get_ids)

        # ORDERING
        # EACH SORTED COLUMN ADDS TWO TERMS: THE "IS NOT NULL" TEST, THEN THE COLUMN
        sort = []
        for c in self.columns:
            if c.sort:
                alias = quote_column(c.column_alias)
                sort.append(alias + SQL_IS_NOT_NULL)
                sort.append(alias)

        # NOTE(review): WHEN NO COLUMN IS SORTED THE ORDER BY LIST IS EMPTY -
        # CONFIRM sql_list([]) STILL YIELDS VALID SQL
        union_all_sql = SQL_UNION_ALL.join(sql)
        union_all_sql = (SQL_SELECT + SQL_STAR + SQL_FROM +
                         sql_alias(sql_iso(union_all_sql), quote_column('a')) +
                         SQL_ORDERBY + sql_list(sort))
        return union_all_sql
Beispiel #7
0
def quote_column(column_name, table=None):
    """
    TURN A COLUMN REFERENCE INTO QUOTED SQL

    :param column_name: TEXT OR BINARY NAME, LIST OF NAMES, OR AN OBJECT OF
                        THE {"name":name, "value":value} FORM
    :param table: OPTIONAL TABLE; WHEN GIVEN, join_column() BUILDS THE REFERENCE
    :return: SQL FOR THE COLUMN (A LIST BECOMES AN sql_list, THE
             name/value FORM BECOMES AN ALIASED EXPRESSION)
    """
    if column_name == None:
        Log.error("missing column_name")
        return None

    if is_text(column_name):
        if table:
            return join_column(table, column_name)
        # MYSQL QUOTE OF COLUMN NAMES: BACKTICKS AROUND EVERY STEP OF THE PATH
        quoted_path = '`.`'.join(split_field(column_name))
        return SQL("`" + quoted_path + "`")

    if is_binary(column_name):
        # BYTES ARE DECODED AS utf8 AND HANDLED AS TEXT
        as_text = column_name.decode('utf8')
        return quote_column(as_text, table)

    if is_list(column_name):
        if table:
            return sql_list(join_column(table, c) for c in column_name)
        return sql_list(quote_column(c) for c in column_name)

    # ASSUME {"name":name, "value":value} FORM
    expression = column_name.value
    alias = quote_column(column_name.name)
    return SQL(sql_alias(expression, alias))
Beispiel #8
0
                    pull=pull,
                    sql=sql,
                    type=sql_type_to_json_type[json_type],
                    column_alias=sql_name)

            vals = [v for t, v in edge_values]
            if query_edge.domain.type == "set":
                domain_name = quote_column("d" + text_type(edge_index) + "c" +
                                           text_type(column_index))
                domain_names = [domain_name]
                if len(edge_names) > 1:
                    Log.error("Do not know how to handle")
                if query_edge.value:
                    domain = SQL_UNION_ALL.join(
                        SQL_SELECT +
                        sql_alias(quote_value(coalesce(p.dataIndex, i)),
                                  quote_column("rownum")) + SQL_COMMA +
                        sql_alias(quote_value(p.value), domain_name)
                        for i, p in enumerate(query_edge.domain.partitions))
                    if query_edge.allowNulls:
                        domain += (SQL_UNION_ALL + SQL_SELECT + sql_alias(
                            quote_value(len(query_edge.domain.partitions)),
                            quote_column("rownum")) + SQL_COMMA +
                                   sql_alias(SQL_NULL, domain_name))
                    where = None
                    join_type = SQL_LEFT_JOIN if query_edge.allowNulls else SQL_INNER_JOIN
                    on_clause = (SQL_OR.join(
                        join_column(edge_alias, k) + " = " + v
                        for k, v in zip(domain_names, vals)) + SQL_OR +
                                 sql_iso(
                                     join_column(edge_alias, domain_name) +
                                     SQL_IS_NULL + SQL_AND +
    def _make_sql_for_one_nest_in_set_op(
            self,
            primary_nested_path,
            selects,  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE
            where_clause,
            active_columns,
            index_to_sql_select  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
    ):
        """
        FOR EACH NESTED LEVEL, WE MAKE A QUERY THAT PULLS THE VALUES/COLUMNS REQUIRED
        WE `UNION ALL` THEM WHEN DONE

        BUILDS ONE SELECT FOR primary_nested_path BY WALKING self.sf.tables IN
        ORDER, THEN RECURSES ONCE FOR EACH CHILD TABLE FOUND (DEEPER TABLES
        UNDER AN ALREADY-HANDLED CHILD ARE LEFT TO THAT RECURSION)

        :param primary_nested_path: THE NESTED PATH THIS SELECT IS FOR
        :param selects: EVERY SELECT CLAUSE; AN ENTRY WITH NO MAPPING IN
                        index_to_sql_select IS EMITTED AS-IS, A MAPPED ENTRY IS
                        EMITTED AS ITS sql, OR AS NULL WHEN startswith_field()
                        REJECTS ITS nested_path[0] FOR THIS LEVEL
        :param where_clause: FILTER; A FALSEY VALUE IS REPLACED WITH SQL_TRUE
        :param active_columns: NOT READ HERE, ONLY FORWARDED TO THE RECURSIVE CALL
        :param index_to_sql_select: MAP FROM INDEX IN selects TO COLUMN MAPPING
                        (READS .sql, .column_alias AND .nested_path)
        :return: SQL FOR ONE NESTED LEVEL, `UNION ALL`-ED WITH THE SQL OF ITS CHILDREN
        """

        parent_alias = "a"
        from_clause = ""
        # STAYS EMPTY IF primary_nested_path NEVER EQUALS A KEY OF self.sf.tables
        select_clause = []
        children_sql = []
        # CHILD PATHS ALREADY HANDED TO A RECURSIVE CALL
        done = []
        if not where_clause:
            where_clause = SQL_TRUE
        # STATEMENT FOR EACH NESTED PATH
        for i, (nested_path, sub_table) in enumerate(self.sf.tables.items()):
            # ANYTHING AT, OR BELOW, A CHILD WE ALREADY RECURSED INTO IS SKIPPED
            if any(startswith_field(nested_path, d) for d in done):
                continue

            # ALIAS IS DERIVED FROM THE POSITION OF THE TABLE: `__a__`, `__b__`, ...
            alias = quote_column("__" + unichr(ord('a') + i) + "__")

            if primary_nested_path == nested_path:
                # THE TABLE THIS SELECT IS FOR
                select_clause = []
                # ADD SELECT CLAUSE HERE
                for select_index, s in enumerate(selects):
                    sql_select = index_to_sql_select.get(select_index)
                    if not sql_select:
                        # NO MAPPING FOR THIS INDEX: USE THE GIVEN CLAUSE UNCHANGED
                        select_clause.append(selects[select_index])
                        continue

                    if startswith_field(sql_select.nested_path[0],
                                        nested_path):
                        select_clause.append(
                            sql_alias(sql_select.sql, sql_select.column_alias))
                    else:
                        # DO NOT INCLUDE DEEP STUFF AT THIS LEVEL
                        # (NULL KEEPS THE COLUMN COUNT THE SAME ACROSS THE UNION)
                        select_clause.append(
                            sql_alias(SQL_NULL, sql_select.column_alias))

                if nested_path == ".":
                    from_clause += SQL_FROM + sql_alias(
                        quote_column(self.sf.fact), alias)
                else:
                    from_clause += (SQL_LEFT_JOIN + sql_alias(
                        quote_column(concat_field(
                            self.sf.fact, sub_table.name)), alias) + SQL_ON +
                                    join_column(alias, quoted_PARENT) + " = " +
                                    join_column(parent_alias, quoted_UID))
                    # ADDS AN "ORDER > 0" CONDITION ON THE JOINED TABLE TO THE FILTER
                    where_clause = sql_iso(
                        where_clause) + SQL_AND + join_column(
                            alias, quoted_ORDER) + " > 0"
                parent_alias = alias

            elif startswith_field(primary_nested_path, nested_path):
                # PARENT TABLE
                # NO NEED TO INCLUDE COLUMNS, BUT WILL INCLUDE ID AND ORDER
                if nested_path == ".":
                    from_clause += SQL_FROM + quote_column(
                        self.sf.fact) + " " + alias
                else:
                    # NOTE(review): alias AND parent_alias ARE SET TO THE SAME BARE
                    # LETTER HERE, SO THE ON CLAUSE BELOW COMPARES TWO COLUMNS OF
                    # ONE ALIAS, AND THE ALIAS IS NOT THE QUOTED `__x__` FORM USED
                    # BY THE OTHER BRANCHES - CONFIRM THIS IS INTENDED
                    parent_alias = alias = unichr(ord('a') + i - 1)
                    from_clause += (SQL_LEFT_JOIN + quote_column(
                        concat_field(self.sf.fact, sub_table.name)) + " " +
                                    alias + SQL_ON +
                                    join_column(alias, quoted_PARENT) + " = " +
                                    join_column(parent_alias, quoted_UID))
                    where_clause = sql_iso(
                        where_clause) + SQL_AND + join_column(
                            parent_alias, quoted_ORDER) + " > 0"
                parent_alias = alias

            elif startswith_field(nested_path, primary_nested_path):
                # CHILD TABLE
                # GET FIRST ROW FOR EACH NESTED TABLE
                # NOTE(review): THIS JOIN USES ORDER WHERE THE OTHER BRANCHES USE
                # quoted_ORDER - CONFIRM WHICH ONE join_column() EXPECTS
                from_clause += (SQL_LEFT_JOIN + sql_alias(
                    quote_column(concat_field(self.sf.fact, sub_table.name)),
                    alias) + SQL_ON + join_column(alias, quoted_PARENT) +
                                " = " + join_column(parent_alias, quoted_UID) +
                                SQL_AND + join_column(alias, ORDER) + " = 0")

                # IMMEDIATE CHILDREN ONLY
                done.append(nested_path)
                # NESTED TABLES WILL USE RECURSION
                # THE where_clause PASSED DOWN INCLUDES ANY "ORDER > 0" CONDITIONS
                # APPENDED SO FAR IN THIS LOOP
                children_sql.append(
                    self._make_sql_for_one_nest_in_set_op(
                        nested_path,
                        selects,  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE
                        where_clause,
                        active_columns,
                        index_to_sql_select  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
                    ))
            else:
                # SIBLING PATHS ARE IGNORED
                continue

        # THIS LEVEL'S SELECT COMES FIRST, FOLLOWED BY THE SQL OF EACH CHILD
        sql = SQL_UNION_ALL.join([
            SQL_SELECT + sql_list(select_clause) + from_clause + SQL_WHERE +
            where_clause
        ] + children_sql)

        return sql
    def _set_op(self, query, frum):
        # GET LIST OF COLUMNS
        base_name, primary_nested_path = tail_field(frum)
        vars_ = UNION([
            v.var for select in listwrap(query.select)
            for v in select.value.vars()
        ])
        schema = self.sf.tables[primary_nested_path].schema

        active_columns = {".": set()}
        for v in vars_:
            for c in schema.leaves(v):
                nest = c.nested_path[0]
                active_columns.setdefault(nest, set()).add(c)

        # ANY VARS MENTIONED WITH NO COLUMNS?
        for v in vars_:
            if not any(startswith_field(cname, v) for cname in schema.keys()):
                active_columns["."].add(
                    Column(name=v,
                           jx_type="null",
                           es_column=".",
                           es_index=".",
                           nested_path=["."]))

        # EVERY COLUMN, AND THE INDEX IT TAKES UP
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        index_to_uid = {}  # FROM NESTED PATH TO THE INDEX OF UID
        sql_selects = [
        ]  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path,
                    sub_table) in enumerate(self.sf.tables.items())
        }

        sorts = []
        if query.sort:
            for select in query.sort:
                col = select.value.to_sql(schema)[0]
                for t, sql in col.sql.items():
                    json_type = sql_type_to_json_type[t]
                    if json_type in STRUCT:
                        continue
                    column_number = len(sql_selects)
                    # SQL HAS ABS TABLE REFERENCE
                    column_alias = _make_column_name(column_number)
                    sql_selects.append(sql_alias(sql, column_alias))
                    if select.sort == -1:
                        sorts.append(column_alias + SQL_IS_NOT_NULL)
                        sorts.append(column_alias + " DESC")
                    else:
                        sorts.append(column_alias + SQL_IS_NULL)
                        sorts.append(column_alias)

        primary_doc_details = Data()
        # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH
        # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED
        nested_path = []
        for step, sub_table in self.sf.tables.items():
            nested_path.insert(0, step)
            nested_doc_details = {
                "sub_table": sub_table,
                "children": [],
                "index_to_column": {},
                "nested_path": nested_path
            }

            # INSERT INTO TREE
            if not primary_doc_details:
                primary_doc_details = nested_doc_details
            else:

                def place(parent_doc_details):
                    if startswith_field(step,
                                        parent_doc_details['nested_path'][0]):
                        for c in parent_doc_details['children']:
                            if place(c):
                                return True
                        parent_doc_details['children'].append(
                            nested_doc_details)

                place(primary_doc_details)

            alias = nested_doc_details['alias'] = nest_to_alias[step]

            # WE ALWAYS ADD THE UID
            column_number = index_to_uid[step] = nested_doc_details[
                'id_coord'] = len(sql_selects)
            sql_select = join_column(alias, quoted_UID)
            sql_selects.append(
                sql_alias(sql_select, _make_column_name(column_number)))
            if step != ".":
                # ID AND ORDER FOR CHILD TABLES
                index_to_column[column_number] = ColumnMapping(
                    sql=sql_select,
                    type="number",
                    nested_path=nested_path,
                    column_alias=_make_column_name(column_number))
                column_number = len(sql_selects)
                sql_select = join_column(alias, quoted_ORDER)
                sql_selects.append(
                    sql_alias(sql_select, _make_column_name(column_number)))
                index_to_column[column_number] = ColumnMapping(
                    sql=sql_select,
                    type="number",
                    nested_path=nested_path,
                    column_alias=_make_column_name(column_number))

            # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM
            if step not in active_columns:
                continue

            # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE
            si = 0
            for select in listwrap(query.select):
                try:
                    column_number = len(sql_selects)
                    select.pull = get_column(column_number)
                    db_columns = select.value.partial_eval().to_sql(schema)

                    for column in db_columns:
                        if isinstance(column.nested_path, list):
                            column.nested_path = column.nested_path[
                                0]  # IN THE EVENT THIS "column" IS MULTIVALUED
                        for t, unsorted_sql in column.sql.items():
                            json_type = sql_type_to_json_type[t]
                            if json_type in STRUCT:
                                continue
                            column_number = len(sql_selects)
                            column_alias = _make_column_name(column_number)
                            sql_selects.append(
                                sql_alias(unsorted_sql, column_alias))
                            if startswith_field(primary_nested_path,
                                                step) and isinstance(
                                                    select.value, LeavesOp):
                                # ONLY FLATTEN primary_nested_path AND PARENTS, NOT CHILDREN
                                index_to_column[
                                    column_number] = nested_doc_details[
                                        'index_to_column'][
                                            column_number] = ColumnMapping(
                                                push_name=literal_field(
                                                    get_property_name(
                                                        concat_field(
                                                            select.name,
                                                            column.name))),
                                                push_child=".",
                                                push_column_name=
                                                get_property_name(
                                                    concat_field(
                                                        select.name,
                                                        column.name)),
                                                push_column=si,
                                                pull=get_column(column_number),
                                                sql=unsorted_sql,
                                                type=json_type,
                                                column_alias=column_alias,
                                                nested_path=nested_path)
                                si += 1
                            else:
                                index_to_column[
                                    column_number] = nested_doc_details[
                                        'index_to_column'][
                                            column_number] = ColumnMapping(
                                                push_name=select.name,
                                                push_child=column.name,
                                                push_column_name=select.name,
                                                push_column=si,
                                                pull=get_column(column_number),
                                                sql=unsorted_sql,
                                                type=json_type,
                                                column_alias=column_alias,
                                                nested_path=nested_path)
                finally:
                    si += 1

        where_clause = BooleanOp("boolean", query.where).partial_eval().to_sql(
            schema, boolean=True)[0].sql.b
        unsorted_sql = self._make_sql_for_one_nest_in_set_op(
            ".", sql_selects, where_clause, active_columns, index_to_column)

        for n, _ in self.sf.tables.items():
            sorts.append(quote_column(COLUMN + text_type(index_to_uid[n])))

        ordered_sql = (SQL_SELECT + "*" + SQL_FROM + sql_iso(unsorted_sql) +
                       SQL_ORDERBY + sql_list(sorts) + SQL_LIMIT +
                       quote_value(query.limit))
        self.db.create_new_functions()  # creating new functions: regexp
        result = self.db.query(ordered_sql)

        def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id,
                               parent_id_coord):
            """
            FOLD CONSECUTIVE RESULT ROWS INTO THE LIST OF DOCUMENTS FOR ONE
            NESTED LEVEL, RECURSING INTO EACH CHILD LEVEL

            :param rows: REVERSED STACK OF ROWS (WITH push() AND pop())
            :param row: CURRENT ROW BEING EXTRACTED
            :param nested_doc_details: {
                    "nested_path": wrap_nested_path(nested_path),
                    "index_to_column": map from column number to column details
                    "children": all possible direct decedents' nested_doc_details
                 }
            :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop)
            :param parent_id_coord: the column number for the parent id (so we can extract from each row)
            :return: the nested property (usually an array)
            """
            previous_doc_id = None
            doc = Data()
            output = []
            id_coord = nested_doc_details['id_coord']

            while True:
                doc_id = row[id_coord]

                if doc_id == None or (parent_id_coord is not None and
                                      row[parent_id_coord] != parent_doc_id):
                    rows.append(
                        row
                    )  # UNDO PREVIOUS POP (RECORD IS NOT A NESTED RECORD OF parent_doc)
                    return output

                if doc_id != previous_doc_id:
                    # FIRST ROW OF A NEW DOCUMENT AT THIS LEVEL
                    previous_doc_id = doc_id
                    doc = Data()
                    curr_nested_path = nested_doc_details['nested_path'][0]
                    column_items = nested_doc_details[
                        'index_to_column'].items()
                    if column_items:
                        for i, c in column_items:
                            value = row[i]
                            if isinstance(query.select, list) or isinstance(
                                    query.select.value, LeavesOp):
                                # ASSIGN INNER PROPERTIES
                                target = concat_field(
                                    c.push_name, c.push_child)
                            else:  # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                                target = c.push_child

                            if target == ".":
                                if value == '':
                                    doc = Null
                                else:
                                    doc = value
                            elif value != None and value != '':
                                doc[target] = value

                for child_details in nested_doc_details['children']:
                    # EACH NESTED TABLE MUST BE ASSEMBLED INTO A LIST OF OBJECTS
                    child_id = row[child_details['id_coord']]
                    if child_id is not None:
                        nested_value = _accumulate_nested(
                            rows, row, child_details, doc_id, id_coord)
                        if nested_value:
                            push_name = child_details['nested_path'][0]
                            if isinstance(query.select, list) or isinstance(
                                    query.select.value, LeavesOp):
                                # ASSIGN INNER PROPERTIES
                                # NO LOCAL IN THIS FUNCTION MAY BE NAMED relative_field,
                                # OTHERWISE THIS CALL WOULD HIT THE LOCAL (A STRING, OR
                                # UNBOUND) INSTEAD OF THE HELPER FUNCTION
                                target = relative_field(
                                    push_name, curr_nested_path)
                            else:  # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                                target = "."

                            if target == "." and doc is Null:
                                doc = nested_value
                            elif target == ".":
                                doc = unwraplist(nested_value)
                            else:
                                doc[target] = unwraplist(nested_value)

                output.append(doc)

                try:
                    row = rows.pop()
                except IndexError:
                    # NO MORE ROWS
                    return output

        cols = tuple(
            [i for i in index_to_column.values() if i.push_name != None])
        rows = list(reversed(unwrap(result.data)))
        if rows:
            row = rows.pop()
            data = _accumulate_nested(rows, row, primary_doc_details, None,
                                      None)
        else:
            data = result.data

        if query.format == "cube":
            for f, _ in self.sf.tables.items():
                if frum.endswith(f) or (test_dots(cols)
                                        and isinstance(query.select, list)):
                    num_rows = len(result.data)
                    num_cols = MAX([c.push_column
                                    for c in cols]) + 1 if len(cols) else 0
                    map_index_to_name = {
                        c.push_column: c.push_column_name
                        for c in cols
                    }
                    temp_data = [[None] * num_rows for _ in range(num_cols)]
                    for rownum, d in enumerate(result.data):
                        for c in cols:
                            if c.push_child == ".":
                                temp_data[c.push_column][rownum] = c.pull(d)
                            else:
                                column = temp_data[c.push_column][rownum]
                                if column is None:
                                    column = temp_data[
                                        c.push_column][rownum] = {}
                                column[c.push_child] = c.pull(d)
                    output = Data(meta={"format": "cube"},
                                  data={
                                      n: temp_data[c]
                                      for c, n in map_index_to_name.items()
                                  },
                                  edges=[{
                                      "name": "rownum",
                                      "domain": {
                                          "type": "rownum",
                                          "min": 0,
                                          "max": num_rows,
                                          "interval": 1
                                      }
                                  }])
                    return output

            if isinstance(query.select, list) or isinstance(
                    query.select.value, LeavesOp):
                num_rows = len(data)
                temp_data = {
                    c.push_column_name: [None] * num_rows
                    for c in cols
                }
                for rownum, d in enumerate(data):
                    for c in cols:
                        temp_data[c.push_column_name][rownum] = d[c.push_name]
                return Data(meta={"format": "cube"},
                            data=temp_data,
                            edges=[{
                                "name": "rownum",
                                "domain": {
                                    "type": "rownum",
                                    "min": 0,
                                    "max": num_rows,
                                    "interval": 1
                                }
                            }])
            else:
                num_rows = len(data)
                map_index_to_name = {
                    c.push_column: c.push_column_name
                    for c in cols
                }
                temp_data = [data]

                return Data(meta={"format": "cube"},
                            data={
                                n: temp_data[c]
                                for c, n in map_index_to_name.items()
                            },
                            edges=[{
                                "name": "rownum",
                                "domain": {
                                    "type": "rownum",
                                    "min": 0,
                                    "max": num_rows,
                                    "interval": 1
                                }
                            }])

        elif query.format == "table":
            for f, _ in self.sf.tables.items():
                if frum.endswith(f):
                    num_column = MAX([c.push_column for c in cols]) + 1
                    header = [None] * num_column
                    for c in cols:
                        header[c.push_column] = c.push_column_name

                    output_data = []
                    for d in result.data:
                        row = [None] * num_column
                        for c in cols:
                            set_column(row, c.push_column, c.push_child,
                                       c.pull(d))
                        output_data.append(row)

                    return Data(meta={"format": "table"},
                                header=header,
                                data=output_data)
            if isinstance(query.select, list) or isinstance(
                    query.select.value, LeavesOp):
                column_names = [None] * (max(c.push_column for c in cols) + 1)
                for c in cols:
                    column_names[c.push_column] = c.push_column_name

                temp_data = []
                for rownum, d in enumerate(data):
                    row = [None] * len(column_names)
                    for c in cols:
                        row[c.push_column] = d[c.push_name]
                    temp_data.append(row)

                return Data(meta={"format": "table"},
                            header=column_names,
                            data=temp_data)
            else:
                column_names = listwrap(query.select).name
                return Data(meta={"format": "table"},
                            header=column_names,
                            data=[[d] for d in data])

        else:
            for f, _ in self.sf.tables.items():
                if frum.endswith(f) or (test_dots(cols)
                                        and isinstance(query.select, list)):
                    data = []
                    for d in result.data:
                        row = Data()
                        for c in cols:
                            if c.push_child == ".":
                                row[c.push_name] = c.pull(d)
                            elif c.num_push_columns:
                                tuple_value = row[c.push_name]
                                if not tuple_value:
                                    tuple_value = row[c.push_name] = [
                                        None
                                    ] * c.num_push_columns
                                tuple_value[c.push_child] = c.pull(d)
                            else:
                                row[c.push_name][c.push_child] = c.pull(d)

                        data.append(row)

                    return Data(meta={"format": "list"}, data=data)

            if isinstance(query.select, list) or isinstance(
                    query.select.value, LeavesOp):
                temp_data = []
                for rownum, d in enumerate(data):
                    row = {}
                    for c in cols:
                        row[c.push_column_name] = d[c.push_name]
                    temp_data.append(row)
                return Data(meta={"format": "list"}, data=temp_data)
            else:
                return Data(meta={"format": "list"}, data=data)
    def _groupby_op(self, query, frum):
        """
        COMPOSE THE SQL FOR A GROUPBY QUERY

        :param query: THE QUERY; ITS groupby, select, window, where AND sort ARE USED
        :param frum: FULL TABLE NAME; tail_field() SPLITS IT INTO BASE TABLE AND NESTED PATH
        :return: (command, index_to_column) WHERE command IS THE SQL AND
                 index_to_column MAPS RESULT COLUMN NUMBER TO ITS ColumnMapping
                 (WINDOW COLUMNS ARE SELECTED, BUT GET NO ColumnMapping)
        """
        base_table, path = tail_field(frum)
        schema = self.sf.tables[path].schema
        index_to_column = {}

        # ONE SHORT ALIAS PER KNOWN NESTED TABLE: __a__, __b__, ...
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path, _) in enumerate(self.sf.tables.items())
        }

        # ONLY THE TABLES ON THE WAY TO path ARE JOINED, SHALLOWEST FIRST
        tables = []
        for n, a in nest_to_alias.items():
            if startswith_field(path, n):
                tables.append({"nest": n, "alias": a})
        tables = jx.sort(tables, {"value": {"length": "nest"}})

        from_sql = join_field(
            [base_table] + split_field(tables[0].nest)) + " " + tables[0].alias
        previous = tables[0]
        for t in tables[1::]:
            from_sql += (SQL_LEFT_JOIN +
                         quote_column(concat_field(base_table, t.nest)) + " " +
                         t.alias + SQL_ON +
                         join_column(t.alias, quoted_PARENT) + " = " +
                         join_column(previous.alias, quoted_UID))
            # FIX: EACH TABLE JOINS TO ITS IMMEDIATE PARENT, NOT ALWAYS THE FIRST TABLE
            previous = t

        selects = []
        groupby = []
        for i, e in enumerate(query.groupby):
            for edge_sql in e.value.to_sql(schema):
                column_number = len(selects)
                sql_type, sql = edge_sql.sql.items()[0]
                if sql is SQL_NULL and e.value.var not in schema.keys():
                    Log.error("No such column {{var}}", var=e.value.var)

                column_alias = _make_column_name(column_number)
                groupby.append(sql)
                selects.append(sql_alias(sql, column_alias))
                if edge_sql.nested_path == ".":
                    select_name = edge_sql.name
                else:
                    select_name = "."
                index_to_column[column_number] = ColumnMapping(
                    is_edge=True,
                    push_name=e.name,
                    push_column_name=e.name.replace("\\.", "."),
                    push_column=i,
                    push_child=select_name,
                    pull=get_column(column_number),
                    sql=sql,
                    column_alias=column_alias,
                    type=sql_type_to_json_type[sql_type])

        for i, select in enumerate(listwrap(query.select)):
            column_number = len(selects)
            sql_type, sql = select.value.to_sql(schema)[0].sql.items()[0]
            if sql == 'NULL' and select.value.var not in schema.keys():
                Log.error("No such column {{var}}", var=select.value.var)

            if select.value == "." and select.aggregate == "count":
                # COUNTING ROWS, NOT A PARTICULAR COLUMN
                selects.append(
                    sql_alias(sql_count(SQL_ONE), quote_column(select.name)))
            else:
                selects.append(
                    sql_alias(sql_aggs[select.aggregate] + sql_iso(sql),
                              quote_column(select.name)))

            index_to_column[column_number] = ColumnMapping(
                push_name=select.name,
                push_column_name=select.name,
                # AGGREGATES ARE PUSHED AFTER ALL THE GROUPBY COLUMNS
                push_column=i + len(query.groupby),
                push_child=".",
                pull=get_column(column_number),
                sql=sql,
                column_alias=quote_column(select.name),
                type=sql_type_to_json_type[sql_type])

        for w in query.window:
            # FIX: _window_op IS CALLED AS A BOUND METHOD, DO NOT PASS self AGAIN
            selects.append(self._window_op(query, w))

        where = query.where.to_sql(schema)[0].sql.b

        command = (SQL_SELECT + (sql_list(selects)) + SQL_FROM + from_sql +
                   SQL_WHERE + where + SQL_GROUPBY + sql_list(groupby))

        if query.sort:
            # FOR EACH SORT TERM, AND EACH OF ITS TYPED SQL EXPRESSIONS (b, n, s)
            # THAT EXISTS: ORDER BY "IS NULL" FIRST, THEN BY THE VALUE ITSELF
            command += SQL_ORDERBY + sql_list(
                sql_iso(sql[t]) + SQL_IS_NULL + "," + sql[t] +
                (" DESC" if s.sort == -1 else "")
                for s, sql in [(s, s.value.to_sql(schema)[0].sql)
                               for s in query.sort] for t in "bns" if sql[t])

        return command, index_to_column
    def _compose_sql(self, get_ids):
        """
        BUILD ONE SELECT STATEMENT FOR EACH NESTED PATH

        EVERY STATEMENT SELECTS ALL OF self.columns (SO THE STATEMENTS SHARE
        ONE SHAPE); COLUMNS THAT DO NOT BELONG TO THE NESTED PATH ARE NULL

        :param get_ids: SQL to get the ids, and used to select the documents returned
        :return: LIST OF SQL; NESTED PATHS WITH NO REAL (NON-NULL) COLUMN ARE LEFT OUT
        """
        statements = []
        for nested_path in self.all_nested_paths:
            leaf = nested_path[0]

            # MAKE THE REQUIRED JOINS
            joins = []
            for position, join in enumerate(self.nested_path_to_join[leaf]):
                join = wrap(join)
                rel = join.join_columns[0]

                if position == 0:
                    # THE FIRST "TABLE" IS THE SUBQUERY THAT PROVIDES THE IDS
                    source = sql_alias(
                        sql_iso(get_ids),
                        quote_column(rel.referenced.table.alias))
                    joins.append(SQL_FROM + source)
                elif join.children:
                    table_sql = quote_column(rel.table.name, rel.table.schema)
                    target = sql_alias(table_sql,
                                       quote_column(rel.table.alias))
                    condition = sql_and(
                        quote_column(pair.column.name, rel.table.alias) +
                        "=" + quote_column(pair.referenced.column.name,
                                           rel.referenced.table.alias)
                        for pair in join.join_columns)
                    joins.append(SQL_JOIN + target + SQL_ON + condition)
                else:
                    table_sql = quote_column(rel.referenced.table.name,
                                             rel.referenced.table.schema)
                    target = sql_alias(
                        table_sql, quote_column(rel.referenced.table.alias))
                    condition = sql_and(
                        quote_column(pair.referenced.column.name,
                                     rel.referenced.table.alias) + "=" +
                        quote_column(pair.column.name, rel.table.alias)
                        for pair in join.join_columns)
                    joins.append(SQL_LEFT_JOIN + target + SQL_ON + condition)

            # ONLY SELECT WHAT WE NEED, NULL THE REST
            selects = []
            real_column_seen = False
            for ci, c in enumerate(self.columns):
                if c.column_alias[1:] != text_type(ci):
                    Log.error("expecting consistency")

                belongs_here = c.nested_path[0] == leaf
                # PARENT COLUMNS ARE ONLY KEPT WHEN THEY ARE ID REFERENCES
                if belongs_here or (startswith_field(leaf, c.path)
                                    and c.column.is_id):
                    s = sql_alias(
                        quote_column(c.column.column.name, c.table_alias),
                        quote_column(c.column_alias))
                    if belongs_here and s == None:
                        Log.error("bug")
                    selects.append(s)
                    real_column_seen = True
                else:
                    selects.append(
                        sql_alias(SQL_NULL, quote_column(c.column_alias)))

            if real_column_seen:
                statements.append(
                    SQL_SELECT + sql_list(selects) + "".join(joins))
        return statements
Beispiel #13
0
    def _setop(self, query):
        """
        NO AGGREGATION, SIMPLE LIST COMPREHENSION

        :param query: QUERY WITH select, from, where, sort AND limit
        :return: (sql, post) PAIR; post(sql) RUNS THE SQL AND RETURNS
                 THE ROWS (select IS A LIST, OR "."), OR A LIST OF VALUES
                 (select IS A SINGLE COLUMN)
        """
        if isinstance(query.select, list):
            # RETURN BORING RESULT SET
            selects = FlatList()
            for s in listwrap(query.select):
                if isinstance(s.value, Mapping):
                    # ONE COLUMN PER PROPERTY, NAMED <name>.<key>
                    for k, v in s.value.items():
                        selects.append(sql_alias(v, quote_column(s.name + "." + k)))
                elif isinstance(s.value, list):
                    # ONE COLUMN PER TUPLE ELEMENT, NAMED <name>,<index>
                    for i, ss in enumerate(s.value):
                        selects.append(sql_alias(ss, quote_column(s.name + "," + str(i))))
                else:
                    selects.append(sql_alias(s.value, quote_column(s.name)))

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
                {{sort}}
                {{limit}}
            """, {
                "selects": SQL(",\n".join(selects)),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where),
                "limit": self._limit2sql(query.limit),
                "sort": self._sort2sql(query.sort)
            })

            def post_process(sql):
                result = self.db.query(sql)
                for s in listwrap(query.select):
                    if isinstance(s.value, Mapping):
                        # REWRITE THE <name>.<key> COLUMNS AS ONE OBJECT
                        for r in result:
                            r[s.name] = {}
                            for k in s.value.keys():
                                r[s.name][k] = r[s.name + "." + k]
                                r[s.name + "." + k] = None
                    elif isinstance(s.value, list):
                        # REWRITE AS TUPLE
                        for r in result:
                            r[s.name] = tuple(r[s.name + "," + str(i)] for i, ss in enumerate(s.value))
                            for i, ss in enumerate(s.value):
                                r[s.name + "," + str(i)] = None

                expand_json(result)
                return result

            return sql, post_process  # RETURN BORING RESULT SET
        else:
            # RETURN LIST OF VALUES
            if query.select.value == ".":
                select = "*"
            else:
                name = query.select.name
                select = sql_alias(query.select.value, quote_column(name))

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
                {{sort}}
                {{limit}}
            """, {
                "selects": SQL(select),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where),
                "limit": self._limit2sql(query.limit),
                "sort": self._sort2sql(query.sort)
            })

            if query.select.value == ".":
                def post(sql):
                    result = self.db.query(sql)
                    expand_json(result)
                    return result

                return sql, post
            else:
                return sql, lambda sql: [r[name] for r in self.db.query(sql)]  # RETURNING LIST OF VALUES
Beispiel #14
0
    def _grouped(self, query, stacked=False):
        """
        GROUP BY THE QUERY EDGES, AGGREGATING EVERY SELECT COLUMN

        :param query: QUERY WITH select (ALL MUST HAVE A KNOWN aggregate),
                      edges (ONLY "default" DOMAINS), from AND where
        :param stacked: TRUTHY TO GET THE PLAIN DATABASE RESULT SET INSTEAD OF CUBE(S)
        :return: (sql, post) PAIR; post(sql) RUNS THE SQL AND SHAPES THE RESULT
        """
        select = listwrap(query.select)

        # EVERY SELECT COLUMN MUST BE AN AGGREGATE
        for column in select:
            if column.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", select=column)

        edges = query.edges
        groups = FlatList()
        selects = FlatList()
        for edge in edges:
            if edge.domain.type != "default":
                Log.error("domain of type {{type}} not supported, yet", type=edge.domain.type)
            groups.append(edge.value)
            selects.append(sql_alias(edge.value, quote_column(edge.name)))

        for column in select:
            aggregate_sql = aggregates[column.aggregate].replace("{{code}}", column.value)
            selects.append(sql_alias(aggregate_sql, quote_column(column.name)))

        template = """
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
            GROUP BY
                {{groups}}
        """
        sql = expand_template(template, {
            "selects": SQL(",\n".join(selects)),
            "groups": SQL(",\n".join(groups)),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where)
        })

        def post_stacked(sql):
            # RETURN IN THE USUAL DATABASE RESULT SET FORMAT
            return self.db.query(sql)

        def post(sql):
            # result IS INDEXED BY COLUMN: EDGES FIRST, THEN THE AGGREGATES
            result = self.db.column_query(sql)
            num_edges = len(edges)

            # FIND OUT THE default DOMAIN SIZES
            for position, edge in enumerate(edges):
                domain = edge.domain
                if domain.type == "default":
                    domain.type = "set"
                    parts = set(result[position])
                    domain.partitions = [
                        {"index": index, "value": part}
                        for index, part in enumerate(parts)
                    ]
                    domain.map = {part: index for index, part in enumerate(parts)}
                else:
                    Log.error("Do not know what to do here, yet")

            # FILL THE DATA CUBE
            maps = [
                (unwrap(edge.domain.map), result[position])
                for position, edge in enumerate(edges)
            ]
            cubes = FlatList()
            for offset, _ in enumerate(select):
                dims = [
                    len(edge.domain.partitions) + (1 if edge.allow_nulls else 0)
                    for edge in edges
                ]
                cube = Matrix(*dims)
                for rownum, value in enumerate(result[offset + num_edges]):
                    coord = [lookup[values[rownum]] for lookup, values in maps]
                    cube[coord] = value
                cubes.append(cube)

            # A SINGLE (NON-LIST) select GETS A SINGLE CUBE
            return cubes if isinstance(query.select, list) else cubes[0]

        if stacked:
            return sql, post_stacked
        return sql, post
Beispiel #15
0
    def _setop(self, query):
        """
        NO AGGREGATION, SIMPLE LIST COMPREHENSION

        :param query: QUERY WITH select, from, where, sort AND limit
        :return: (sql, post) PAIR; post(sql) RUNS THE SQL AND RETURNS
                 THE ROWS (select IS A LIST, OR "."), OR A LIST OF VALUES
                 (select IS A SINGLE COLUMN)
        """
        if isinstance(query.select, list):
            # RETURN BORING RESULT SET
            selects = FlatList()
            for s in listwrap(query.select):
                if isinstance(s.value, Mapping):
                    # ONE COLUMN PER PROPERTY, NAMED <name>.<key>
                    for k, v in s.value.items():
                        selects.append(sql_alias(v, self.db.quote_column(s.name + "." + k)))
                elif isinstance(s.value, list):
                    # ONE COLUMN PER TUPLE ELEMENT, NAMED <name>,<index>
                    for i, ss in enumerate(s.value):
                        selects.append(sql_alias(ss, self.db.quote_column(s.name + "," + str(i))))
                else:
                    selects.append(sql_alias(s.value, self.db.quote_column(s.name)))

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
                {{sort}}
                {{limit}}
            """, {
                "selects": SQL(",\n".join(selects)),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where),
                "limit": self._limit2sql(query.limit),
                "sort": self._sort2sql(query.sort)
            })

            def post_process(sql):
                result = self.db.query(sql)
                for s in listwrap(query.select):
                    if isinstance(s.value, Mapping):
                        # REWRITE THE <name>.<key> COLUMNS AS ONE OBJECT
                        for r in result:
                            r[s.name] = {}
                            for k in s.value.keys():
                                r[s.name][k] = r[s.name + "." + k]
                                r[s.name + "." + k] = None
                    elif isinstance(s.value, list):
                        # REWRITE AS TUPLE
                        for r in result:
                            r[s.name] = tuple(r[s.name + "," + str(i)] for i, ss in enumerate(s.value))
                            for i, ss in enumerate(s.value):
                                r[s.name + "," + str(i)] = None

                expand_json(result)
                return result

            return sql, post_process  # RETURN BORING RESULT SET
        else:
            # RETURN LIST OF VALUES
            if query.select.value == ".":
                select = "*"
            else:
                name = query.select.name
                select = sql_alias(query.select.value, self.db.quote_column(name))

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
                {{sort}}
                {{limit}}
            """, {
                "selects": SQL(select),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where),
                "limit": self._limit2sql(query.limit),
                "sort": self._sort2sql(query.sort)
            })

            if query.select.value == ".":
                def post(sql):
                    result = self.db.query(sql)
                    expand_json(result)
                    return result

                return sql, post
            else:
                return sql, lambda sql: [r[name] for r in self.db.query(sql)]  # RETURNING LIST OF VALUES
Beispiel #16
0
    def _grouped(self, query, stacked=False):
        """
        GROUP BY THE QUERY EDGES, AGGREGATING EVERY SELECT COLUMN

        :param query: QUERY WITH select (ALL MUST HAVE A KNOWN aggregate),
                      edges (ONLY "default" DOMAINS), from AND where
        :param stacked: TRUTHY TO GET THE PLAIN DATABASE RESULT SET INSTEAD OF CUBE(S)
        :return: (sql, post) PAIR; post(sql) RUNS THE SQL AND SHAPES THE RESULT
        """
        select = listwrap(query.select)

        # EVERY SELECT COLUMN MUST BE AN AGGREGATE
        for column in select:
            if column.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", select=column)

        edges = query.edges
        groups = FlatList()
        selects = FlatList()
        for edge in edges:
            if edge.domain.type != "default":
                Log.error("domain of type {{type}} not supported, yet", type=edge.domain.type)
            groups.append(edge.value)
            selects.append(sql_alias(edge.value, self.db.quote_column(edge.name)))

        for column in select:
            aggregate_sql = aggregates[column.aggregate].replace("{{code}}", column.value)
            selects.append(sql_alias(aggregate_sql, self.db.quote_column(column.name)))

        template = """
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
            GROUP BY
                {{groups}}
        """
        sql = expand_template(template, {
            "selects": SQL(",\n".join(selects)),
            "groups": SQL(",\n".join(groups)),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where)
        })

        def post_stacked(sql):
            # RETURN IN THE USUAL DATABASE RESULT SET FORMAT
            return self.db.query(sql)

        def post(sql):
            # result IS INDEXED BY COLUMN: EDGES FIRST, THEN THE AGGREGATES
            result = self.db.column_query(sql)
            num_edges = len(edges)

            # FIND OUT THE default DOMAIN SIZES
            for position, edge in enumerate(edges):
                domain = edge.domain
                if domain.type == "default":
                    domain.type = "set"
                    parts = set(result[position])
                    domain.partitions = [
                        {"index": index, "value": part}
                        for index, part in enumerate(parts)
                    ]
                    domain.map = {part: index for index, part in enumerate(parts)}
                else:
                    Log.error("Do not know what to do here, yet")

            # FILL THE DATA CUBE
            maps = [
                (unwrap(edge.domain.map), result[position])
                for position, edge in enumerate(edges)
            ]
            cubes = FlatList()
            for offset, _ in enumerate(select):
                dims = [
                    len(edge.domain.partitions) + (1 if edge.allow_nulls else 0)
                    for edge in edges
                ]
                cube = Matrix(*dims)
                for rownum, value in enumerate(result[offset + num_edges]):
                    coord = [lookup[values[rownum]] for lookup, values in maps]
                    cube[coord] = value
                cubes.append(cube)

            # A SINGLE (NON-LIST) select GETS A SINGLE CUBE
            return cubes if isinstance(query.select, list) else cubes[0]

        if stacked:
            return sql, post_stacked
        return sql, post