Esempio n. 1
0
    def _make_range_domain(self, domain, column_name):
        """
        Build a SELECT statement that enumerates the parts of a range domain.

        The numbers are produced by joining the `__digits__` table to itself,
        one alias (a, b, c, ...) per decimal position, and combining the
        `value` columns weighted by 1, 10, 100, ...
        (presumably `__digits__` holds one row per decimal digit - TODO confirm)

        :param domain: range domain; `min`, `max` and `interval` are read
        :param column_name: alias given to the generated column
        :return: SQL text of the SELECT
        """
        width = (domain.max - domain.min) / domain.interval
        digits = Math.floor(Math.log10(width - 1))
        if digits == 0:
            value = "a.value"
        else:
            # PARENTHESIZE THE SUM: IT IS SPLICED INTO "value * interval" BELOW,
            # AND WITHOUT PARENTHESES ONLY THE LAST TERM WOULD BE MULTIPLIED
            value = "(" + "+".join(
                "1" + ("0" * j) + "*" + unicode(chr(ord(b'a') + j)) + ".value"
                for j in range(digits + 1)
            ) + ")"

        # SCALE BY THE INTERVAL (SKIPPED WHEN THE INTERVAL IS 1)
        if domain.interval == 1:
            scaled = value
        else:
            scaled = value + " * " + quote_value(domain.interval)

        # SHIFT BY THE MINIMUM (SKIPPED WHEN THE MINIMUM IS 0)
        if domain.min == 0:
            shifted = scaled
        else:
            shifted = "(" + scaled + ") + " + quote_value(domain.min)

        sql = "SELECT " + shifted + " " + column_name + "\nFROM __digits__ a"

        # ONE EXTRA SELF-JOIN FOR EACH ADDITIONAL DECIMAL POSITION
        for j in range(digits):
            sql += "\nJOIN __digits__ " + unicode(
                chr(ord(b'a') + j + 1)) + " ON 1=1"

        # THE CROSS JOIN OVERSHOOTS; KEEP ONLY THE NUMBERS BELOW width
        sql += "\nWHERE " + value + " < " + quote_value(width)
        return sql
Esempio n. 2
0
    def _insert(self, collection):
        """
        Execute one INSERT statement for every entry of `collection`.

        :param collection: maps nested path to details; `active_columns` and
                           `rows` are read from each details object
        """
        for nested_path, details in collection.items():
            active_columns = wrap(list(details.active_columns))
            table_name = concat_field(self.name, nested_path)

            # THE TABLE NAMED self.name DOES NOT REQUIRE PARENT OR ORDER COLUMNS
            is_primary = table_name == self.name
            meta_columns = [UID] if is_primary else [UID, PARENT, ORDER]
            all_columns = meta_columns + active_columns.es_column

            insert_clause = "INSERT INTO " + quote_table(table_name)
            insert_clause += "(" + ",".join(
                quote_table(name) for name in all_columns) + ")"

            # ONE "SELECT <values>" PER ROW, GLUED TOGETHER WITH UNION ALL
            selects = []
            for row in unwrap(details.rows):
                values = [quote_value(row.get(name)) for name in all_columns]
                selects.append("\nSELECT " + ",".join(values))

            self.db.execute(insert_clause + " UNION ALL ".join(selects))
Esempio n. 3
0
    def _set_op(self, query, frum):
        """
        Run a query that selects (rather than aggregates) documents.

        Builds one list of SQL select clauses covering the sort expressions,
        the UID (and ORDER) of every nested table and the requested columns,
        has `_make_sql_for_one_nest_in_set_op` turn that into SQL, runs it
        with ORDER BY / LIMIT, and then shapes the rows according to
        `query.format` ("cube", "table", or anything else for a list).

        :param query: the query; `select`, `sort`, `where`, `limit` and
                      `format` are read
        :param frum: dotted table name; everything after the first segment is
                     used as the primary nested path
        :return: Data with `meta.format` and `data` (plus `edges` for cube,
                 `header` for table)
        """
        # GET LIST OF COLUMNS
        primary_nested_path = join_field(split_field(frum)[1:])
        vars_ = UNION([s.value.vars() for s in listwrap(query.select)])

        # NOTE(review): THIS SAME MAPPING IS BUILT AGAIN FURTHER DOWN, BEFORE ITS FIRST USE
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path,
                    sub_table) in enumerate(self.nested_tables.items())
        }

        # GROUP THE NON-STRUCT COLUMNS MENTIONED BY THE SELECT, BY THEIR NESTED PATH
        active_columns = {".": []}
        for cname, cols in self.columns.items():
            if any(startswith_field(cname, v) for v in vars_):
                for c in cols:
                    if c.type in STRUCT:
                        continue
                    nest = c.nested_path[0]
                    active = active_columns.get(nest)
                    if not active:
                        active = active_columns[nest] = []
                    active.append(c)
        # ANY VARS MENTIONED WITH NO COLUMNS?
        for v in vars_:
            if not any(
                    startswith_field(cname, v)
                    for cname in self.columns.keys()):
                # STAND-IN COLUMN OF TYPE "null" FOR A VARIABLE THAT MATCHES NO KNOWN COLUMN
                active_columns["."].append(
                    Column(names={self.name: v},
                           type="null",
                           es_column=".",
                           es_index=".",
                           nested_path=["."]))

        # EVERY COLUMN, AND THE INDEX IT TAKES UP
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        index_to_uid = {}  # FROM NESTED PATH TO THE INDEX OF UID
        sql_selects = [
        ]  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path,
                    sub_table) in enumerate(self.nested_tables.items())
        }

        # SORT EXPRESSIONS GET THEIR OWN SELECT COLUMNS SO THE OUTER QUERY CAN ORDER BY ALIAS
        sorts = []
        if query.sort:
            for s in query.sort:
                col = s.value.to_sql(self)[0]
                for t, sql in col.sql.items():
                    json_type = sql_type_to_json_type[t]
                    if json_type in STRUCT:
                        continue
                    column_number = len(sql_selects)
                    # SQL HAS ABS TABLE REFERENCE
                    column_alias = _make_column_name(column_number)
                    sql_selects.append(sql + " AS " + column_alias)
                    # EACH SORT ADDS TWO TERMS: A NULL TEST, THEN THE VALUE ITSELF
                    if s.sort == -1:
                        sorts.append(column_alias + " IS NOT NULL")
                        sorts.append(column_alias + " DESC")
                    else:
                        sorts.append(column_alias + " IS NULL")
                        sorts.append(column_alias)

        primary_doc_details = Data()
        # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH
        # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED
        for nested_path, sub_table in self.nested_tables.items():
            nested_doc_details = {
                "sub_table": sub_table,
                "children": [],
                "index_to_column": {},
                "nested_path":
                [nested_path
                 ]  # fake the real nested path, we only look at [0] anyway
            }

            # INSERT INTO TREE
            if not primary_doc_details:
                primary_doc_details = nested_doc_details
            else:

                # NOTE(review): place() HAS NO BASE CASE THAT RETURNS True, SO THE
                # `return True` SHORT-CIRCUIT BELOW LOOKS UNREACHABLE - CONFIRM INTENT
                def place(parent_doc_details):
                    if startswith_field(nested_path,
                                        parent_doc_details['nested_path'][0]):
                        for c in parent_doc_details['children']:
                            if place(c):
                                return True
                        parent_doc_details['children'].append(
                            nested_doc_details)

                place(primary_doc_details)

            alias = nested_doc_details['alias'] = nest_to_alias[nested_path]

            # WE ALWAYS ADD THE UID AND ORDER
            column_number = index_to_uid[nested_path] = nested_doc_details[
                'id_coord'] = len(sql_selects)
            sql_select = alias + "." + quoted_UID
            sql_selects.append(sql_select + " AS " +
                               _make_column_name(column_number))
            if nested_path != ".":
                # NOTE(review): column_number IS NOT ADVANCED HERE, SO THE ORDER COLUMN
                # GETS THE SAME ALIAS AS THE UID COLUMN ABOVE - CONFIRM THIS IS INTENDED
                sql_select = alias + "." + quote_table(ORDER)
                sql_selects.append(sql_select + " AS " +
                                   _make_column_name(column_number))

            # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM
            if nested_path not in active_columns:
                continue

            if primary_nested_path == nested_path:
                # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE
                si = 0
                for s in listwrap(query.select):
                    try:
                        column_number = len(sql_selects)
                        s.pull = get_column(column_number)
                        db_columns = s.value.to_sql(self)

                        if isinstance(s.value, LeavesOp):
                            # LEAVES: EVERY COLUMN BECOMES ITS OWN OUTPUT COLUMN (si ADVANCES PER COLUMN)
                            for column in db_columns:
                                for t, unsorted_sql in column.sql.items():
                                    json_type = sql_type_to_json_type[t]
                                    if json_type in STRUCT:
                                        continue
                                    column_number = len(sql_selects)
                                    # SQL HAS ABS TABLE REFERENCE
                                    column_alias = _make_column_name(
                                        column_number)
                                    sql_selects.append(unsorted_sql + " AS " +
                                                       column_alias)
                                    index_to_column[
                                        column_number] = nested_doc_details[
                                            'index_to_column'][
                                                column_number] = Data(
                                                    push_name=concat_field(
                                                        s.name, column.name),
                                                    push_column=si,
                                                    push_child=".",
                                                    pull=get_column(
                                                        column_number),
                                                    sql=unsorted_sql,
                                                    type=json_type,
                                                    nested_path=[nested_path]
                                                    # fake the real nested path, we only look at [0] anyway
                                                )
                                    si += 1
                        else:
                            # OTHERWISE ALL COLUMNS OF THIS SELECT SHARE ONE OUTPUT COLUMN (SAME si)
                            for column in db_columns:
                                for t, unsorted_sql in column.sql.items():
                                    json_type = sql_type_to_json_type[t]
                                    if json_type in STRUCT:
                                        continue
                                    column_number = len(sql_selects)
                                    # SQL HAS ABS TABLE REFERENCE
                                    column_alias = _make_column_name(
                                        column_number)
                                    sql_selects.append(unsorted_sql + " AS " +
                                                       column_alias)
                                    index_to_column[
                                        column_number] = nested_doc_details[
                                            'index_to_column'][
                                                column_number] = Data(
                                                    push_name=s.name,
                                                    push_column=si,
                                                    push_child=column.name,
                                                    pull=get_column(
                                                        column_number),
                                                    sql=unsorted_sql,
                                                    type=json_type,
                                                    nested_path=[nested_path]
                                                    # fake the real nested path, we only look at [0] anyway
                                                )
                    finally:
                        # RUNS FOR EVERY SELECT CLAUSE, EVEN WHEN THE BODY ABOVE RAISES
                        si += 1
            elif startswith_field(nested_path, primary_nested_path):
                # ADD REQUIRED COLUMNS, FOR DEEP STUFF
                # NOTE(review): `s` AND `si` ARE NOT ASSIGNED IN THIS BRANCH; THEY HOLD WHATEVER
                # AN EARLIER LOOP LEFT BEHIND (OR ARE UNBOUND IF NONE RAN) - CONFIRM
                for ci, c in enumerate(active_columns[nested_path]):
                    if c.type in STRUCT:
                        continue

                    column_number = len(sql_selects)
                    # NOTE: REBINDS THE OUTER LOOP VARIABLE nested_path TO THE COLUMN'S LIST OF PATHS
                    nested_path = c.nested_path
                    unsorted_sql = nest_to_alias[
                        nested_path[0]] + "." + quote_table(c.es_column)
                    column_alias = _make_column_name(column_number)
                    sql_selects.append(unsorted_sql + " AS " + column_alias)
                    index_to_column[column_number] = nested_doc_details[
                        'index_to_column'][column_number] = Data(
                            push_name=s.name,
                            push_column=si,
                            push_child=relative_field(c.name, s.name),
                            pull=get_column(column_number),
                            sql=unsorted_sql,
                            type=c.type,
                            nested_path=nested_path)

        where_clause = query.where.to_sql(self, boolean=True)[0].sql.b

        unsorted_sql = self._make_sql_for_one_nest_in_set_op(
            ".", sql_selects, where_clause, active_columns, index_to_column)

        # AFTER THE REQUESTED SORTS, ORDER BY THE UID COLUMN OF EVERY NESTED TABLE
        for n, _ in self.nested_tables.items():
            sorts.append(COLUMN + unicode(index_to_uid[n]))

        ordered_sql = ("SELECT * FROM (\n" + unsorted_sql + "\n)" +
                       "\nORDER BY\n" + ",\n".join(sorts) + "\nLIMIT " +
                       quote_value(query.limit))
        result = self.db.query(ordered_sql)

        def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id,
                               parent_id_coord):
            """
            Fold consecutive result rows into the documents of one nesting level.

            :param rows: REVERSED STACK OF ROWS (WITH append() AND pop())
            :param row: CURRENT ROW BEING EXTRACTED
            :param nested_doc_details: {
                    "nested_path": wrap_nested_path(nested_path),
                    "index_to_column": map from column number to column details
                    "children": all possible direct descendants' nested_doc_details
                 }
            :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop)
            :param parent_id_coord: the column number for the parent id (so we can extract from each row)
            :return: the nested property (usually an array), or None when nothing was collected
            """
            previous_doc_id = None
            doc = Data()
            output = []
            id_coord = nested_doc_details['id_coord']

            while True:
                doc_id = row[id_coord]

                # STOP WHEN THIS LEVEL HAS NO ID IN THE ROW, OR THE ROW BELONGS TO ANOTHER PARENT
                if doc_id == None or (parent_id_coord is not None and
                                      row[parent_id_coord] != parent_doc_id):
                    rows.append(row)  # UNDO
                    output = unwraplist(output)
                    return output if output else None

                # A CHANGE OF ID STARTS A NEW DOCUMENT
                if doc_id != previous_doc_id:
                    previous_doc_id = doc_id
                    doc = Data()
                    curr_nested_path = nested_doc_details['nested_path'][0]
                    if isinstance(query.select, list) or isinstance(
                            query.select.value, LeavesOp):
                        # ASSIGN INNER PROPERTIES
                        for i, c in nested_doc_details[
                                'index_to_column'].items():
                            value = row[i]
                            if value == None:
                                continue
                            if value == '':
                                continue

                            relative_path = relative_field(
                                concat_field(c.push_name, c.push_child),
                                curr_nested_path)
                            if relative_path == ".":
                                doc = value
                            else:
                                doc[relative_path] = value
                    else:
                        # ASSIGN INNER PROPERTIES
                        for i, c in nested_doc_details[
                                'index_to_column'].items():
                            value = row[i]
                            if value is not None:
                                relative_path = relative_field(
                                    c.push_child, curr_nested_path)
                                if relative_path == ".":
                                    doc = value
                                else:
                                    doc[relative_path] = value
                    output.append(doc)

                # ASSIGN NESTED ARRAYS
                for child_details in nested_doc_details['children']:
                    child_id = row[child_details['id_coord']]
                    if child_id is not None:
                        nested_value = _accumulate_nested(
                            rows, row, child_details, doc_id, id_coord)
                        if nested_value is not None:
                            path = child_details['nested_path'][0]
                            doc[path] = nested_value

                # ADVANCE TO THE NEXT ROW; AN EMPTY STACK ENDS THIS LEVEL
                try:
                    row = rows.pop()
                except IndexError:
                    output = unwraplist(output)
                    return output if output else None

        cols = tuple(index_to_column.values())

        if query.format == "cube":
            # CUBE: ONE LIST PER OUTPUT COLUMN, INDEXED BY ROW NUMBER
            num_rows = len(result.data)
            num_cols = MAX([c.push_column
                            for c in cols]) + 1 if len(cols) else 0
            map_index_to_name = {c.push_column: c.push_name for c in cols}
            temp_data = [[None] * num_rows for _ in range(num_cols)]
            for rownum, d in enumerate(result.data):
                for c in cols:
                    if c.push_child == ".":
                        temp_data[c.push_column][rownum] = c.pull(d)
                    else:
                        # SEVERAL SQL COLUMNS FEED ONE OUTPUT CELL: COLLECT THEM IN A dict
                        column = temp_data[c.push_column][rownum]
                        if column is None:
                            column = temp_data[c.push_column][rownum] = {}
                        column[c.push_child] = c.pull(d)

            output = Data(
                meta={"format": "cube"},
                data={n: temp_data[c]
                      for c, n in map_index_to_name.items()},
                edges=[{
                    "name": "rownum",
                    "domain": {
                        "type": "rownum",
                        "min": 0,
                        "max": num_rows,
                        "interval": 1
                    }
                }])
            return output
        elif query.format == "table":
            # NOTE(review): UNLIKE THE CUBE BRANCH, THERE IS NO GUARD HERE FOR AN EMPTY cols
            num_column = MAX([c.push_column for c in cols]) + 1
            header = [None] * num_column
            for c in cols:
                # header[c.push_column] = c.push_name
                sf = split_field(c.push_name)
                if len(sf) == 0:
                    header[c.push_column] = "."
                elif len(sf) == 1:
                    header[c.push_column] = sf[0]
                else:
                    # TABLES ONLY USE THE FIRST-LEVEL PROPERTY NAMES
                    # PUSH ALL DEEPER NAMES TO CHILD
                    header[c.push_column] = sf[0]
                    c.push_child = join_field(sf[1:] +
                                              split_field(c.push_child))

            output_data = []
            for d in result.data:
                row = [None] * num_column
                for c in cols:
                    set_column(row, c.push_column, c.push_child, c.pull(d))
                output_data.append(row)
            return Data(meta={"format": "table"},
                        header=header,
                        data=output_data)
        else:
            # LIST: ROWS ARE REVERSED SO pop() YIELDS THEM IN RESULT ORDER
            rows = list(reversed(unwrap(result.data)))
            # NOTE(review): pop() ON AN EMPTY RESULT WOULD RAISE IndexError HERE - CONFIRM
            row = rows.pop()
            output = Data(meta={"format": "list"},
                          data=listwrap(
                              _accumulate_nested(rows, row,
                                                 primary_doc_details, None,
                                                 None)))
            return output
Esempio n. 4
0
    def update(self, command):
        """
        Apply `set` and `clear` instructions to the rows selected by `where`.

        Values whose type is "nested" are written to the matching nested
        table: the DELETE below runs first, then the new child rows are
        inserted.  All other values, and every cleared column, become
        assignments in one UPDATE statement on this table.

        :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
        """
        command = wrap(command)

        # REJECT DEEP UPDATES
        # set() AROUND keys() SO THE UNION ALSO WORKS WHEN keys() RETURNS A list
        touched_columns = set(command.set.keys()) | set(listwrap(command['clear']))
        for c in self.get_leaves():
            if c.name in touched_columns and c.nested_path and len(
                    c.name) > len(c.nested_path[0]):
                Log.error("Deep update not supported")

        # TRANSLATE THE VARIABLES OF THE WHERE CLAUSE TO DATABASE COLUMN NAMES
        where = jx_expression(command.where)
        _vars = where.vars()
        _map = {
            v: c.es_column
            for v in _vars for c in self.columns.get(v, Null)
            if c.type not in STRUCT
        }
        where_sql = where.map(_map).to_sql()

        # ADD NEW COLUMNS
        new_columns = set(command.set.keys()) - set(self.columns.keys())
        for new_column_name in new_columns:
            nested_value = command.set[new_column_name]
            ctype = get_type(nested_value)
            column = Column(names={self.name: new_column_name},
                            type=ctype,
                            es_index=self.name,
                            es_column=typed_column(new_column_name, ctype))
            self.add_column(column)

        # UPDATE THE NESTED VALUES
        for nested_column_name, nested_value in command.set.items():
            if get_type(nested_value) != "nested":
                continue

            # THE NESTED TABLES ARE KEPT ON THE INSTANCE (WAS A BARE, UNDEFINED NAME)
            nested_table = self.nested_tables[nested_column_name]
            self_primary_key = ",".join(
                quote_table(c.es_column) for u in self.uid
                for c in self.columns[u])
            extra_key_name = UID_PREFIX + "id" + unicode(len(self.uid))
            extra_key = [e
                         for e in nested_table.columns[extra_key_name]][0]

            # NOTE(review): THE EXISTS SUBQUERY ONLY REFERS TO ITS OWN ALIASES (n, t),
            # NOT TO THE ROW BEING DELETED - CONFIRM IT RESTRICTS THE DELETE AS INTENDED
            sql_command = "DELETE FROM " + quote_table(nested_table.name) + \
                          "\nWHERE EXISTS (" + \
                          "\nSELECT 1 " + \
                          "\nFROM " + quote_table(nested_table.name) + " n" + \
                          "\nJOIN (" + \
                          "\nSELECT " + self_primary_key + \
                          "\nFROM " + quote_table(self.name) + \
                          "\nWHERE " + where_sql + \
                          "\n) t ON " + \
                          " AND ".join(
                              "t." + quote_table(c.es_column) + " = n." + quote_table(c.es_column)
                              for u in self.uid
                              for c in self.columns[u]
                          ) + \
                          ")"
            self.db.execute(sql_command)

            # INSERT NEW RECORDS
            if not nested_value:
                continue

            doc_collection = {}
            for d in listwrap(nested_value):
                nested_table.flatten(d,
                                     Data(),
                                     doc_collection,
                                     path=nested_column_name)

            prefix = "INSERT INTO " + quote_table(nested_table.name) + \
                     "(" + \
                     self_primary_key + "," + \
                     _quote_column(extra_key) + "," + \
                     ",".join(
                         quote_table(c.es_column)
                         for c in doc_collection.get(".", Null).active_columns
                     ) + ")"

            # BUILD THE PARENT TABLES
            # REUSE where_sql SO THE PARENT ROWS ARE FILTERED WITH THE SAME
            # (COLUMN-MAPPED) CONDITION AS THE DELETE ABOVE AND THE UPDATE BELOW
            parent = "\nSELECT " + \
                     self_primary_key + \
                     "\nFROM " + quote_table(self.name) + \
                     "\nWHERE " + where_sql

            # BUILD THE RECORDS
            children = " UNION ALL ".join(
                "\nSELECT " + quote_value(i) + " " +
                quote_table(extra_key.es_column) + "," + ",".join(
                    quote_value(row[c.name]) + " " +
                    quote_table(c.es_column)
                    for c in doc_collection.get(".", Null).active_columns)
                for i, row in enumerate(
                    doc_collection.get(".", Null).rows))

            # EVERY MATCHING PARENT ROW GETS A COPY OF EVERY CHILD ROW
            sql_command = prefix + \
                          "\nSELECT " + \
                          ",".join(
                              "p." + quote_table(c.es_column)
                              for u in self.uid for c in self.columns[u]
                          ) + "," + \
                          "c." + _quote_column(extra_key) + "," + \
                          ",".join(
                              "c." + quote_table(c.es_column)
                              for c in doc_collection.get(".", Null).active_columns
                          ) + \
                          "\nFROM (" + parent + ") p " + \
                          "\nJOIN (" + children + \
                          "\n) c on 1=1"

            self.db.execute(sql_command)

            # THE CHILD COLUMNS COULD HAVE EXPANDED
            # ADD COLUMNS TO SELF
            for n, cs in nested_table.columns.items():
                for c in cs:
                    column = Column(names={self.name: c.name},
                                    type=c.type,
                                    es_index=c.es_index,
                                    es_column=c.es_column,
                                    nested_path=[nested_column_name] +
                                    c.nested_path)
                    if c.name not in self.columns:
                        self.columns[column.name] = {column}
                    elif c.type not in [
                            existing.type for existing in self.columns[c.name]
                    ]:
                        # DISTINCT NAME IN THE COMPREHENSION: DO NOT SHADOW THE LOOP VARIABLE c
                        self.columns[column.name].add(column)

        # ASSIGNMENTS FOR THE PRIMARY TABLE: PLAIN VALUES, THEN CLEARED COLUMNS
        set_clauses = [
            _quote_column(c) + "=" + quote_value(get_if_type(v, c.type))
            for k, v in command.set.items()
            if get_type(v) != "nested"
            for c in self.columns[k]
            if c.type != "nested" and len(c.nested_path) == 1
        ]
        clear_clauses = [
            _quote_column(c) + "=NULL"
            for k in listwrap(command['clear'])
            if k in self.columns
            for c in self.columns[k]
            if c.type != "nested" and len(c.nested_path) == 1
        ]
        assignments = set_clauses + clear_clauses
        if not assignments:
            # ONLY NESTED VALUES WERE GIVEN; AN UPDATE WITH AN EMPTY SET LIST IS NOT VALID SQL
            return

        sql_command = "UPDATE " + quote_table(self.name) + " SET " + \
                      ",\n".join(assignments) + \
                      " WHERE " + where_sql

        self.db.execute(sql_command)
Esempio n. 5
0
    def _edges_op(self, query, frum):
        """
        Start building the SQL for a query that has edges.

        NOTE(review): the definition as it appears here stops part-way through
        the per-edge loop; the visible portion only prepares the FROM clause,
        a set of empty accumulators, and the `edge_values` of each edge, and
        returns nothing.

        :param query: the query; `edges` is read
        :param frum: dotted table name; first segment is the base table, the
                     remainder is the nested path being queried
        """
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        outer_selects = [
        ]  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        tables = []
        base_table = split_field(frum)[0]
        path = join_field(split_field(frum)[1:])
        # ONE ALIAS (__a__, __b__, ...) PER NESTED TABLE
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path,
                    sub_table) in enumerate(self.nested_tables.items())
        }

        columns = self._get_sql_schema(frum)

        # KEEP ONLY THE NESTED TABLES THAT path STARTS WITH; SORT KEY IS THE LENGTH OF "nest"
        tables = []
        for n, a in nest_to_alias.items():
            if startswith_field(path, n):
                tables.append({"nest": n, "alias": a})
        tables = jx.sort(tables, {"value": {"length": "nest"}})

        from_sql = join_field(
            [base_table] + split_field(tables[0].nest)) + " " + tables[0].alias
        previous = tables[0]
        # NOTE(review): `previous` IS NEVER ADVANCED INSIDE THIS LOOP, SO EVERY JOIN
        # IS MADE AGAINST THE FIRST TABLE'S ALIAS - CONFIRM THIS IS INTENDED
        for t in tables[1::]:
            from_sql += "\nLEFT JOIN\n" + join_field(
                [base_table] + split_field(t.nest)
            ) + " " + t.alias + " ON " + t.alias + "." + PARENT + " = " + previous.alias + "." + GUID

        # SHIFT THE COLUMN DEFINITIONS BASED ON THE NESTED QUERY DEPTH
        ons = []
        join_types = []
        wheres = []
        not_ons = ["__exists__ IS NULL"]
        groupby = []
        not_groupby = []
        orderby = []
        domains = []
        select_clause = [
            "1 __exists__"  # USED TO DISTINGUISH BETWEEN NULL-BECAUSE-LEFT-JOIN OR NULL-BECAUSE-NULL-VALUE
        ]

        for edge_index, query_edge in enumerate(query.edges):
            edge_alias = "e" + unicode(edge_index)

            # edge_values: (type, sql) PAIRS DESCRIBING THE EDGE
            if query_edge.value:
                edge_values = [
                    p for c in query_edge.value.to_sql(self).sql
                    for p in c.items()
                ]
            elif not query_edge.value and any(
                    query_edge.domain.partitions.where):
                # NO VALUE, BUT PARTITIONS HAVE where CLAUSES: A CASE EXPRESSION MAPS
                # EACH ROW TO THE INDEX OF THE FIRST PARTITION IT MATCHES (NULL IF NONE)
                case = "CASE "
                for pp, p in enumerate(query_edge.domain.partitions):
                    w = p.where.to_sql(self)[0].sql.b
                    t = quote_value(pp)
                    case += " WHEN " + w + " THEN " + t
                case += " ELSE NULL END "
                edge_values = [("n", case)]
            elif query_edge.range:
                # RANGE EDGE: THE PAIRS OF THE min EXPRESSION FOLLOWED BY THOSE OF THE max
                # NOTE(review): `+` ON items() ASSUMES items() RETURNS A list - CONFIRM
                edge_values = query_edge.range.min.to_sql(self)[0].sql.items(
                ) + query_edge.range.max.to_sql(self)[0].sql.items()
Esempio n. 6
0
                    num_push_columns=num_push_columns,
                    push_child=push_child,  # CAN NOT HANDLE TUPLES IN COLUMN
                    pull=pull,
                    sql=sql,
                    type=sql_type_to_json_type[json_type])

            vals = [v for t, v in edge_values]
            if query_edge.domain.type == "set":
                domain_name = "d" + unicode(edge_index) + "c" + unicode(
                    column_index)
                domain_names = [domain_name]
                if len(edge_names) > 1:
                    Log.error("Do not know how to handle")
                if query_edge.value:
                    domain = "\nUNION ALL\n".join(
                        "SELECT " + quote_value(coalesce(p.dataIndex, i)) +
                        " AS rownum, " + quote_value(p.value) + " AS " +
                        domain_name
                        for i, p in enumerate(query_edge.domain.partitions))
                    if query_edge.allowNulls:
                        domain += "\nUNION ALL\nSELECT " + quote_value(
                            len(query_edge.domain.partitions)
                        ) + " AS rownum, NULL AS " + domain_name
                    where = None
                    join_type = "LEFT JOIN" if query_edge.allowNulls else "JOIN"
                    on_clause = " OR ".join(
                        edge_alias + "." + k + " = " + v
                        for k, (t, v) in zip(domain_names, edge_values))
                    not_on_clause = None
                else:
                    domain = "\nUNION ALL\n".join(