コード例 #1
0
ファイル: snowflake.py プロジェクト: nknick99/MySQL-to-S3
    def create_fact(self, uid=UID):
        """
        Issue the CREATE TABLE statement for the fact table (self.fact).

        Every uid name other than the default UID is registered as a string
        column before the table definition is assembled.

        :param uid: name, or list of names, for the GUID
        :return: None
        """
        self.add_table_to_schema(["."])

        # REGISTER A STRING COLUMN FOR EACH NON-DEFAULT KEY NAME
        for key_name in listwrap(uid):
            if key_name == UID:
                continue
            key_column = Column(
                names={".": key_name},
                type="string",
                es_column=typed_column(key_name, "string"),
                es_index=self.fact
            )
            self.add_column_to_schema(key_column)

        # COLUMN DEFINITIONS: GUID, UID, THEN EVERY COLUMN OF THE "." TABLE
        column_defs = [quoted_GUID + " TEXT ", quoted_UID + " INTEGER"]
        column_defs.extend(
            quote_column(col.es_column) + " " + sql_types[col.type]
            for col in self.tables["."].schema.columns
        )

        # THE PRIMARY KEY SPANS THE SAME COLUMNS
        key_parts = [quoted_GUID, quoted_UID]
        key_parts.extend(
            quote_column(col.es_column)
            for col in self.tables["."].schema.columns
        )
        column_defs.append("PRIMARY KEY " + sql_iso(sql_list(key_parts)))

        command = (
            "CREATE TABLE " + quote_column(self.fact) +
            sql_iso(sql_list(column_defs))
        )
        self.db.execute(command)
コード例 #2
0
ファイル: namespace.py プロジェクト: klahnakoski/annotations
    def create_or_replace_facts(self, fact_name, uid=UID):
        """
        Call remove_snowflake(fact_name), register a fresh snowflake under
        that name, create its fact table and return a Facts wrapper for it.

        :param fact_name:  NAME FOR THE CENTRAL FACTS
        :param uid: name, or list of names, for the GUID
        :return: Facts
        """
        self.remove_snowflake(fact_name)
        self._snowflakes[fact_name] = ["."]

        # REGISTER A STRING COLUMN FOR EACH NON-DEFAULT KEY NAME
        new_columns = []
        for key_name in listwrap(uid):
            if key_name == UID:
                continue
            key_column = Column(
                name=key_name,
                jx_type=mo_json.STRING,
                es_column=typed_column(
                    key_name, json_type_to_sql_type[mo_json.STRING]),
                es_type=json_type_to_sqlite_type[mo_json.STRING],
                es_index=fact_name,
                last_updated=Date.now()
            )
            self.add_column_to_schema(key_column)
            new_columns.append(key_column)

        # COLUMN DEFINITIONS: GUID, UID, THEN THE NEWLY ADDED KEY COLUMNS
        column_defs = [quoted_GUID + " TEXT ", quoted_UID + " INTEGER"]
        column_defs.extend(
            quote_column(col.es_column) + " " + col.es_type
            for col in new_columns
        )

        # THE PRIMARY KEY SPANS THE SAME COLUMNS
        key_parts = [quoted_GUID, quoted_UID]
        key_parts.extend(quote_column(col.es_column) for col in new_columns)
        column_defs.append("PRIMARY KEY " + sql_iso(sql_list(key_parts)))

        command = (
            "CREATE TABLE " + quote_column(fact_name) +
            sql_iso(sql_list(column_defs))
        )

        with self.db.transaction() as txn:
            txn.execute(command)

        return Facts(self, Snowflake(fact_name, self))
コード例 #3
0
    def create_snowflake(self, fact_name, uid=UID):
        """
        Issue the CREATE TABLE statement for `fact_name` and return a Facts
        wrapper around a new Snowflake of that name.

        :param fact_name:  NAME FOR THE CENTRAL FACTS
        :param uid: name, or list of names, for the GUID
        :return: Facts
        """
        self.add_table_to_schema(["."])

        # REGISTER A STRING COLUMN FOR EACH NON-DEFAULT KEY NAME
        for key_name in listwrap(uid):
            if key_name == UID:
                continue
            key_column = Column(
                name=key_name,
                jx_type=STRING,
                es_column=typed_column(key_name, "string"),
                es_index=fact_name
            )
            self.add_column_to_schema(key_column)

        # COLUMN DEFINITIONS: GUID, UID, THEN EVERY COLUMN OF THE "." TABLE
        column_defs = [quoted_GUID + " TEXT ", quoted_UID + " INTEGER"]
        column_defs.extend(
            quote_column(col.es_column) + " " + json_type_to_sqlite_type[col.jx_type]
            for col in self.tables["."].schema.columns
        )

        # THE PRIMARY KEY SPANS THE SAME COLUMNS
        key_parts = [quoted_GUID, quoted_UID]
        key_parts.extend(
            quote_column(col.es_column)
            for col in self.tables["."].schema.columns
        )
        column_defs.append("PRIMARY KEY " + sql_iso(sql_list(key_parts)))

        command = (
            "CREATE TABLE " + quote_column(fact_name) +
            sql_iso(sql_list(column_defs))
        )
        self.db.execute(command)

        return Facts(self, Snowflake(fact_name, self))
コード例 #4
0
    def update(self, command):
        """
        Apply a set/clear update to the rows of the fact table matching a filter.

        Nested (list) values are handled by deleting the matching child rows
        and inserting the new ones; all other values are written with a single
        UPDATE statement at the end.

        :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
        :return: None
        """
        command = wrap(command)

        # REJECT DEEP UPDATES
        touched_columns = command.set.keys() | set(listwrap(command['clear']))
        for c in self.get_leaves():
            if c.name in touched_columns and c.nested_path and len(
                    c.name) > len(c.nested_path[0]):
                Log.error("Deep update not supported")

        # ADD NEW COLUMNS
        where = jx_expression(command.where)
        _vars = where.vars()
        # MAP EACH VARIABLE IN THE FILTER TO THE es_column OF ITS NON-STRUCT COLUMNS
        _map = {
            v: c.es_column
            for v in _vars for c in self.columns.get(v, Null)
            if c.type not in STRUCT
        }
        where_sql = where.map(_map).to_sql()
        # ANY "set" KEY WITHOUT AN EXISTING COLUMN GETS ONE, TYPED FROM ITS VALUE
        new_columns = set(command.set.keys()) - set(self.columns.keys())
        for new_column_name in new_columns:
            nested_value = command.set[new_column_name]
            ctype = get_type(nested_value)
            column = Column(names={".": new_column_name},
                            type=ctype,
                            es_index=self.sf.fact,
                            es_column=typed_column(new_column_name, ctype))
            self.add_column(column)

        # UPDATE THE NESTED VALUES
        for nested_column_name, nested_value in command.set.items():
            if get_type(nested_value) == "nested":
                # NOTE(review): nested_table_name IS ASSIGNED BUT NOT READ BELOW
                nested_table_name = concat_field(self.sf.fact,
                                                 nested_column_name)
                # NOTE(review): nested_tables IS NOT DEFINED IN THIS METHOD; PRESUMABLY FROM AN ENCLOSING SCOPE - CONFIRM
                nested_table = nested_tables[nested_column_name]
                self_primary_key = ",".join(
                    quote_table(c.es_column) for u in self.uid
                    for c in self.columns[u])
                extra_key_name = UID_PREFIX + "id" + text_type(len(self.uid))
                extra_key = [e
                             for e in nested_table.columns[extra_key_name]][0]

                # REMOVE THE EXISTING CHILD ROWS THAT BELONG TO THE MATCHED PARENT ROWS
                sql_command = "DELETE FROM " + quote_table(nested_table.name) + \
                              "\nWHERE EXISTS (" + \
                              "\nSELECT 1 " + \
                              "\nFROM " + quote_table(nested_table.name) + " n" + \
                              "\nJOIN (" + \
                              "\nSELECT " + self_primary_key + \
                              "\nFROM " + quote_table(self.sf.fact) + \
                              "\nWHERE " + where_sql + \
                              "\n) t ON " + \
                              " AND ".join(
                                  "t." + quote_table(c.es_column) + " = n." + quote_table(c.es_column)
                                  for u in self.uid
                                  for c in self.columns[u]
                              ) + \
                              ")"
                self.db.execute(sql_command)

                # INSERT NEW RECORDS
                if not nested_value:
                    continue

                # FLATTEN EACH NEW CHILD DOCUMENT INTO doc_collection
                doc_collection = {}
                for d in listwrap(nested_value):
                    nested_table.flatten(d,
                                         Data(),
                                         doc_collection,
                                         path=nested_column_name)

                prefix = "INSERT INTO " + quote_table(nested_table.name) + \
                         "(" + \
                         self_primary_key + "," + \
                         quote_column(extra_key) + "," + \
                         ",".join(
                             quote_table(c.es_column)
                             for c in doc_collection.get(".", Null).active_columns
                         ) + ")"

                # BUILD THE PARENT TABLES
                # NOTE(review): THIS WHERE CLAUSE IS REBUILT WITHOUT THE _map TRANSLATION USED FOR where_sql - CONFIRM INTENDED
                parent = "\nSELECT " + \
                         self_primary_key + \
                         "\nFROM " + quote_table(self.sf.fact) + \
                         "\nWHERE " + jx_expression(command.where).to_sql()

                # BUILD THE RECORDS
                children = " UNION ALL ".join(
                    "\nSELECT " + quote_value(i) + " " +
                    quote_table(extra_key.es_column) + "," + ",".join(
                        quote_value(row[c.name]) + " " +
                        quote_table(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns)
                    for i, row in enumerate(
                        doc_collection.get(".", Null).rows))

                # JOIN EVERY MATCHED PARENT ROW WITH EVERY NEW CHILD RECORD (JOIN CONDITION IS 1=1)
                sql_command = prefix + \
                              "\nSELECT " + \
                              ",".join(
                                  "p." + quote_table(c.es_column)
                                  for u in self.uid for c in self.columns[u]
                              ) + "," + \
                              "c." + quote_column(extra_key) + "," + \
                              ",".join(
                                  "c." + quote_table(c.es_column)
                                  for c in doc_collection.get(".", Null).active_columns
                              ) + \
                              "\nFROM (" + parent + ") p " + \
                              "\nJOIN (" + children + \
                              "\n) c on 1=1"

                self.db.execute(sql_command)

                # THE CHILD COLUMNS COULD HAVE EXPANDED
                # ADD COLUMNS TO SELF
                for n, cs in nested_table.columns.items():
                    for c in cs:
                        column = Column(names={".": c.name},
                                        type=c.type,
                                        es_index=c.es_index,
                                        es_column=c.es_column,
                                        nested_path=[nested_column_name] +
                                        c.nested_path)
                        if c.name not in self.columns:
                            self.columns[column.name] = {column}
                        elif c.type not in [
                                c.type for c in self.columns[c.name]
                        ]:
                            self.columns[column.name].add(column)

        # UPDATE THE NON-NESTED VALUES: "set" ASSIGNMENTS FIRST, THEN "clear" COLUMNS SET TO NULL
        # ONLY COLUMNS WITH A SINGLE-ELEMENT nested_path ARE INCLUDED
        command = (
            "UPDATE " + quote_table(self.sf.fact) + " SET " + ",\n".join([
                quote_column(c) + "=" + quote_value(get_if_type(v, c.type))
                for k, v in command.set.items() if get_type(v) != "nested"
                for c in self.columns[k]
                if c.type != "nested" and len(c.nested_path) == 1
            ] + [
                quote_column(c) + "=NULL"
                for k in listwrap(command['clear']) if k in self.columns
                for c in self.columns[k]
                if c.type != "nested" and len(c.nested_path) == 1
            ]) + " WHERE " + where_sql)

        self.db.execute(command)
コード例 #5
0
        def _flatten(data,
                     uid,
                     parent_id,
                     order,
                     full_path,
                     nested_path,
                     row=None,
                     guid=None):
            """
            Recursively pull `data` apart into rows stored in `doc_collection`,
            keyed by nested path.  Columns not yet in `abs_schema` are created
            and recorded in `required_changes`.

            :param data: the data we are pulling apart
            :param uid: the uid we are giving this doc
            :param parent_id: the parent id of this (sub)doc
            :param order: the number of siblings before this one
            :param full_path: path to this (sub)doc
            :param nested_path: list of paths, deepest first
            :param row: we will be filling this
            :param guid: stored under GUID when a new row is started
            :return:
            """
            # TABLE NAME USED AS es_index FOR ANY COLUMN CREATED AT THIS LEVEL
            table = concat_field(self.sf.fact, nested_path[0])
            insertion = doc_collection[nested_path[0]]
            if not row:
                # NO ROW WAS PASSED IN: START A NEW ONE FOR THIS (SUB)DOC
                row = {GUID: guid, UID: uid, PARENT: parent_id, ORDER: order}
                insertion.rows.append(row)

            # NON-MAPPING VALUES ARE TREATED AS A SINGLE PROPERTY NAMED "."
            if not isinstance(data, Mapping):
                data = {".": data}
            for k, v in data.items():
                insertion = doc_collection[nested_path[0]]
                cname = concat_field(full_path, literal_field(k))
                value_type = get_type(v)
                if value_type is None:
                    continue

                # FIND THE EXISTING COLUMN FOR THIS NAME: ANY STRUCT COLUMN FOR STRUCT VALUES, OTHERWISE AN EXACT TYPE MATCH
                if value_type in STRUCT:
                    c = unwraplist(
                        [cc for cc in abs_schema[cname] if cc.type in STRUCT])
                else:
                    c = unwraplist([
                        cc for cc in abs_schema[cname] if cc.type == value_type
                    ])

                if not c:
                    # WHAT IS THE NESTING LEVEL FOR THIS PATH?
                    # NOTE(review): THE RESULT OF THIS SCAN IS NOT READ; deeper_nested_path IS REASSIGNED BEFORE ITS NEXT USE
                    deeper_nested_path = "."
                    for path, _ in nested_tables.items():
                        if startswith_field(
                                cname,
                                path) and len(deeper_nested_path) < len(path):
                            deeper_nested_path = path

                    c = Column(names={".": cname},
                               type=value_type,
                               es_column=typed_column(cname, value_type),
                               es_index=table,
                               nested_path=nested_path)
                    abs_schema.add(cname, c)
                    if value_type == "nested":
                        nested_tables[cname] = "fake table"

                    required_changes.append({"add": c})

                    # INSIDE IF BLOCK BECAUSE WE DO NOT WANT IT TO ADD WHAT WE columns.get() ALREADY
                    insertion.active_columns.add(c)
                elif c.type == "nested" and value_type == "object":
                    # COLUMN IS ALREADY NESTED: TREAT THE SINGLE OBJECT AS A ONE-ELEMENT LIST
                    value_type = "nested"
                    v = [v]
                elif len(c.nested_path) < len(nested_path):
                    # EXISTING COLUMN IS SHALLOWER THAN THE CURRENT PATH:
                    # REPLACE IT WITH A COLUMN AT THIS DEPTH AND RECORD A "nest" CHANGE
                    from_doc = doc_collection.get(c.nested_path[0], None)
                    column = c.es_column
                    from_doc.active_columns.remove(c)
                    abs_schema.remove(cname, c)
                    required_changes.append({"nest": (c, nested_path[0])})
                    deep_c = Column(names={".": cname},
                                    type=value_type,
                                    es_column=typed_column(cname, value_type),
                                    es_index=table,
                                    nested_path=nested_path)
                    abs_schema.add(cname, deep_c)
                    insertion.active_columns.add(deep_c)

                    # COPY VALUES ALREADY COLLECTED IN THE SHALLOWER ROWS INTO NEW ROWS AT THIS DEPTH
                    for r in from_doc.rows:
                        r1 = unwrap(r)
                        if column in r1:
                            row1 = {
                                UID: self.next_uid(),
                                PARENT: r1["__id__"],
                                ORDER: 0,
                                column: r1[column]
                            }
                            insertion.rows.append(row1)

                elif len(c.nested_path) > len(nested_path):
                    # EXISTING COLUMN IS DEEPER THAN THE CURRENT PATH: START A NEW ROW IN THE DEEPER DOC
                    insertion = doc_collection[c.nested_path[0]]
                    row = {UID: self.next_uid(), PARENT: uid, ORDER: order}
                    insertion.rows.append(row)

                # BE SURE TO NEST VALUES, IF NEEDED
                if value_type == "nested":
                    # MARK THE PROPERTY AS PRESENT, THEN FLATTEN EACH CHILD INTO ITS OWN ROW
                    row[c.es_column] = "."
                    deeper_nested_path = [cname] + nested_path
                    insertion = doc_collection.get(cname, None)
                    if not insertion:
                        insertion = doc_collection[cname] = Data(
                            active_columns=set(), rows=[])
                    for i, r in enumerate(v):
                        child_uid = self.next_uid()
                        _flatten(r, child_uid, uid, i, cname,
                                 deeper_nested_path)
                elif value_type == "object":
                    # OBJECT PROPERTIES ARE FLATTENED INTO THE SAME ROW
                    row[c.es_column] = "."
                    _flatten(v,
                             uid,
                             parent_id,
                             order,
                             cname,
                             nested_path,
                             row=row)
                elif c.type:
                    row[c.es_column] = v
コード例 #6
0
ファイル: insert_table.py プロジェクト: pombredanne/jx-sqlite
        def _flatten(data,
                     uid,
                     parent_id,
                     order,
                     full_path,
                     nested_path,
                     row=None):
            """
            Recursively pull `data` apart into rows stored in `doc_collection`,
            keyed by nested path.  Columns that are not known yet are created,
            registered through `add_column_to_schema` and recorded in
            `required_changes`.

            :param data: the data we are pulling apart
            :param uid: the uid we are giving this doc
            :param parent_id: the parent id of this (sub)doc
            :param order: the number of siblings before this one
            :param full_path: path to this (sub)doc
            :param nested_path: list of paths, deepest first
            :param row: we will be filling this
            :return: None
            """
            insertion = doc_collection[nested_path[0]]
            if not row:
                # NO ROW WAS PASSED IN: START A NEW ONE FOR THIS (SUB)DOC
                row = {UID: uid, PARENT: parent_id, ORDER: order}
                insertion.rows.append(row)

            if isinstance(data, Mapping):
                for k, v in data.items():
                    cname = concat_field(full_path, k)
                    value_type = get_type(v)
                    if value_type is None:
                        continue

                    # FIND THE EXISTING COLUMN: ANY STRUCT COLUMN FOR STRUCT VALUES, OTHERWISE AN EXACT TYPE MATCH
                    if value_type in STRUCT:
                        c = unwraplist(
                            [cc for cc in columns[cname] if cc.type in STRUCT])
                    else:
                        c = unwraplist([
                            cc for cc in columns[cname]
                            if cc.type == value_type
                        ])

                    if not c:
                        # WHAT IS THE NESTING LEVEL FOR THIS PATH?
                        deeper_nested_path = "."
                        for path, _ in nested_tables.items():
                            if startswith_field(cname, path) and len(
                                    deeper_nested_path) < len(path):
                                deeper_nested_path = path
                        if deeper_nested_path != nested_path[0]:
                            # I HIGHLY SUSPECT, THROUGH CALLING _flatten() AGAIN THE REST OF THIS BLOCK IS NOT NEEDED
                            nested_column = unwraplist([
                                cc
                                for cc in columns.get(deeper_nested_path, Null)
                                if cc.type in STRUCT
                            ])
                            insertion.active_columns.add(nested_column)
                            row[nested_column.es_column] = "."

                            # DESCEND ONE LEVEL: NEW ROW IN THE DEEPER DOC, PARENTED BY THE CURRENT uid
                            nested_path = [deeper_nested_path] + nested_path
                            insertion = doc_collection.get(
                                nested_path[0], None)
                            if not insertion:
                                insertion = doc_collection[
                                    nested_path[0]] = Data(
                                        active_columns=set(), rows=[])
                            uid, parent_id, order = self.next_uid(), uid, 0
                            row = {UID: uid, PARENT: parent_id, ORDER: order}
                            insertion.rows.append(row)

                        c = Column(
                            names={self.name: cname},
                            type=value_type,
                            es_column=typed_column(cname, value_type),
                            es_index=self.
                            name,  # THIS MAY BE THE WRONG TABLE, IF THIS PATH IS A NESTED DOC
                            nested_path=nested_path)
                        self.add_column_to_schema(self.nested_tables, c)
                        if value_type == "nested":
                            nested_tables[cname] = "fake table"

                        required_changes.append({"add": c})

                        # INSIDE IF BLOCK BECAUSE WE DO NOT WANT IT TO ADD WHAT WE columns.get() ALREADY
                        insertion.active_columns.add(c)

                    # BE SURE TO NEST VALUES, IF NEEDED
                    if value_type == "nested":
                        # MARK THE PROPERTY AS PRESENT, THEN FLATTEN EACH CHILD INTO ITS OWN ROW
                        row[c.es_column] = "."
                        deeper_nested_path = [cname] + nested_path
                        insertion = doc_collection.get(cname, None)
                        if not insertion:
                            insertion = doc_collection[cname] = Data(
                                active_columns=set(), rows=[])
                        for i, r in enumerate(v):
                            child_uid = self.next_uid()
                            _flatten(r, child_uid, uid, i, cname,
                                     deeper_nested_path)
                    elif value_type == "object":
                        # OBJECT PROPERTIES ARE FLATTENED INTO THE SAME ROW
                        row[c.es_column] = "."
                        _flatten(v,
                                 uid,
                                 parent_id,
                                 order,
                                 cname,
                                 nested_path,
                                 row=row)
                    elif c.type:
                        row[c.es_column] = v
            else:
                # NON-MAPPING VALUES ARE TREATED AS A SINGLE PROPERTY NAMED "."
                k = "."
                v = data
                cname = concat_field(full_path, k)
                value_type = get_type(v)
                if value_type is None:
                    return

                if value_type in STRUCT:
                    c = unwraplist(
                        [c for c in self.columns if c.type in STRUCT])
                else:
                    try:
                        c = unwraplist(
                            [c for c in self.columns if c.type == value_type])
                    except Exception as e:
                        # "as" FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3
                        Log.error("not expected", cause=e)

                if not c:
                    c = Column(names={self.name: cname},
                               type=value_type,
                               es_column=typed_column(cname, value_type),
                               es_index=self.name,
                               nested_path=nested_path)
                    self.add_column_to_schema(columns, c)
                    if value_type == "nested":
                        nested_tables[cname] = "fake table"
                    required_changes.append({"add": c})

                insertion.active_columns.add(c)

                if value_type == "nested":
                    if c.type == "object":
                        # WE CAN FIX THIS,
                        Log.error("fix this")

                    row[c.es_column] = "."
                    deeper_nested_path = [cname] + nested_path
                    insertion = doc_collection.get(cname, None)
                    if not insertion:
                        doc_collection[cname] = Data(active_columns=set(),
                                                     rows=[])
                    for i, r in enumerate(v):
                        child_uid = self.next_uid()
                        _flatten(r, child_uid, uid, i, cname,
                                 deeper_nested_path)
                elif value_type == "object":
                    if c.type == "nested":
                        # MOVE TO SINGLE-VALUED LIST
                        child_uid = self.next_uid()
                        row[c.es_column] = "."
                        deeper_nested_path = [cname] + nested_path
                        _flatten(v, child_uid, uid, 0, cname,
                                 deeper_nested_path)
                    else:
                        # PASS cname AS full_path; WITHOUT IT nested_path WOULD BIND TO full_path
                        # AND THE REQUIRED nested_path ARGUMENT WOULD BE MISSING
                        row[c.es_column] = "."
                        _flatten(v,
                                 uid,
                                 parent_id,
                                 order,
                                 cname,
                                 nested_path,
                                 row=row)
                elif c.type:
                    row[c.es_column] = v
コード例 #7
0
    def update(self, command):
        """
        Apply a set/clear update to the rows of the fact table matching a filter.

        Nested (list) values are handled by deleting the matching child rows
        and inserting the new ones; all other values are written with a single
        UPDATE statement at the end.

        :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
        :return: None
        """
        command = wrap(command)

        # REJECT DEEP UPDATES
        touched_columns = command.set.keys() | set(listwrap(command['clear']))
        for c in self.schema.columns:
            if c.name in touched_columns and len(c.nested_path) > 1:
                Log.error("Deep update not supported")

        # ADD NEW COLUMNS
        where = jx_expression(command.where)
        _vars = where.vars()
        # MAP EACH VARIABLE IN THE FILTER TO THE es_column OF ITS NON-STRUCT COLUMNS
        _map = {
            v: c.es_column
            for v in _vars for c in self.columns.get(v, Null)
            if c.jx_type not in STRUCT
        }
        where_sql = where.map(_map).to_sql(self.schema)
        # ANY "set" KEY WITHOUT AN EXISTING COLUMN GETS ONE, TYPED FROM ITS VALUE
        new_columns = set(command.set.keys()) - set(self.columns.keys())
        for new_column_name in new_columns:
            nested_value = command.set[new_column_name]
            ctype = get_type(nested_value)
            # NOTE(review): json_type_to_sqlite_type IS CALLED HERE - CONFIRM IT IS CALLABLE RATHER THAN A MAPPING
            column = Column(name=new_column_name,
                            jx_type=ctype,
                            es_index=self.name,
                            es_type=json_type_to_sqlite_type(ctype),
                            es_column=typed_column(new_column_name, ctype),
                            last_updated=Date.now())
            self.add_column(column)

        # UPDATE THE NESTED VALUES
        for nested_column_name, nested_value in command.set.items():
            if get_type(nested_value) == "nested":
                # NOTE(review): nested_table_name IS ASSIGNED BUT NOT READ BELOW
                nested_table_name = concat_field(self.name, nested_column_name)
                # NOTE(review): nested_tables (AND abs_schema, schema BELOW) ARE NOT DEFINED IN THIS METHOD; PRESUMABLY FROM AN ENCLOSING SCOPE - CONFIRM
                nested_table = nested_tables[nested_column_name]
                self_primary_key = sql_list(
                    quote_column(c.es_column) for u in self.uid
                    for c in self.columns[u])
                extra_key_name = UID + text(len(self.uid))
                extra_key = [e
                             for e in nested_table.columns[extra_key_name]][0]

                # REMOVE THE EXISTING CHILD ROWS THAT BELONG TO THE MATCHED PARENT ROWS
                sql_command = (
                    SQL_DELETE + SQL_FROM + quote_column(nested_table.name) +
                    SQL_WHERE + "EXISTS" +
                    sql_iso(SQL_SELECT + SQL_ONE + SQL_FROM +
                            sql_alias(quote_column(nested_table.name), "n") +
                            SQL_INNER_JOIN +
                            sql_iso(SQL_SELECT + self_primary_key + SQL_FROM +
                                    quote_column(abs_schema.fact) + SQL_WHERE +
                                    where_sql) + " t ON " +
                            SQL_AND.join(
                                quote_column("t", c.es_column) + SQL_EQ +
                                quote_column("n", c.es_column)
                                for u in self.uid for c in self.columns[u])))
                self.db.execute(sql_command)

                # INSERT NEW RECORDS
                if not nested_value:
                    continue

                # FLATTEN EACH NEW CHILD DOCUMENT INTO doc_collection
                doc_collection = {}
                for d in listwrap(nested_value):
                    nested_table.flatten(d,
                                         Data(),
                                         doc_collection,
                                         path=nested_column_name)

                prefix = SQL_INSERT + quote_column(nested_table.name) + sql_iso(
                    sql_list([self_primary_key] + [quote_column(extra_key)] + [
                        quote_column(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns
                    ]))

                # BUILD THE PARENT TABLES
                # NOTE(review): THIS WHERE CLAUSE IS REBUILT WITHOUT THE _map TRANSLATION USED FOR where_sql - CONFIRM INTENDED
                parent = (SQL_SELECT + self_primary_key + SQL_FROM +
                          quote_column(abs_schema.fact) + SQL_WHERE +
                          jx_expression(command.where).to_sql(schema))

                # BUILD THE RECORDS
                children = SQL_UNION_ALL.join(
                    SQL_SELECT + quote_value(i) + " " +
                    quote_column(extra_key.es_column) + "," + sql_list(
                        quote_value(row[c.name]) + " " +
                        quote_column(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns)
                    for i, row in enumerate(
                        doc_collection.get(".", Null).rows))

                # JOIN EVERY MATCHED PARENT ROW WITH EVERY NEW CHILD RECORD (JOIN CONDITION IS SQL_TRUE)
                sql_command = (prefix + SQL_SELECT + sql_list([
                    quote_column("p", c.es_column) for u in self.uid
                    for c in self.columns[u]
                ] + [quote_column("c", extra_key)] + [
                    quote_column("c", c.es_column)
                    for c in doc_collection.get(".", Null).active_columns
                ]) + SQL_FROM + sql_iso(parent) + " p" + SQL_INNER_JOIN +
                               sql_iso(children) + " c" + " ON " + SQL_TRUE)

                self.db.execute(sql_command)

                # THE CHILD COLUMNS COULD HAVE EXPANDED
                # ADD COLUMNS TO SELF
                for n, cs in nested_table.columns.items():
                    for c in cs:
                        column = Column(name=c.name,
                                        jx_type=c.jx_type,
                                        es_type=c.es_type,
                                        es_index=c.es_index,
                                        es_column=c.es_column,
                                        nested_path=[nested_column_name] +
                                        c.nested_path,
                                        last_updated=Date.now())
                        if c.name not in self.columns:
                            self.columns[column.name] = {column}
                        elif c.jx_type not in [
                                c.jx_type for c in self.columns[c.name]
                        ]:
                            self.columns[column.name].add(column)

        # UPDATE THE NON-NESTED VALUES: "set" ASSIGNMENTS FIRST, THEN "clear" COLUMNS SET TO NULL
        # ONLY COLUMNS WITH A SINGLE-ELEMENT nested_path ARE INCLUDED
        command = (
            SQL_UPDATE + quote_column(abs_schema.fact) + SQL_SET + sql_list([
                quote_column(c) + SQL_EQ +
                quote_value(get_if_type(v, c.jx_type))
                for k, v in command.set.items() if get_type(v) != "nested"
                for c in self.columns[k]
                if c.jx_type != "nested" and len(c.nested_path) == 1
            ] + [
                quote_column(c) + SQL_EQ + SQL_NULL
                for k in listwrap(command['clear']) if k in self.columns
                for c in self.columns[k]
                if c.jx_type != "nested" and len(c.nested_path) == 1
            ]) + SQL_WHERE + where_sql)

        self.db.execute(command)
コード例 #8
0
        def _flatten(data,
                     uid,
                     parent_id,
                     order,
                     full_path,
                     nested_path,
                     row=None,
                     guid=None):
            """
            Recursively pull `data` apart into flat rows, one row collection
            per nested path, and record the schema changes that are needed.

            Nothing is returned; results are accumulated by side effect on the
            enclosing scope's `doc_collection` (rows and active columns),
            `snowflake` (columns and query paths) and `required_changes`.

            :param data: the data we are pulling apart
            :param uid: the uid we are giving this doc
            :param parent_id: the parent id of this (sub)doc
            :param order: the number of siblings before this one
            :param full_path: path to this (sub)doc
            :param nested_path: list of paths, deepest first
            :param row: we will be filling this; when falsy a new row is
                        created and appended to the current collection
            :param guid: value stored under the GUID key of a newly created row
            :return: None
            """
            table = concat_field(self.name, nested_path[0])
            insertion = doc_collection[nested_path[0]]
            if not row:
                row = {GUID: guid, UID: uid, PARENT: parent_id, ORDER: order}
                insertion.rows.append(row)

            if isinstance(data, Mapping):
                items = ((concat_field(full_path, k), v)
                         for k, v in wrap(data).leaves())
            else:
                # PRIMITIVE VALUES
                items = [(full_path, data)]

            for cname, v in items:
                value_type = get_type(v)
                if value_type is None:
                    continue

                # FIND THE EXISTING COLUMN (IF ANY) THAT HOLDS THIS NAME AND TYPE
                if value_type == NESTED:
                    c = unwraplist([
                        cc for cc in snowflake.columns if cc.jx_type in STRUCT
                        and untyped_column(cc.name) == cname
                    ])
                else:
                    c = unwraplist([
                        cc for cc in snowflake.columns
                        if cc.jx_type == value_type and cc.name == cname
                    ])

                # RESET: A PREVIOUS ITERATION MAY HAVE POINTED insertion ELSEWHERE
                insertion = doc_collection[nested_path[0]]
                if not c:
                    # WHAT IS THE NESTING LEVEL FOR THIS PATH?
                    # NOTE(review): deeper_nested_path IS COMPUTED HERE BUT NOT
                    # READ BEFORE IT IS REASSIGNED BELOW; THE NEW COLUMN USES
                    # nested_path. CONFIRM WHETHER THAT IS INTENDED.
                    deeper_nested_path = "."
                    for path in snowflake.query_paths:
                        if startswith_field(cname, path[0]) and len(
                                deeper_nested_path) < len(path):
                            deeper_nested_path = path

                    c = Column(name=cname,
                               jx_type=value_type,
                               es_type=json_type_to_sqlite_type.get(
                                   value_type, value_type),
                               es_column=typed_column(
                                   cname,
                                   json_type_to_sql_type.get(value_type)),
                               es_index=table,
                               nested_path=nested_path,
                               last_updated=Date.now())
                    if value_type == "nested":
                        snowflake.query_paths.append(c.es_column)
                        required_changes.append({'nest': (c, nested_path)})
                    else:
                        snowflake.columns.append(c)
                        required_changes.append({"add": c})

                        # INSIDE IF BLOCK BECAUSE WE DO NOT WANT IT TO ADD WHAT WE columns.get() ALREADY
                        insertion.active_columns.add(c)
                elif c.jx_type == "nested" and value_type == "object":
                    # A SINGLE OBJECT WHERE AN ARRAY IS EXPECTED: TREAT AS ONE-ELEMENT ARRAY
                    value_type = "nested"
                    v = [v]
                elif len(c.nested_path) < len(nested_path):
                    # THE KNOWN COLUMN IS SHALLOWER THAN THIS DOC: MOVE IT DEEPER
                    from_doc = doc_collection.get(c.nested_path[0], None)
                    column = c.es_column
                    # REMOVE ONLY ONCE: active_columns IS A set, SO A SECOND
                    # remove(c) WOULD RAISE KeyError
                    from_doc.active_columns.remove(c)
                    snowflake._remove_column(c)
                    required_changes.append({"nest": (c, nested_path)})
                    deep_c = Column(name=cname,
                                    jx_type=value_type,
                                    es_type=json_type_to_sqlite_type.get(
                                        value_type, value_type),
                                    es_column=typed_column(
                                        cname,
                                        json_type_to_sql_type.get(value_type)),
                                    es_index=table,
                                    nested_path=nested_path,
                                    last_updated=Date.now())
                    snowflake._add_column(deep_c)
                    snowflake._drop_column(c)

                    # COPY VALUES ALREADY COLLECTED IN THE SHALLOW ROWS INTO
                    # NEW CHILD ROWS OF THE CURRENT COLLECTION
                    for r in from_doc.rows:
                        r1 = unwrap(r)
                        if column in r1:
                            row1 = {
                                UID: self.container.next_uid(),
                                PARENT: r1["__id__"],
                                ORDER: 0,
                                column: r1[column]
                            }
                            insertion.rows.append(row1)
                elif len(c.nested_path) > len(nested_path):
                    # THE KNOWN COLUMN IS DEEPER THAN THIS DOC: WRITE THE VALUE
                    # INTO A NEW CHILD ROW OF THE DEEPER COLLECTION
                    insertion = doc_collection[c.nested_path[0]]
                    row = {
                        UID: self.container.next_uid(),
                        PARENT: uid,
                        ORDER: order
                    }
                    insertion.rows.append(row)

                # BE SURE TO NEST VALUES, IF NEEDED
                if value_type == "nested":
                    row[c.es_column] = "."
                    deeper_nested_path = [cname] + nested_path
                    insertion = doc_collection.get(cname, None)
                    if not insertion:
                        insertion = doc_collection[cname] = Data(
                            active_columns=set(), rows=[])
                    # EACH ELEMENT BECOMES ITS OWN CHILD DOC, ORDERED BY POSITION
                    for i, r in enumerate(v):
                        child_uid = self.container.next_uid()
                        _flatten(r, child_uid, uid, i, cname,
                                 deeper_nested_path)
                elif value_type == "object":
                    # INNER OBJECT SHARES THE SAME ROW AS ITS PARENT
                    row[c.es_column] = "."
                    _flatten(v,
                             uid,
                             parent_id,
                             order,
                             cname,
                             nested_path,
                             row=row)
                elif c.jx_type:
                    insertion.active_columns.add(c)
                    row[c.es_column] = v
コード例 #9
0
    def __init__(self, name, db=None, uid=GUID, exists=False, kwargs=None):
        """
        Build a handle for table `name`; unless `exists` is set, the table is
        created in the db, otherwise its columns are loaded from the db.

        :param name: NAME FOR THIS TABLE
        :param db: THE DB TO USE (A NEW Sqlite() IS MADE WHEN NOT GIVEN)
        :param uid: THE UNIQUE INDEX FOR THIS TABLE (NAME, OR LIST OF NAMES)
        :param exists: True IF THE TABLE IS ALREADY IN db; ITS COLUMNS ARE
                       READ WITH PRAGMA table_info INSTEAD OF BEING CREATED
        :param kwargs: NOT USED IN THE CODE SHOWN HERE
        :return: HANDLE FOR TABLE IN db
        """
        global _config
        Container.__init__(self, frum=None)
        if db:
            self.db = db
        else:
            # NO DB SUPPLIED: MAKE ONE, AND KEEP IT IN THE LOCAL db FOR USE BELOW
            self.db = db = Sqlite()

        # LATE IMPORT OF THE CONTAINER CONFIG; THIS db BECOMES THE DEFAULT
        # ONLY WHEN NO DEFAULT HAS BEEN SET YET
        if not _config:
            from pyLibrary.queries.containers import config as _config
            if not _config.default:
                _config.default = {"type": "sqlite", "settings": {"db": db}}

        self.name = name
        self.uid = listwrap(uid)
        self._next_uid = 1
        self._make_digits_table()

        self.uid_accessor = jx.get(self.uid)
        self.nested_tables = OrderedDict(
        )  # MAP FROM NESTED PATH TO Table OBJECT, PARENTS PRECEDE CHILDREN
        self.nested_tables["."] = self
        self.columns = Index(
            keys=[join_field(["names", self.name])]
        )  # MAP FROM DOCUMENT ABS PROPERTY NAME TO THE SET OF SQL COLUMNS IT REPRESENTS (ONE FOR EACH REALIZED DATATYPE)

        if not exists:
            # REGISTER A STRING COLUMN FOR EVERY uid NAME OTHER THAN GUID
            for u in self.uid:
                if u == GUID:
                    pass
                else:
                    c = Column(names={name: u},
                               type="string",
                               es_column=typed_column(u, "string"),
                               es_index=name)
                    # NOTE(review): FIRST ARGUMENT IS self.nested_tables HERE BUT
                    # self.columns IN THE exists BRANCH BELOW - CONFIRM WHICH IS INTENDED
                    self.add_column_to_schema(self.nested_tables, c)

            # CREATE TABLE WITH THE UID COLUMN PLUS ALL KNOWN COLUMNS;
            # PRIMARY KEY IS THE UID COLUMN PLUS THE COLUMNS OF EACH uid NAME
            command = ("CREATE TABLE " + quote_table(name) + "(" +
                       (",".join([quoted_UID + " INTEGER"] + [
                           _quote_column(c) + " " + sql_types[c.type]
                           for u, cs in self.columns.items() for c in cs
                       ])) + ", PRIMARY KEY (" + (", ".join([quoted_UID] + [
                           _quote_column(c) for u in self.uid
                           for c in self.columns[u]
                       ])) + "))")

            self.db.execute(command)
        else:
            # LOAD THE COLUMNS
            command = "PRAGMA table_info(" + quote_table(name) + ")"
            details = self.db.query(command)

            for r in details:
                # PRAGMA table_info ROWS: INDEX 1 IS THE COLUMN NAME, INDEX 2 THE
                # DECLARED TYPE (ASSUMES query() RETURNS ROWS IN SQLITE'S ORDER)
                cname = untyped_column(r[1])
                ctype = r[2].lower()
                column = Column(names={name: cname},
                                type=ctype,
                                nested_path=['.'],
                                es_column=typed_column(cname, ctype),
                                es_index=name)

                self.add_column_to_schema(self.columns, column)