コード例 #1
0
ファイル: base_table.py プロジェクト: nknick99/MySQL-to-S3
    def __init__(self, name, db=None, uid=UID, kwargs=None):
        """
        BUILD A HANDLE FOR THE TABLE CALLED name, BACKED BY A Sqlite DATABASE

        :param name: NAME FOR THIS TABLE
        :param db: THE DB TO USE; A NEW Sqlite() IS MADE WHEN THIS IS FALSY
        :param uid: THE UNIQUE INDEX FOR THIS TABLE
        :param kwargs: NOT USED BY THIS METHOD
        """
        global _config

        # FALL BACK TO A FRESH DATABASE WHEN THE CALLER DID NOT SUPPLY ONE
        db = db or Sqlite()
        self.db = db

        if not _config:
            # FIRST USE: LOAD THE CONTAINER CONFIG INTO THE MODULE GLOBAL AND
            # REGISTER sqlite AS THE DEFAULT CONTAINER TYPE (IF NONE IS SET)
            from jx_base.container import config as _config

            if not _config.default:
                default_settings = {"db": db}
                _config.default = {"type": "sqlite", "settings": default_settings}

        self.sf = Snowflake(fact=name, uid=uid, db=db)

        self._next_guid, self._next_uid = generateGuid(), 1
        self._make_digits_table()
        self.uid_accessor = jx.get(self.sf.uid)
コード例 #2
0
ファイル: base_table.py プロジェクト: klahnakoski/annotations
    def __init__(self, name, db=None, uid=UID, kwargs=None):
        """
        BUILD A HANDLE FOR THE TABLE CALLED name, BACKED BY A Sqlite DATABASE

        :param name: NAME FOR THIS TABLE
        :param db: THE DB TO USE; ANYTHING THAT IS NOT A Sqlite INSTANCE IS
                   PASSED TO Sqlite() TO MAKE ONE
        :param uid: THE UNIQUE INDEX FOR THIS TABLE
        :param kwargs: NOT USED BY THIS METHOD
        """
        global _config

        # WRAP WHATEVER WE WERE GIVEN, UNLESS IT IS ALREADY A Sqlite INSTANCE
        if not isinstance(db, Sqlite):
            db = Sqlite(db)
        self.db = db

        if not _config:
            # FIRST USE: LOAD THE CONTAINER CONFIG INTO THE MODULE GLOBAL AND
            # REGISTER sqlite AS THE DEFAULT CONTAINER TYPE (IF NONE IS SET)
            from jx_base.container import config as _config

            if not _config.default:
                default_settings = {"db": db}
                _config.default = {"type": "sqlite", "settings": default_settings}

        self.facts = Namespace(db=db).create_or_replace_facts(fact_name=name)

        self._next_guid, self._next_uid = generateGuid(), 1
        self._make_digits_table()
        self.uid_accessor = jx.get(uid)
コード例 #3
0
    def flatten_many(self, docs, path="."):
        """
        PULL EACH JSON DOCUMENT APART INTO FLAT ROWS, GROUPED BY NESTED PATH.
        SCHEMA CHANGES DISCOVERED WHILE FLATTENING A DOCUMENT ARE HANDED TO
        snowflake.change_schema() BEFORE THE NEXT DOCUMENT IS PROCESSED.

        :param docs: THE JSON DOCUMENTS
        :param path: FULL PATH TO THIS (INNER/NESTED) DOCUMENT
        :return: doc_collection: MAP FROM NESTED PATH TO INSERTION PARAMETERS:
                 {"active_columns": list, "rows": list of objects}
                 (ONLY THIS MAP IS RETURNED; THERE IS NO success OR command
                 IN THE RETURN VALUE)
        """

        # TODO: COMMAND TO ADD COLUMNS
        # TODO: COMMAND TO NEST EXISTING COLUMNS
        # COLLECT AS MANY doc THAT DO NOT REQUIRE SCHEMA CHANGE

        # THE ROOT PATH "." ALWAYS HAS AN INSERTION RECORD
        _insertion = Data(active_columns=Queue(), rows=[])
        doc_collection = {".": _insertion}
        # KEEP TRACK OF WHAT TABLE WILL BE MADE (SHORTLY)
        required_changes = []
        facts = self.container.get_or_create_facts(self.name)
        snowflake = facts.snowflake

        def _flatten(data,
                     uid,
                     parent_id,
                     order,
                     full_path,
                     nested_path,
                     row=None,
                     guid=None):
            """
            :param data: the data we are pulling apart
            :param uid: the uid we are giving this doc
            :param parent_id: the parent id of this (sub)doc
            :param order: the number of siblings before this one
            :param full_path: path to this (sub)doc
            :param nested_path: list of paths, deepest first
            :param row: we will be filling this
            :param guid: stored under GUID when a new row is made here
            :return: None; results accumulate in doc_collection and
                     required_changes of the enclosing function
            """
            table = concat_field(self.name, nested_path[0])
            insertion = doc_collection[nested_path[0]]
            if not row:
                # NO ROW WAS HANDED IN (OR IT IS EMPTY): START A NEW ONE AND
                # QUEUE IT FOR INSERTION AT THIS NESTING LEVEL
                row = {GUID: guid, UID: uid, PARENT: parent_id, ORDER: order}
                insertion.rows.append(row)

            if is_data(data):
                # ONE (name, value) PAIR PER LEAF, NAMES PREFIXED BY full_path
                items = [(concat_field(full_path, k), v)
                         for k, v in wrap(data).leaves()]
            else:
                # PRIMITIVE VALUES
                items = [(full_path, data)]

            for cname, v in items:
                jx_type = get_jx_type(v)
                if jx_type is None:
                    # VALUES WITHOUT A TYPE ARE SKIPPED
                    continue

                # RE-FETCH: AN EARLIER ITERATION MAY HAVE POINTED insertion AT
                # A DEEPER TABLE (SEE THE len(c.nested_path) > len(nested_path)
                # BRANCH BELOW)
                insertion = doc_collection[nested_path[0]]
                # LOOK FOR AN EXISTING COLUMN, IN THE PENDING active_columns
                # AND IN THE SNOWFLAKE, THAT MATCHES THIS NAME AND TYPE
                if jx_type == NESTED:
                    c = first(cc for cc in insertion.active_columns +
                              snowflake.columns if cc.jx_type in STRUCT
                              and untyped_column(cc.name)[0] == cname)
                else:
                    c = first(cc for cc in insertion.active_columns +
                              snowflake.columns
                              if cc.jx_type == jx_type and cc.name == cname)

                if isinstance(c, list):
                    Log.error("confused")

                if not c:
                    # NO MATCHING COLUMN: MAKE ONE, AND RECORD THE SCHEMA CHANGE
                    # WHAT IS THE NESTING LEVEL FOR THIS PATH?
                    # NOTE(review): THE deeper_nested_path FOUND BY THIS LOOP
                    # IS NOT READ IN THIS FUNCTION; IT IS REASSIGNED IN THE
                    # NESTED BRANCH BELOW BEFORE ITS ONLY USE. THE LOOP ALSO
                    # COMPARES len() OF THE STRING "." AGAINST len(path) -
                    # CONFIRM WHAT WAS INTENDED
                    deeper_nested_path = "."
                    for path in snowflake.query_paths:
                        if startswith_field(cname, path[0]) and len(
                                deeper_nested_path) < len(path):
                            deeper_nested_path = path

                    c = Column(name=cname,
                               jx_type=jx_type,
                               es_type=json_type_to_sqlite_type.get(
                                   jx_type, jx_type),
                               es_column=typed_column(
                                   cname, json_type_to_sql_type.get(jx_type)),
                               es_index=table,
                               cardinality=0,
                               nested_path=nested_path,
                               last_updated=Date.now())
                    if jx_type == NESTED:
                        snowflake.query_paths.append(c.es_column)
                        required_changes.append({'nest': c})
                    else:
                        insertion.active_columns.add(c)
                        required_changes.append({"add": c})
                elif c.jx_type == NESTED and jx_type == OBJECT:
                    # ALWAYS PROMOTE OBJECTS TO NESTED
                    jx_type = NESTED
                    v = [v]
                elif len(c.nested_path) < len(nested_path):
                    # THE KNOWN COLUMN IS SHALLOWER THAN THIS (SUB)DOC: REPLACE
                    # IT WITH A COLUMN AT THIS DEPTH AND COPY THE VALUES ALREADY
                    # COLLECTED IN THE SHALLOW ROWS INTO NEW ROWS AT THIS LEVEL
                    # NOTE(review): from_doc MAY BE None IF c.nested_path[0]
                    # HAS NO ENTRY IN doc_collection; IT IS USED UNGUARDED BELOW
                    from_doc = doc_collection.get(c.nested_path[0], None)
                    column = c.es_column
                    from_doc.active_columns.remove(c)
                    snowflake._remove_column(c)
                    required_changes.append({"nest": c})
                    deep_c = Column(name=cname,
                                    jx_type=jx_type,
                                    es_type=json_type_to_sqlite_type.get(
                                        jx_type, jx_type),
                                    es_column=typed_column(
                                        cname,
                                        json_type_to_sql_type.get(jx_type)),
                                    es_index=table,
                                    nested_path=nested_path,
                                    last_updated=Date.now())
                    snowflake._add_column(deep_c)
                    snowflake._drop_column(c)
                    # NOTE(review): c WAS ALREADY REMOVED FROM active_columns
                    # ABOVE; CONFIRM A SECOND remove() IS SAFE AND INTENDED
                    from_doc.active_columns.remove(c)

                    for r in from_doc.rows:
                        r1 = unwrap(r)
                        if column in r1:
                            # NOTE(review): "__id__" IS PRESUMABLY THE SAME KEY
                            # AS UID - VERIFY AGAINST THE CONSTANT'S DEFINITION
                            row1 = {
                                UID: self.container.next_uid(),
                                PARENT: r1["__id__"],
                                ORDER: 0,
                                column: r1[column]
                            }
                            insertion.rows.append(row1)
                elif len(c.nested_path) > len(nested_path):
                    # THE KNOWN COLUMN IS DEEPER THAN THIS (SUB)DOC: START A
                    # CHILD ROW IN THE DEEPER TABLE, PARENTED BY THIS DOC.
                    # row IS REBOUND, SO THE VALUE STORED BELOW (AND ANY LATER
                    # ITEMS OF THIS LOOP) GO INTO THE NEW ROW
                    insertion = doc_collection[c.nested_path[0]]
                    row = {
                        UID: self.container.next_uid(),
                        PARENT: uid,
                        ORDER: order
                    }
                    insertion.rows.append(row)

                # BE SURE TO NEST VALUES, IF NEEDED
                if jx_type == NESTED:
                    # EACH ELEMENT BECOMES A CHILD DOC WITH ITS OWN uid,
                    # THIS DOC AS PARENT, AND ITS LIST POSITION AS order
                    deeper_nested_path = [cname] + nested_path
                    if not doc_collection.get(cname):
                        doc_collection[cname] = Data(active_columns=Queue(),
                                                     rows=[])
                    for i, r in enumerate(v):
                        child_uid = self.container.next_uid()
                        _flatten(r, child_uid, uid, i, cname,
                                 deeper_nested_path)
                elif jx_type == OBJECT:
                    # OBJECTS ARE FLATTENED INTO THE SAME ROW
                    _flatten(v,
                             uid,
                             parent_id,
                             order,
                             cname,
                             nested_path,
                             row=row)
                elif c.jx_type:
                    # PLAIN VALUE: STORE IT UNDER THE COLUMN'S es_column
                    row[c.es_column] = v

        for doc in docs:
            # EACH TOP-LEVEL DOC GETS A NEW uid AND guid, PARENT 0, ORDER 0
            _flatten(doc,
                     self.container.next_uid(),
                     0,
                     0,
                     full_path=path,
                     nested_path=["."],
                     guid=generateGuid())
            # APPLY THE CHANGES THIS DOC REQUIRED, THEN START A FRESH LIST
            if required_changes:
                snowflake.change_schema(required_changes)
            required_changes = []

        return doc_collection