def __init__(self, name, db=None, uid=UID, kwargs=None):
    """
    :param name: NAME FOR THIS TABLE
    :param db: THE DB TO USE
    :param uid: THE UNIQUE INDEX FOR THIS TABLE
    :return: HANDLE FOR TABLE IN db
    """
    global _config
    if db:
        self.db = db
    else:
        self.db = db = Sqlite()

    if not _config:
        # REGISTER sqlite AS THE DEFAULT CONTAINER TYPE
        from jx_base.container import config as _config

        if not _config.default:
            _config.default = {"type": "sqlite", "settings": {"db": db}}

    self.sf = Snowflake(fact=name, uid=uid, db=db)
    self._next_guid = generateGuid()
    self._next_uid = 1
    self._make_digits_table()
    self.uid_accessor = jx.get(self.sf.uid)
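# Hedged sketch (not part of the source): the registration above stores the
# sqlite backend as the default container config, so later jx_base code that
# consults _config.default can reuse the same connection, roughly:
#
#     from jx_base.container import config as _config
#     backend = _config.default              # {"type": "sqlite", "settings": {"db": db}}
#     shared_db = backend["settings"]["db"]  # the Sqlite instance registered above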
def __init__(self, name, db=None, uid=UID, kwargs=None):
    """
    :param name: NAME FOR THIS TABLE
    :param db: THE DB TO USE
    :param uid: THE UNIQUE INDEX FOR THIS TABLE
    :return: HANDLE FOR TABLE IN db
    """
    global _config
    if isinstance(db, Sqlite):
        self.db = db
    else:
        self.db = db = Sqlite(db)

    if not _config:
        # REGISTER sqlite AS THE DEFAULT CONTAINER TYPE
        from jx_base.container import config as _config

        if not _config.default:
            _config.default = {"type": "sqlite", "settings": {"db": db}}

    ns = Namespace(db=db)
    self.facts = ns.create_or_replace_facts(fact_name=name)
    self._next_guid = generateGuid()
    self._next_uid = 1
    self._make_digits_table()
    self.uid_accessor = jx.get(uid)
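# A minimal usage sketch, assuming this __init__ belongs to the fact-table
# class of jx_sqlite (called `FactTable` here only for illustration); the
# import path and the Sqlite() argument are assumptions, not confirmed by
# this excerpt:
#
#     from jx_sqlite.sqlite import Sqlite      # import path assumed
#
#     db = Sqlite("example.sqlite")             # hypothetical database file
#     table = FactTable("my_facts", db=db)      # builds namespace + facts, registers default container
#     print(table.facts)                        # handle returned by ns.create_or_replace_facts()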
def flatten_many(self, docs, path="."):
    """
    :param docs: THE JSON DOCUMENTS
    :param path: FULL PATH TO THIS (INNER/NESTED) DOCUMENT
    :return: doc_collection: MAP FROM NESTED PATH TO INSERTION PARAMETERS:
             {"active_columns": list, "rows": list of objects}
    """
    # TODO: COMMAND TO ADD COLUMNS
    # TODO: COMMAND TO NEST EXISTING COLUMNS

    # COLLECT AS MANY docs AS DO NOT REQUIRE SCHEMA CHANGE
    _insertion = Data(active_columns=Queue(), rows=[])
    doc_collection = {".": _insertion}
    # KEEP TRACK OF WHAT TABLE WILL BE MADE (SHORTLY)
    required_changes = []
    facts = self.container.get_or_create_facts(self.name)
    snowflake = facts.snowflake

    def _flatten(data, uid, parent_id, order, full_path, nested_path, row=None, guid=None):
        """
        :param data: the data we are pulling apart
        :param uid: the uid we are giving this doc
        :param parent_id: the parent id of this (sub)doc
        :param order: the number of siblings before this one
        :param full_path: path to this (sub)doc
        :param nested_path: list of paths, deepest first
        :param row: we will be filling this
        :return:
        """
        table = concat_field(self.name, nested_path[0])
        insertion = doc_collection[nested_path[0]]
        if not row:
            row = {GUID: guid, UID: uid, PARENT: parent_id, ORDER: order}
            insertion.rows.append(row)

        if is_data(data):
            items = [(concat_field(full_path, k), v) for k, v in wrap(data).leaves()]
        else:
            # PRIMITIVE VALUES
            items = [(full_path, data)]

        for cname, v in items:
            jx_type = get_jx_type(v)
            if jx_type is None:
                continue

            insertion = doc_collection[nested_path[0]]
            if jx_type == NESTED:
                c = first(
                    cc
                    for cc in insertion.active_columns + snowflake.columns
                    if cc.jx_type in STRUCT and untyped_column(cc.name)[0] == cname
                )
            else:
                c = first(
                    cc
                    for cc in insertion.active_columns + snowflake.columns
                    if cc.jx_type == jx_type and cc.name == cname
                )

            if isinstance(c, list):
                Log.error("confused")

            if not c:
                # WHAT IS THE NESTING LEVEL FOR THIS PATH?
                deeper_nested_path = "."
                for path in snowflake.query_paths:
                    if startswith_field(cname, path[0]) and len(deeper_nested_path) < len(path):
                        deeper_nested_path = path

                c = Column(
                    name=cname,
                    jx_type=jx_type,
                    es_type=json_type_to_sqlite_type.get(jx_type, jx_type),
                    es_column=typed_column(cname, json_type_to_sql_type.get(jx_type)),
                    es_index=table,
                    cardinality=0,
                    nested_path=nested_path,
                    last_updated=Date.now()
                )
                if jx_type == NESTED:
                    snowflake.query_paths.append(c.es_column)
                    required_changes.append({"nest": c})
                else:
                    insertion.active_columns.add(c)
                    required_changes.append({"add": c})
            elif c.jx_type == NESTED and jx_type == OBJECT:
                # ALWAYS PROMOTE OBJECTS TO NESTED
                jx_type = NESTED
                v = [v]
            elif len(c.nested_path) < len(nested_path):
                from_doc = doc_collection.get(c.nested_path[0], None)
                column = c.es_column
                from_doc.active_columns.remove(c)
                snowflake._remove_column(c)
                required_changes.append({"nest": c})
                deep_c = Column(
                    name=cname,
                    jx_type=jx_type,
                    es_type=json_type_to_sqlite_type.get(jx_type, jx_type),
                    es_column=typed_column(cname, json_type_to_sql_type.get(jx_type)),
                    es_index=table,
                    nested_path=nested_path,
                    last_updated=Date.now()
                )
                snowflake._add_column(deep_c)
                snowflake._drop_column(c)
                from_doc.active_columns.remove(c)

                for r in from_doc.rows:
                    r1 = unwrap(r)
                    if column in r1:
                        row1 = {
                            UID: self.container.next_uid(),
                            PARENT: r1["__id__"],
                            ORDER: 0,
                            column: r1[column]
                        }
                        insertion.rows.append(row1)
            elif len(c.nested_path) > len(nested_path):
                insertion = doc_collection[c.nested_path[0]]
                row = {
                    UID: self.container.next_uid(),
                    PARENT: uid,
                    ORDER: order
                }
                insertion.rows.append(row)

            # BE SURE TO NEST VALUES, IF NEEDED
            if jx_type == NESTED:
                deeper_nested_path = [cname] + nested_path
                if not doc_collection.get(cname):
                    doc_collection[cname] = Data(active_columns=Queue(), rows=[])
                for i, r in enumerate(v):
                    child_uid = self.container.next_uid()
                    _flatten(r, child_uid, uid, i, cname, deeper_nested_path)
            elif jx_type == OBJECT:
                _flatten(v, uid, parent_id, order, cname, nested_path, row=row)
            elif c.jx_type:
                row[c.es_column] = v

    for doc in docs:
        _flatten(doc, self.container.next_uid(), 0, 0, full_path=path, nested_path=["."], guid=generateGuid())

    if required_changes:
        snowflake.change_schema(required_changes)
        required_changes = []

    return doc_collection
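# Illustrative sketch (values and object names invented, exact typed column
# names are an assumption) of the doc_collection that flatten_many() returns
# for one document containing a nested array; the row keys come from the
# GUID/UID/PARENT/ORDER constants used above:
#
#     docs = [{"a": 1, "b": [{"c": 2}, {"c": 3}]}]
#     collection = facts.flatten_many(docs)     # `facts` is the table handle, name assumed
#     # collection["."].rows -> one row with GUID/UID/PARENT/ORDER plus the typed column for "a"
#     # collection["b"].rows -> two rows whose PARENT points at the parent row's UID,
#     #                         each holding the typed column for "b.c"
#     # collection[path].active_columns -> the Column objects added at each nested path,
#     #                                    after any required schema changes were applied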