def __init__(self, name, db=None, uid=UID, kwargs=None):
    """
    BUILD THE HANDLE FOR ONE TABLE IN db

    :param name: NAME FOR THIS TABLE (GIVEN TO Snowflake AS fact)
    :param db: THE DB TO USE; A NEW Sqlite() IS MADE WHEN THIS IS FALSEY
    :param uid: THE UNIQUE INDEX FOR THIS TABLE (GIVEN TO Snowflake AS uid)
    :param kwargs: NOT READ BY THIS CONSTRUCTOR
    """
    global _config

    Container.__init__(self, frum=None)

    # FALL BACK TO A FRESH DATABASE WHEN THE CALLER DID NOT PROVIDE ONE
    db = db if db else Sqlite()
    self.db = db

    if not _config:
        # LATE IMPORT, THEN REGISTER sqlite AS THE DEFAULT CONTAINER TYPE
        # (ONLY WHEN NO DEFAULT HAS BEEN SET ALREADY)
        from jx_base.container import config as _config

        if not _config.default:
            _config.default = {
                "type": "sqlite",
                "settings": {"db": db},
            }

    self.sf = Snowflake(fact=name, uid=uid, db=db)
    self._next_guid = generateGuid()
    self._next_uid = 1
    self._make_digits_table()
    self.uid_accessor = jx.get(self.sf.uid)
def __init__(self, name, data, schema=None):
    """
    HOLD A LIST OF RECORDS AS A CONTAINER

    :param name: NAME OF THE CONTAINER; self.name IS coalesce(name, ".")
    :param data: THE RECORDS; UNWRAPPED AND COPIED INTO A NEW list
    :param schema: SCHEMA TO USE; WHEN == None IT IS BUILT FROM name AND
                   THE RECORDS BY get_schema_from_list()
    """
    # TODO: STORE THIS LIKE A CUBE FOR FASTER ACCESS AND TRANSFORMATION
    records = list(unwrap(data))
    Container.__init__(self)

    # THE TEST IS DELIBERATELY `== None`, EXACTLY AS BEFORE
    # NOTE(review): DO NOT SWAP FOR `is None` WITHOUT CHECKING HOW NULL-LIKE VALUES COMPARE
    self._schema = get_schema_from_list(name, records) if schema == None else schema

    self.name = coalesce(name, ".")
    self.data = records
    self.locker = Lock()  # JUST IN CASE YOU WANT TO DO MORE THAN ONE THING
def __new__(cls, *args, **kwargs):
    """
    SEND REQUESTS FOR THE "meta" INDEX TO FromESMetadata

    WHEN THE ONLY POSITIONAL ARGUMENT, OR THE KEYWORD ARGUMENTS, HAVE
    index == "meta" AN INITIALIZED FromESMetadata IS RETURNED.
    OTHERWISE THE RESULT OF Container.__new__(cls) IS RETURNED.
    """
    # SAME SHORT-CIRCUIT ORDER AS BEFORE: POSITIONAL ARGUMENT FIRST, THEN KEYWORDS
    meta_requested = len(args) == 1 and args[0].get("index") == "meta"
    if not meta_requested:
        meta_requested = kwargs.get("index") == "meta"

    if not meta_requested:
        return Container.__new__(cls)

    # THE METADATA OBJECT IS INITIALIZED BY HAND BEFORE IT IS RETURNED
    metadata = FromESMetadata.__new__(FromESMetadata, *args, **kwargs)
    metadata.__init__(*args, **kwargs)
    return metadata
def __new__(cls, *args, **kwargs):
    """
    SEND REQUESTS FOR THE "meta" INDEX TO ElasticsearchMetadata

    WHEN THE ONLY POSITIONAL ARGUMENT, OR THE KEYWORD ARGUMENTS, HAVE
    index == "meta" AN INITIALIZED ElasticsearchMetadata IS RETURNED.
    OTHERWISE THE RESULT OF Container.__new__(cls) IS RETURNED.
    """
    # SAME SHORT-CIRCUIT ORDER AS BEFORE: POSITIONAL ARGUMENT FIRST, THEN KEYWORDS
    wants_metadata = len(args) == 1 and args[0].get("index") == "meta"
    if not wants_metadata:
        wants_metadata = kwargs.get("index") == "meta"

    if wants_metadata:
        # THE METADATA OBJECT IS INITIALIZED BY HAND BEFORE IT IS RETURNED
        namespace = ElasticsearchMetadata.__new__(ElasticsearchMetadata, *args, **kwargs)
        namespace.__init__(*args, **kwargs)
        return namespace

    return Container.__new__(cls)
def __init__(
    self,
    host,
    index,
    type=None,
    alias=None,
    name=None,
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    OPEN A CONTAINER OVER AN ELASTICSEARCH INDEX, OR ALIAS

    host, type, port, timeout AND wait_for_active_shards ARE NOT READ
    DIRECTLY HERE (PRESUMABLY THEY TRAVEL INSIDE kwargs - TODO CONFIRM)

    :param index: INDEX NAME
    :param alias: ALIAS NAME; THE READ-ONLY Alias USES coalesce(alias, index)
    :param name: CONTAINER NAME; self.name IS coalesce(name, alias, index)
    :param read_only: TRUTHY TO USE elasticsearch.Alias, OTHERWISE
                      Cluster.get_index() PROVIDES THE INDEX
    :param typed: EXPECTED TYPED MODE; WHEN == None THE MODE IS DETECTED
                  FROM THE COLUMNS, OTHERWISE A MISMATCH WITH THE DETECTED
                  MODE IS REPORTED THROUGH Log.error
    :param kwargs: THE SETTINGS; KEPT AS self.settings AND FORWARDED TO
                   THE elasticsearch AND ElasticsearchMetadata CONSTRUCTORS
    """
    Container.__init__(self)

    if not container.config.default:
        # NO DEFAULT CONTAINER YET: REGISTER THESE SETTINGS AS THE DEFAULT
        container.config.default = {
            "type": "elasticsearch",
            "settings": unwrap(kwargs),
        }

    self.settings = kwargs
    self.name = coalesce(name, alias, index)

    if read_only:
        self.es = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
    else:
        cluster = elasticsearch.Cluster(kwargs=kwargs)
        self.es = cluster.get_index(read_only=read_only, kwargs=kwargs)

    self._namespace = ElasticsearchMetadata(kwargs=kwargs)
    # self.settings IS THE kwargs OBJECT, SO THIS WRITES INTO kwargs TOO
    self.settings.type = self.es.settings.type
    self.edges = Data()
    self.worker = None

    # ABSOLUTE COLUMNS
    snowflake = self._namespace.get_snowflake(self.es.settings.alias)
    columns = snowflake.columns
    is_typed = any(col.es_column == EXISTS_TYPE for col in columns)

    if typed == None:
        # SWITCH ON TYPED MODE
        self.typed = is_typed
        return

    if is_typed != typed:
        Log.error(
            "Expecting given typed {{typed}} to match {{is_typed}}",
            typed=typed,
            is_typed=is_typed
        )
    self.typed = typed
def __init__(
    self,
    host,
    index,
    type=None,
    alias=None,
    name=None,
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    OPEN A CONTAINER OVER AN ELASTICSEARCH INDEX, OR ALIAS, AND LOAD ITS SCHEMA

    host, type, port, timeout AND wait_for_active_shards ARE NOT READ
    DIRECTLY HERE (PRESUMABLY THEY TRAVEL INSIDE kwargs - TODO CONFIRM)

    :param index: INDEX NAME
    :param alias: ALIAS NAME; THE READ-ONLY Alias USES coalesce(alias, index)
    :param name: CONTAINER NAME; coalesce(name, alias, index) IS USED FOR
                 self.name, FOR THE COLUMN LOOKUP AND FOR THE Schema
    :param read_only: TRUTHY TO USE elasticsearch.Alias, OTHERWISE
                      Cluster.get_index() PROVIDES THE INDEX
    :param typed: TYPED MODE; WHEN == None IT IS SWITCHED ON IF ANY
                  es_column CONTAINS "." + TYPE_PREFIX
    :param kwargs: THE SETTINGS; KEPT AS self.settings AND FORWARDED TO
                   THE elasticsearch AND FromESMetadata CONSTRUCTORS
    """
    Container.__init__(self, None)

    if not container.config.default:
        # NO DEFAULT CONTAINER YET: REGISTER THESE SETTINGS AS THE DEFAULT
        container.config.default = {
            "type": "elasticsearch",
            "settings": unwrap(kwargs),
        }

    # ONE NAME SERVES THE CONTAINER, THE COLUMN LOOKUP AND THE SCHEMA
    table_name = coalesce(name, alias, index)

    self.settings = kwargs
    self.name = table_name

    if read_only:
        self._es = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
    else:
        cluster = elasticsearch.Cluster(kwargs=kwargs)
        self._es = cluster.get_index(read_only=read_only, kwargs=kwargs)

    self.meta = FromESMetadata(kwargs=kwargs)
    # self.settings IS THE kwargs OBJECT, SO THIS WRITES INTO kwargs TOO
    self.settings.type = self._es.settings.type
    self.edges = Data()
    self.worker = None

    columns = self.meta.get_columns(table_name=table_name)
    self._schema = Schema(table_name, columns)

    if typed == None:
        # SWITCH ON TYPED MODE
        type_marker = "." + TYPE_PREFIX
        self.typed = any(col.es_column.find(type_marker) != -1 for col in columns)
    else:
        self.typed = typed
def __init__(
    self,
    host,
    index,
    type=None,
    name=None,
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    OPEN A CONTAINER OVER AN ELASTICSEARCH INDEX

    host, type, port, timeout AND wait_for_active_shards ARE NOT READ
    DIRECTLY HERE (PRESUMABLY THEY TRAVEL INSIDE kwargs - TODO CONFIRM)

    :param index: INDEX NAME; ALSO THE alias GIVEN TO THE READ-ONLY Alias
    :param name: CONTAINER NAME; self.name IS coalesce(name, index)
    :param read_only: TRUTHY TO USE elasticsearch.Alias, OTHERWISE
                      Cluster.get_index() PROVIDES THE INDEX
    :param typed: EXPECTED TYPED MODE; WHEN == None THE MODE IS DETECTED
                  FROM THE COLUMNS, OTHERWISE A MISMATCH WITH THE DETECTED
                  MODE IS REPORTED THROUGH Log.error
    :param kwargs: THE SETTINGS; KEPT AS self.settings AND FORWARDED TO
                   THE elasticsearch AND ElasticsearchMetadata CONSTRUCTORS
    """
    Container.__init__(self)

    if not container.config.default:
        # NO DEFAULT CONTAINER YET: REGISTER THESE SETTINGS AS THE DEFAULT
        container.config.default = {
            "type": "elasticsearch",
            "settings": unwrap(kwargs),
        }

    self.settings = kwargs
    self.name = coalesce(name, index)

    if read_only:
        self.es = elasticsearch.Alias(alias=index, kwargs=kwargs)
    else:
        cluster = elasticsearch.Cluster(kwargs=kwargs)
        self.es = cluster.get_index(read_only=read_only, kwargs=kwargs)

    self._namespace = ElasticsearchMetadata(kwargs=kwargs)
    # self.settings IS THE kwargs OBJECT, SO THIS WRITES INTO kwargs TOO
    self.settings.type = self.es.settings.type
    self.edges = Data()
    self.worker = None

    columns = self.snowflake.columns  # ABSOLUTE COLUMNS
    is_typed = any(col.es_column == EXISTS_TYPE for col in columns)

    if typed == None:
        # SWITCH ON TYPED MODE
        self.typed = is_typed
        return

    if is_typed != typed:
        Log.error(
            "Expecting given typed {{typed}} to match {{is_typed}}",
            typed=typed,
            is_typed=is_typed
        )
    self.typed = typed
def __init__(
    self,
    host,
    index,
    type=None,
    name=None,
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    OPEN A CONTAINER OVER AN ELASTICSEARCH INDEX AND, WHEN typed IS FALSEY,
    ADD ONE EXISTENCE COLUMN FOR EVERY QUERY PATH

    host, type, port, timeout AND wait_for_active_shards ARE NOT READ
    DIRECTLY HERE (PRESUMABLY THEY TRAVEL INSIDE kwargs - TODO CONFIRM)

    :param index: INDEX NAME; ALSO THE alias GIVEN TO THE READ-ONLY Alias
    :param name: CONTAINER NAME; self.name IS coalesce(name, index)
    :param read_only: TRUTHY TO USE elasticsearch.Alias, OTHERWISE
                      Cluster.get_index() PROVIDES THE INDEX
    :param typed: EXPECTED TYPED MODE; WHEN == None THE MODE IS DETECTED
                  FROM THE COLUMNS, OTHERWISE A MISMATCH WITH THE DETECTED
                  MODE IS REPORTED THROUGH Log.error
    :param kwargs: THE SETTINGS; KEPT AS self.settings AND FORWARDED TO
                   THE elasticsearch AND ElasticsearchMetadata CONSTRUCTORS
    """
    Container.__init__(self)

    if not container.config.default:
        # NO DEFAULT CONTAINER YET: REGISTER THESE SETTINGS AS THE DEFAULT
        container.config.default = {
            "type": "elasticsearch",
            "settings": unwrap(kwargs),
        }

    self.settings = kwargs
    self.name = coalesce(name, index)

    if read_only:
        self.es = elasticsearch.Alias(alias=index, kwargs=kwargs)
    else:
        cluster = elasticsearch.Cluster(kwargs=kwargs)
        self.es = cluster.get_index(read_only=read_only, kwargs=kwargs)

    self._namespace = ElasticsearchMetadata(kwargs=kwargs)
    # self.settings IS THE kwargs OBJECT, SO THIS WRITES INTO kwargs TOO
    self.settings.type = self.es.settings.type
    self.edges = Data()
    self.worker = None

    columns = self.snowflake.columns  # ABSOLUTE COLUMNS
    is_typed = any(col.es_column == EXISTS_TYPE for col in columns)

    # SWITCH ON TYPED MODE, OR VERIFY THE MODE THE CALLER ASKED FOR
    typed_not_given = typed == None
    if not typed_not_given and is_typed != typed:
        Log.error(
            "Expecting given typed {{typed}} to match {{is_typed}}",
            typed=typed,
            is_typed=is_typed
        )
    self.typed = is_typed if typed_not_given else typed

    # NOTE: THE typed ARGUMENT IS TESTED, NOT self.typed, SO THE EXISTENCE
    # COLUMNS ARE ALSO ADDED WHEN typed WAS LEFT AS None
    if typed:
        return

    # ADD EXISTENCE COLUMNS
    parent_of = {".": None}  # MAP FROM path TO parent TO MAKE A TREE

    def ancestry(path):
        # WALK UP THE TREE, COLLECTING path AND EACH PARENT, UNTIL A FALSEY PARENT IS MET
        chain = []
        while path:
            chain.append(path)
            path = parent_of[path]
        return chain

    steps = sort_using_key(
        {step for path in self.snowflake.query_paths for step in path},
        key=lambda p: len(split_field(p))
    )
    for step in sorted(steps):
        if step in parent_of:
            continue
        # PICK THE KNOWN PATH THAT PREFIXES step AND EXTENDS THE BEST ONE FOUND SO FAR
        closest = '.'
        for candidate in parent_of.keys():
            if startswith_field(step, candidate) and startswith_field(candidate, closest):
                closest = candidate
        parent_of[step] = closest

    for path in parent_of.keys():
        # THE ROOT HAS NO PARENT, SO ITS nested_path DEFAULTS TO ['.']
        nested_path = ancestry(parent_of[path]) or ['.']
        self.namespace.meta.columns.add(Column(
            name=path,
            es_column=path,
            es_index=self.name,
            es_type=OBJECT,
            jx_type=EXISTS,
            nested_path=nested_path,
            last_updated=Date.now()
        ))
def __init__(
    self,
    host,
    index,  # THE NAME OF THE SNOWFLAKE (IF WRITING)
    alias=None,  # THE NAME OF THE SNOWFLAKE (FOR READING)
    type=None,
    name=None,  # THE FULL NAME OF THE TABLE (THE NESTED PATH INTO THE SNOWFLAKE)
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    OPEN A CONTAINER OVER AN ELASTICSEARCH SNOWFLAKE AND, WHEN typed IS
    FALSEY, ADD ONE EXISTENCE COLUMN FOR EVERY QUERY PATH

    host, type, port, timeout AND wait_for_active_shards ARE NOT READ
    DIRECTLY HERE (PRESUMABLY THEY TRAVEL INSIDE kwargs - TODO CONFIRM)

    :param index: THE NAME OF THE SNOWFLAKE (IF WRITING)
    :param alias: THE NAME OF THE SNOWFLAKE (FOR READING)
    :param name: THE FULL NAME OF THE TABLE; self.name IS THE RESULT OF
                 _find_alias(coalesce(alias, index, name))
    :param read_only: TRUTHY TO USE elasticsearch.Alias, OTHERWISE
                      Cluster.get_index() PROVIDES THE INDEX
    :param typed: EXPECTED TYPED MODE; WHEN == None THE MODE IS DETECTED
                  FROM THE COLUMNS, OTHERWISE A MISMATCH WITH THE DETECTED
                  MODE IS REPORTED THROUGH Log.error
    :param kwargs: THE SETTINGS; KEPT AS self.settings AND FORWARDED TO
                   THE elasticsearch AND ElasticsearchMetadata CONSTRUCTORS
    """
    Container.__init__(self)

    if not container.config.default:
        # NO DEFAULT CONTAINER YET: REGISTER THESE SETTINGS AS THE DEFAULT
        container.config.default = {
            "type": "elasticsearch",
            "settings": unwrap(kwargs),
        }

    self.edges = Data()  # SET EARLY, SO OTHER PROCESSES CAN REQUEST IT
    self.worker = None
    self.settings = kwargs
    self._namespace = ElasticsearchMetadata(kwargs=kwargs)

    resolved_name = self._namespace._find_alias(coalesce(alias, index, name))
    self.name = resolved_name

    if read_only:
        self.es = elasticsearch.Alias(alias=resolved_name, index=None, kwargs=kwargs)
    else:
        cluster = elasticsearch.Cluster(kwargs=kwargs)
        self.es = cluster.get_index(read_only=read_only, kwargs=kwargs)

    self._ensure_max_result_window_set(resolved_name)
    # self.settings IS THE kwargs OBJECT, SO THIS WRITES INTO kwargs TOO
    self.settings.type = self.es.settings.type
    self.stats = QueryStats(self.es.cluster)

    columns = self.snowflake.columns  # ABSOLUTE COLUMNS
    is_typed = any(col.es_column == EXISTS_TYPE for col in columns)

    # SWITCH ON TYPED MODE, OR VERIFY THE MODE THE CALLER ASKED FOR
    typed_not_given = typed == None
    if not typed_not_given and is_typed != typed:
        Log.error(
            "Expecting given typed {{typed}} to match {{is_typed}}",
            typed=typed,
            is_typed=is_typed
        )
    self.typed = is_typed if typed_not_given else typed

    # NOTE: THE typed ARGUMENT IS TESTED, NOT self.typed, SO THE EXISTENCE
    # COLUMNS ARE ALSO ADDED WHEN typed WAS LEFT AS None
    if typed:
        return

    # ADD EXISTENCE COLUMNS
    parent_of = {'.': None}  # MAP FROM path TO parent TO MAKE A TREE

    def lineage(path):
        # TUPLE OF path, THEN EACH PARENT IN TURN, ALWAYS ENDING WITH '.'
        chain = []
        while not (path == '.'):
            chain.append(path)
            path = parent_of[path]
        chain.append('.')
        return tuple(chain)

    query_paths = sort_using_key(
        {step for path in self.snowflake.query_paths for step in path},
        key=lambda p: len(split_field(p))
    )
    for step in query_paths:
        if step in parent_of:
            continue
        # PICK THE KNOWN PATH THAT PREFIXES step AND EXTENDS THE BEST ONE FOUND SO FAR
        closest = '.'
        for candidate in parent_of.keys():
            if startswith_field(step, candidate) and startswith_field(candidate, closest):
                closest = candidate
        parent_of[step] = closest

    for path in parent_of.keys():
        if path == ".":
            # THE ROOT IS A PLAIN OBJECT, NESTED ONLY IN ITSELF
            nested_path = ('.', )
            jx_type = OBJECT
        else:
            # EVERY OTHER PATH IS NESTED; ITS nested_path LEAVES OUT THE PATH ITSELF
            nested_path = lineage(path)[1:]
            jx_type = NESTED

        self.namespace.meta.columns.add(
            Column(
                name=path,
                es_column=path,
                es_index=self.name,
                es_type=jx_type,
                jx_type=jx_type,
                cardinality=1,
                nested_path=nested_path,
                multi=1001 if jx_type is NESTED else 1,
                last_updated=Date.now()
            )
        )