def __init__(self, host, index, type=None, alias=None, name=None, port=9200, read_only=True, typed=None, settings=None):
    """
    Set up a container backed by an Elasticsearch index or alias.

    :param host: not read directly in this method (presumably carried in `settings` -- confirm)
    :param index: index name; fallback for both the container name and the alias
    :param type: not read directly in this method
    :param alias: alias name; preferred over `index` when opening the alias
    :param name: container name; preferred over `alias` and `index`
    :param port: not read directly in this method
    :param read_only: truthy -> wrap an `elasticsearch.Alias`; otherwise ask the cluster for the index
    :param typed: explicit typed-mode flag; when it compares equal to None the mode is
                  detected from the column names
    :param settings: configuration object handed to every Elasticsearch helper;
                     its `type` attribute is overwritten here
    """
    Container.__init__(self, None)

    # FIRST CONTAINER TO BE BUILT SUPPLIES THE DEFAULT SETTINGS
    if not containers.config.default:
        containers.config.default.settings = settings

    self.settings = settings
    self.name = coalesce(name, alias, index)

    if read_only:
        backend = elasticsearch.Alias(alias=coalesce(alias, index), settings=settings)
    else:
        cluster = elasticsearch.Cluster(settings=settings)
        backend = cluster.get_index(read_only=read_only, settings=settings)
    self._es = backend

    self.meta = FromESMetadata(settings=settings)
    # COPY BACK WHATEVER TYPE THE BACKEND ENDED UP WITH
    self.settings.type = self._es.settings.type
    self.edges = Dict()
    self.worker = None

    # `== None` (NOT `is None`) IS KEPT ON PURPOSE: VALUES THAT COMPARE EQUAL TO None MUST TAKE THIS BRANCH
    if typed == None:
        # NOTE: self._columns IS ONLY ASSIGNED ON THIS BRANCH
        self._columns = self.get_columns(table_name=index)
        # SWITCH ON TYPED MODE
        marker_names = ("$value", "$object")
        self.typed = any(column.name in marker_names for column in self._columns)
    else:
        self.typed = typed
def __init__(self, settings, queue_size=10000):
    """
    Prepare an index for queued writes and keep an alias handle for it.

    Gets (or creates) the index described by `settings`, adds an alias named
    `settings.index` to it, sets its refresh interval to one hour, and builds
    a threaded queue on top of it.

    :param settings: configuration object; `settings.index` is used as the alias name
    :param queue_size: `max_size` given to the threaded queue
    """
    self.settings = settings
    self.queue_size = queue_size
    self.indicies = {}  # MAP DATE (AS UNIX TIMESTAMP) TO INDEX

    cluster = elasticsearch.Cluster(self.settings)
    target_index = cluster.get_or_create_index(settings=self.settings)
    target_index.add_alias(self.settings.index)

    one_hour = 60 * 60  # SECONDS
    target_index.set_refresh_interval(seconds=one_hour)

    self.queue = target_index.threaded_queue(
        max_size=self.queue_size,
        batch_size=5000,
        silent=False
    )
    self.es = elasticsearch.Alias(alias=settings.index, settings=settings)
def __init__(self, host, index, type=None, alias=None, name=None, port=9200, settings=None):
    """
    Wrap an Elasticsearch alias; the instance starts out with `ready` set to False.

    :param host: not read directly in this method
    :param index: index name; fallback for both the container name and the alias
    :param type: not read directly in this method
    :param alias: alias name; preferred over `index`
    :param name: container name; preferred over `alias` and `index`
    :param port: not read directly in this method
    :param settings: configuration object passed to `elasticsearch.Alias`;
                     its `type` attribute is overwritten here
    """
    self.settings = settings
    self.name = coalesce(name, alias, index)

    alias_name = coalesce(alias, index)
    self._es = elasticsearch.Alias(alias=alias_name, settings=settings)

    # Alias() WILL ASSIGN A TYPE IF IT WAS MISSING
    self.settings.type = self._es.settings.type

    self.edges = Dict()
    self.worker = None
    self.ready = False
def __init__(
    self,
    host,
    index,
    type=None,
    alias=None,
    name=None,
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    Set up a container backed by an Elasticsearch index or alias, and check
    the requested typed mode against what the index columns show.

    :param index: index name; fallback for both the container name and the alias
    :param alias: alias name; preferred over `index`
    :param name: container name; preferred over `alias` and `index`
    :param read_only: truthy -> wrap an `elasticsearch.Alias`; otherwise ask the cluster for the index
    :param typed: expected typed mode; when it compares equal to None the detected mode is used,
                  otherwise a mismatch with the detected mode is reported through `Log.error`
    :param kwargs: configuration object handed to every helper; its `type` attribute is overwritten here

    `host`, `type`, `port`, `timeout` and `wait_for_active_shards` are not read
    directly in this method (presumably they travel inside `kwargs` -- confirm).
    """
    Container.__init__(self)

    # FIRST CONTAINER TO BE BUILT BECOMES THE DEFAULT CONFIGURATION
    if not container.config.default:
        container.config.default = {
            "type": "elasticsearch",
            "settings": unwrap(kwargs)
        }

    self.settings = kwargs
    self.name = coalesce(name, alias, index)

    if read_only:
        backend = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
    else:
        cluster = elasticsearch.Cluster(kwargs=kwargs)
        backend = cluster.get_index(read_only=read_only, kwargs=kwargs)
    self.es = backend

    self._namespace = ElasticsearchMetadata(kwargs=kwargs)
    self.settings.type = self.es.settings.type
    self.edges = Data()
    self.worker = None

    snowflake = self._namespace.get_snowflake(self.es.settings.alias)
    columns = snowflake.columns  # ABSOLUTE COLUMNS
    is_typed = any(column.es_column == EXISTS_TYPE for column in columns)

    # `== None` (NOT `is None`) IS KEPT ON PURPOSE: VALUES THAT COMPARE EQUAL TO None MUST TAKE THIS BRANCH
    if typed == None:
        # SWITCH ON TYPED MODE
        resolved = is_typed
    else:
        if is_typed != typed:
            Log.error(
                "Expecting given typed {{typed}} to match {{is_typed}}",
                typed=typed,
                is_typed=is_typed
            )
        resolved = typed
    self.typed = resolved
def __init__(
    self,
    host,
    index,
    type=None,
    alias=None,
    name=None,
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    Set up a container backed by an Elasticsearch index or alias, load its
    columns from the metadata service and build a `Schema` from them.

    :param index: index name; fallback for the container name, table name and alias
    :param alias: alias name; preferred over `index`
    :param name: container name; preferred over `alias` and `index`
    :param read_only: truthy -> wrap an `elasticsearch.Alias`; otherwise ask the cluster for the index
    :param typed: explicit typed-mode flag; when it compares equal to None, typed mode is
                  switched on if any column's `es_column` contains "." + TYPE_PREFIX
    :param kwargs: configuration object handed to every helper; its `type` attribute is overwritten here

    `host`, `type`, `port`, `timeout` and `wait_for_active_shards` are not read
    directly in this method (presumably they travel inside `kwargs` -- confirm).
    """
    Container.__init__(self, None)

    # FIRST CONTAINER TO BE BUILT BECOMES THE DEFAULT CONFIGURATION
    if not container.config.default:
        container.config.default = {
            "type": "elasticsearch",
            "settings": unwrap(kwargs)
        }

    # ONE RESOLUTION OF THE NAME SERVES AS CONTAINER NAME, TABLE NAME AND SCHEMA NAME
    table = coalesce(name, alias, index)

    self.settings = kwargs
    self.name = table

    if read_only:
        backend = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
    else:
        cluster = elasticsearch.Cluster(kwargs=kwargs)
        backend = cluster.get_index(read_only=read_only, kwargs=kwargs)
    self._es = backend

    self.meta = FromESMetadata(kwargs=kwargs)
    self.settings.type = self._es.settings.type
    self.edges = Data()
    self.worker = None

    columns = self.meta.get_columns(table_name=table)
    self._schema = Schema(table, columns)

    # `== None` (NOT `is None`) IS KEPT ON PURPOSE: VALUES THAT COMPARE EQUAL TO None MUST TAKE THIS BRANCH
    if typed == None:
        # SWITCH ON TYPED MODE
        marker = "." + TYPE_PREFIX
        self.typed = any(column.es_column.find(marker) != -1 for column in columns)
    else:
        self.typed = typed
def __init__(
    self,
    host,
    index,
    type=None,
    alias=None,
    name=None,
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    consistency="one",  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    Set up a container backed by an Elasticsearch index or alias, load its
    columns and build a `Schema` from them.

    :param index: index name; fallback for both the container name and the alias
    :param alias: alias name; preferred over `index`
    :param name: container name; preferred over `alias` and `index`; also passed,
                 unresolved, as the table name when loading columns
    :param read_only: truthy -> wrap an `elasticsearch.Alias`; otherwise ask the cluster for the index
    :param typed: explicit typed-mode flag; when it compares equal to None, typed mode is
                  switched on if any column is named "$value" or "$object"
    :param kwargs: configuration object handed to every helper; its `type` attribute is overwritten here

    `host`, `type`, `port`, `timeout` and `consistency` are not read directly
    in this method (presumably they travel inside `kwargs` -- confirm).
    """
    Container.__init__(self, None)

    # FIRST CONTAINER TO BE BUILT SUPPLIES THE DEFAULT SETTINGS
    if not containers.config.default:
        containers.config.default.settings = kwargs

    self.settings = kwargs
    self.name = coalesce(name, alias, index)

    if read_only:
        backend = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
    else:
        cluster = elasticsearch.Cluster(kwargs=kwargs)
        backend = cluster.get_index(read_only=read_only, kwargs=kwargs)
    self._es = backend

    self.meta = FromESMetadata(kwargs=kwargs)
    self.settings.type = self._es.settings.type
    self.edges = Data()
    self.worker = None

    # NOTE(review): THE RAW `name` ARGUMENT (POSSIBLY None) IS USED HERE, NOT self.name -- CONFIRM THIS IS INTENDED
    columns = self.get_columns(table_name=name)
    self._schema = Schema(columns)

    # `== None` (NOT `is None`) IS KEPT ON PURPOSE: VALUES THAT COMPARE EQUAL TO None MUST TAKE THIS BRANCH
    if typed == None:
        # SWITCH ON TYPED MODE
        marker_names = ("$value", "$object")
        self.typed = any(column.name in marker_names for column in columns)
    else:
        self.typed = typed
def __init__(
    self,
    host,
    index,
    type=None,
    name=None,
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    Set up a container backed by an Elasticsearch index, check the requested
    typed mode against the index columns and, when `typed` is falsy, register
    one existence column for every known query path step.

    :param index: index name; fallback for the container name and used as the alias
    :param name: container name; preferred over `index`
    :param read_only: truthy -> wrap an `elasticsearch.Alias`; otherwise ask the cluster for the index
    :param typed: expected typed mode; when it compares equal to None the detected mode is used,
                  otherwise a mismatch with the detected mode is reported through `Log.error`
    :param kwargs: configuration object handed to every helper; its `type` attribute is overwritten here

    `host`, `type`, `port`, `timeout` and `wait_for_active_shards` are not read
    directly in this method (presumably they travel inside `kwargs` -- confirm).
    `self.snowflake` and `self.namespace` are not assigned here; presumably the
    class provides them (verify against the class body).
    """
    Container.__init__(self)

    # FIRST CONTAINER TO BE BUILT BECOMES THE DEFAULT CONFIGURATION
    if not container.config.default:
        container.config.default = {
            "type": "elasticsearch",
            "settings": unwrap(kwargs)
        }

    self.settings = kwargs
    self.name = coalesce(name, index)

    if read_only:
        backend = elasticsearch.Alias(alias=index, kwargs=kwargs)
    else:
        cluster = elasticsearch.Cluster(kwargs=kwargs)
        backend = cluster.get_index(read_only=read_only, kwargs=kwargs)
    self.es = backend

    self._namespace = ElasticsearchMetadata(kwargs=kwargs)
    self.settings.type = self.es.settings.type
    self.edges = Data()
    self.worker = None

    columns = self.snowflake.columns  # ABSOLUTE COLUMNS
    is_typed = any(column.es_column == EXISTS_TYPE for column in columns)

    # `== None` (NOT `is None`) IS KEPT ON PURPOSE: VALUES THAT COMPARE EQUAL TO None MUST TAKE THIS BRANCH
    if typed == None:
        # SWITCH ON TYPED MODE
        resolved = is_typed
    else:
        if is_typed != typed:
            Log.error("Expecting given typed {{typed}} to match {{is_typed}}", typed=typed, is_typed=is_typed)
        resolved = typed
    self.typed = resolved

    # NOTE(review): THE GUARD TESTS THE `typed` ARGUMENT, NOT self.typed, SO AN OMITTED `typed`
    # STILL ADDS EXISTENCE COLUMNS EVEN WHEN TYPED MODE WAS DETECTED -- CONFIRM THIS IS INTENDED
    if typed:
        return

    # ADD EXISTENCE COLUMNS
    parent_of = {".": None}  # MAP FROM path TO parent TO MAKE A TREE

    def nested_path_of(v):
        # WALK UP THE TREE FROM v, COLLECTING EVERY PATH UNTIL A FALSY PARENT IS REACHED
        lineage = []
        while v:
            lineage.append(v)
            v = parent_of[v]
        return lineage

    steps = sort_using_key(
        {step for path in self.snowflake.query_paths for step in path},
        key=lambda p: len(split_field(p))
    )
    for step in sorted(steps):
        if step in parent_of:
            continue
        # PICK THE MOST SPECIFIC ALREADY-KNOWN PATH THAT IS A PREFIX OF step
        best = '.'
        for candidate in parent_of.keys():
            if startswith_field(step, candidate) and startswith_field(candidate, best):
                best = candidate
        parent_of[step] = best

    for path, parent in parent_of.items():
        nested_path = nested_path_of(parent) or ['.']
        self.namespace.meta.columns.add(Column(
            name=path,
            es_column=path,
            es_index=self.name,
            es_type=OBJECT,
            jx_type=EXISTS,
            nested_path=nested_path,
            last_updated=Date.now()
        ))