Beispiel #1
0
    def __init__(self,
                 host,
                 index,
                 type=None,
                 alias=None,
                 name=None,
                 port=9200,
                 read_only=True,
                 typed=None,
                 settings=None):
        Container.__init__(self, None)
        if not containers.config.default:
            containers.config.default.settings = settings
        self.settings = settings
        self.name = coalesce(name, alias, index)
        if read_only:
            self._es = elasticsearch.Alias(alias=coalesce(alias, index),
                                           settings=settings)
        else:
            self._es = elasticsearch.Cluster(settings=settings).get_index(
                read_only=read_only, settings=settings)

        self.meta = FromESMetadata(settings=settings)
        self.settings.type = self._es.settings.type
        self.edges = Dict()
        self.worker = None
        if typed == None:
            self._columns = self.get_columns(table_name=index)
            # SWITCH ON TYPED MODE
            self.typed = any(c.name in ("$value", "$object")
                             for c in self._columns)
        else:
            self.typed = typed
Beispiel #2
0
    def __init__(self, settings, queue_size=10000):
        self.settings = settings
        self.queue_size = queue_size
        self.indicies = {}  # MAP DATE (AS UNIX TIMESTAMP) TO INDEX

        es = elasticsearch.Cluster(
            self.settings).get_or_create_index(settings=self.settings)
        es.add_alias(self.settings.index)
        es.set_refresh_interval(seconds=60 * 60)
        self.queue = es.threaded_queue(max_size=self.queue_size,
                                       batch_size=5000,
                                       silent=False)
        self.es = elasticsearch.Alias(alias=settings.index, settings=settings)
Beispiel #3
0
 def __init__(self,
              host,
              index,
              type=None,
              alias=None,
              name=None,
              port=9200,
              settings=None):
     self.settings = settings
     self.name = coalesce(name, alias, index)
     self._es = elasticsearch.Alias(alias=coalesce(alias, index),
                                    settings=settings)
     self.settings.type = self._es.settings.type  # Alias() WILL ASSIGN A TYPE IF IT WAS MISSING
     self.edges = Dict()
     self.worker = None
     self.ready = False
Beispiel #4
0
    def __init__(
            self,
            host,
            index,
            type=None,
            alias=None,
            name=None,
            port=9200,
            read_only=True,
            timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
            wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
            typed=None,
            kwargs=None):
        Container.__init__(self)
        if not container.config.default:
            container.config.default = {
                "type": "elasticsearch",
                "settings": unwrap(kwargs)
            }
        self.settings = kwargs
        self.name = name = coalesce(name, alias, index)
        if read_only:
            self.es = elasticsearch.Alias(alias=coalesce(alias, index),
                                          kwargs=kwargs)
        else:
            self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(
                read_only=read_only, kwargs=kwargs)

        self._namespace = ElasticsearchMetadata(kwargs=kwargs)
        self.settings.type = self.es.settings.type
        self.edges = Data()
        self.worker = None

        columns = self._namespace.get_snowflake(
            self.es.settings.alias).columns  # ABSOLUTE COLUMNS
        is_typed = any(c.es_column == EXISTS_TYPE for c in columns)

        if typed == None:
            # SWITCH ON TYPED MODE
            self.typed = is_typed
        else:
            if is_typed != typed:
                Log.error(
                    "Expecting given typed {{typed}} to match {{is_typed}}",
                    typed=typed,
                    is_typed=is_typed)
            self.typed = typed
Beispiel #5
0
    def __init__(
            self,
            host,
            index,
            type=None,
            alias=None,
            name=None,
            port=9200,
            read_only=True,
            timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
            wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
            typed=None,
            kwargs=None):
        Container.__init__(self, None)
        if not container.config.default:
            container.config.default = {
                "type": "elasticsearch",
                "settings": unwrap(kwargs)
            }
        self.settings = kwargs
        self.name = coalesce(name, alias, index)
        if read_only:
            self._es = elasticsearch.Alias(alias=coalesce(alias, index),
                                           kwargs=kwargs)
        else:
            self._es = elasticsearch.Cluster(kwargs=kwargs).get_index(
                read_only=read_only, kwargs=kwargs)

        self.meta = FromESMetadata(kwargs=kwargs)
        self.settings.type = self._es.settings.type
        self.edges = Data()
        self.worker = None

        columns = self.meta.get_columns(
            table_name=coalesce(name, alias, index))
        self._schema = Schema(coalesce(name, alias, index), columns)

        if typed == None:
            # SWITCH ON TYPED MODE
            self.typed = any(
                c.es_column.find("." + TYPE_PREFIX) != -1 for c in columns)
        else:
            self.typed = typed
Beispiel #6
0
    def __init__(
        self,
        host,
        index,
        type=None,
        alias=None,
        name=None,
        port=9200,
        read_only=True,
        timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
        consistency="one",  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
        typed=None,
        kwargs=None
    ):
        Container.__init__(self, None)
        if not containers.config.default:
            containers.config.default.settings = kwargs
        self.settings = kwargs
        self.name = coalesce(name, alias, index)
        if read_only:
            self._es = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs)
        else:
            self._es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs)

        self.meta = FromESMetadata(kwargs=kwargs)
        self.settings.type = self._es.settings.type
        self.edges = Data()
        self.worker = None

        columns = self.get_columns(table_name=name)
        self._schema = Schema(columns)

        if typed == None:
            # SWITCH ON TYPED MODE
            self.typed = any(c.name in ("$value", "$object") for c in columns)
        else:
            self.typed = typed
Beispiel #7
0
    def __init__(
        self,
        host,
        index,
        type=None,
        name=None,
        port=9200,
        read_only=True,
        timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
        wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
        typed=None,
        kwargs=None
    ):
        Container.__init__(self)
        if not container.config.default:
            container.config.default = {
                "type": "elasticsearch",
                "settings": unwrap(kwargs)
            }
        self.settings = kwargs
        self.name = name = coalesce(name, index)
        if read_only:
            self.es = elasticsearch.Alias(alias=index, kwargs=kwargs)
        else:
            self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs)

        self._namespace = ElasticsearchMetadata(kwargs=kwargs)
        self.settings.type = self.es.settings.type
        self.edges = Data()
        self.worker = None

        columns = self.snowflake.columns  # ABSOLUTE COLUMNS
        is_typed = any(c.es_column == EXISTS_TYPE for c in columns)

        if typed == None:
            # SWITCH ON TYPED MODE
            self.typed = is_typed
        else:
            if is_typed != typed:
                Log.error("Expecting given typed {{typed}} to match {{is_typed}}", typed=typed, is_typed=is_typed)
            self.typed = typed

        if not typed:
            # ADD EXISTENCE COLUMNS
            all_paths = {".": None}  # MAP FROM path TO parent TO MAKE A TREE

            def nested_path_of(v):
                if not v:
                    return []
                else:
                    return [v] + nested_path_of(all_paths[v])

            all = sort_using_key(set(step for path in self.snowflake.query_paths for step in path), key=lambda p: len(split_field(p)))
            for step in sorted(all):
                if step in all_paths:
                    continue
                else:
                    best = '.'
                    for candidate in all_paths.keys():
                        if startswith_field(step, candidate):
                            if startswith_field(candidate, best):
                                best = candidate
                    all_paths[step] = best
            for p in all_paths.keys():
                nested_path = nested_path_of(all_paths[p])
                if not nested_path:
                    nested_path = ['.']
                self.namespace.meta.columns.add(Column(
                    name=p,
                    es_column=p,
                    es_index=self.name,
                    es_type=OBJECT,
                    jx_type=EXISTS,
                    nested_path=nested_path,
                    last_updated=Date.now()
                ))