def _create_index(self):
    # Return the name of the current index, creating it on first use.
    # Schema references:
    # https://github.com/csirtgadgets/massive-octo-spice/blob/develop/elasticsearch/observables.json
    # http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.bulk
    # every time we check it does a HEAD req
    # -> so the last answer is cached and trusted for two minutes
    if self.last_index_value and (datetime.utcnow() - self.last_index_check) < timedelta(minutes=2):
        return self.last_index_value

    idx = self._current_index()

    if not self.handle.indices.exists(idx):
        logger.info('Creating new index')
        index = Index(idx)
        index.aliases(live={})
        index.doc_type(Indicator)
        index.settings(max_result_window=WINDOW_LIMIT)
        try:
            index.create()
        # after implementing auth to use cif_store, there appears to sometimes be a race condition
        # where both the normal store and the auth store don't see the index and then try to create simultaneously.
        # gracefully handle if that happens
        except elasticsearch.exceptions.TransportError as e:
            # the exception name differs across ES versions; match both spellings
            if (e.error.startswith('IndexAlreadyExistsException')
                    or e.error.startswith('index_already_exists_exception')):
                pass
            else:
                raise
        self.handle.indices.flush(idx)

    # refresh the two-minute cache window
    self.last_index_check = datetime.utcnow()
    self.last_index_value = idx
    return idx
def create_index(
    conn,
    version=None,
    minor_version=None,
    setup_write_alias=True,
    setup_read_alias=True,
):
    """Create a new versioned index on *conn*, optionally wiring up the
    read/write aliases, and register the document types on it."""
    index_name = get_index_name(
        conn,
        version=version,
        minor_version=minor_version,
        bump_minor=minor_version is None,
    )
    index = Index(index_name, using=conn)

    # Collect the requested aliases, then attach them in one call.
    alias_map = {}
    if setup_write_alias:
        alias_map[get_write_alias()] = {}
    if setup_read_alias:
        alias_map[get_read_alias()] = {}
    if alias_map:
        index.aliases(**alias_map)

    index.settings(
        index={'analysis': default_search.get_analysis_definition()})

    # register the doc types
    index.doc_type(ReplyV1)

    index.create()
    return index
def setup_indexes(doc_type, reindex=False, force=False):
    """
    Set up an index given a doc_type (e.g. files, projects).

    - If an index exists under the alias and force=False, the existing
      index is returned untouched.
    - If an index exists under the alias and force=True, every index
      behind that alias is deleted and a fresh one is created under the
      alias with the generated name.
    - If no index exists under the alias, a fresh one is created.
    """
    base_name = settings.ES_INDEX_PREFIX.format(doc_type)
    new_name = '{}-{}'.format(base_name, index_time_string())
    alias = (base_name + '-reindex') if reindex else base_name

    index = Index(alias)
    if not force and index.exists():
        # Existing index and no force: hand back what is already there.
        return index

    # force=True: drop every concrete index currently behind the alias.
    while index.exists():
        Index(list(index.get_alias().keys())[0]).delete(ignore=404)
        index = Index(alias)

    # Build the replacement index and point the alias at it.
    index = Index(new_name)
    index.aliases(**{alias: {}})
    return index
def construct_index(cls, opts, bases):
    """Build the Index object for a document class.

    When *opts* is None the index configuration is cloned from the first
    base class that carries one; otherwise (or when no base has an index)
    a fresh Index is built from the attributes found on *opts*.
    """
    index = None
    if opts is None:
        # Inherit the Index configuration from the first base that has one.
        for base in bases:
            if getattr(base, "_index", DEFAULT_INDEX) is not DEFAULT_INDEX:
                parent = base._index
                index = Index(
                    parent._name,
                    doc_type=parent._mapping.doc_type,
                    using=parent._using,
                )
                # Copy the mutable pieces so the child can diverge safely.
                index._settings = parent._settings.copy()
                index._aliases = parent._aliases.copy()
                index._analysis = parent._analysis.copy()
                index._doc_types = parent._doc_types[:]
                break

    if index is None:
        index = Index(
            getattr(opts, "name", "*"),
            doc_type=getattr(opts, "doc_type", "doc"),
            using=getattr(opts, "using", "default"),
        )

    index.settings(**getattr(opts, "settings", {}))
    index.aliases(**getattr(opts, "aliases", {}))
    for analyzer in getattr(opts, "analyzers", ()):
        index.analyzer(analyzer)
    return index
def _init_index(index_config, force):
    """Create (or force-recreate) an index from *index_config* and register
    its document classes.

    index_config keys used: 'name', 'alias' (list of strings or
    {'name', 'config'} dicts) and 'documents' (list of {'class': dotted path}).
    Returns the Index object.
    """
    index = Index(index_config['name'])

    # Aliases may be plain names or dicts carrying an explicit alias config.
    aliases = {}
    for alias_val in index_config['alias']:
        if isinstance(alias_val, basestring):
            aliases[alias_val] = {}
        else:
            aliases[alias_val['name']] = alias_val['config']
    index.aliases(**aliases)

    if force:
        index.delete(ignore=404)

    try:
        index.create()
    except TransportError as err:
        # ES reports an already-existing index as HTTP 400
        # (resource_already_exists_exception); the historical 404 check is
        # kept for backward compatibility.
        if err.status_code in (400, 404):
            logger.debug('Index already exists, initializing document')
        else:
            # Previously any other transport error was silently swallowed,
            # hiding genuine failures; surface it instead.
            raise

    # Close the index while registering documents, then reopen it.
    index.close()
    for document_config in index_config['documents']:
        module_str, class_str = document_config['class'].rsplit('.', 1)
        module = import_module(module_str)
        cls = getattr(module, class_str)
        index.doc_type(cls)
        cls.init()
    index.open()
    return index
def setup_index(year):
    """Create the per-year politicians index with its alias and analyzer."""
    idx = Index(f'{INDEX_NAME}-{year}')
    idx.settings(number_of_shards=2, number_of_replicas=0)
    idx.aliases(politicians={})
    idx.document(Politicians)
    idx.analyzer(brazilian_analyzer)
    idx.create()
def test_aliases_add_to_object():
    """aliases() should store the given mapping on the Index instance."""
    name = ''.join(choice(string.ascii_letters) for _ in range(100))
    expected = {name: {}}
    index = Index('i', using='alias')
    index.aliases(**expected)
    assert index._aliases == expected
def test_aliases_returned_from_to_dict():
    """Aliases set on the Index must round-trip through to_dict()."""
    name = ''.join(choice(string.ascii_letters) for _ in range(100))
    expected = {name: {}}
    index = Index('i', using='alias')
    index.aliases(**expected)
    assert index._aliases == index.to_dict()['aliases'] == expected
def create_index(self, name, doc_type, alias):
    """Create index *name*, register *doc_type* on it, and alias it.

    The alias (e.g. `company_alias`) gives the application a stable name
    to search from and insert into while the concrete index can change.
    """
    index = Index(name)
    index.document(doc_type)
    index.analyzer(analyzer('english'))
    index.aliases(**{alias: {}})
    index.create()
    # Bind the document class to the freshly created index.
    doc_type._index = index
    return index
def construct_index(cls, opts, bases):
    """Build an Index from the Meta options, falling back to defaults
    ('*' pattern, 'doc' doc_type, 'default' connection)."""
    index = Index(
        getattr(opts, "name", "*"),
        doc_type=getattr(opts, "doc_type", "doc"),
        using=getattr(opts, "using", "default"),
    )
    index.settings(**getattr(opts, "settings", {}))
    index.aliases(**getattr(opts, "aliases", {}))
    for analyzer in getattr(opts, "analyzers", ()):
        index.analyzer(analyzer)
    return index
def create_index(self, name, document, alias):
    """Create index *name* for *document* and point *alias* at it.

    The alias (e.g. `company_alias`) lets the application read from and
    write to a stable name while the concrete index can be swapped out.
    """
    index = Index(name)
    index.document(document)
    index.analyzer(analyzer('english'))
    index.aliases(**{alias: {}})
    index.create()
    # Bind the document class to the new index and report success.
    document._index = index
    self.stdout.write(self.style.SUCCESS('New index created'))
    return index
def _create_index(self):
    """Ensure the monthly `indicators-YYYY.MM` index exists; return its name."""
    month = datetime.utcnow().strftime('%Y.%m')
    name = 'indicators-{}'.format(month)
    es = connections.get_connection()
    if not es.indices.exists(name):
        index = Index(name)
        index.aliases(live={})
        index.doc_type(Indicator)
        index.create()

        # Explicit mapping for the IPv4 fields.
        mapping = Mapping('indicator')
        mapping.field('indicator_ipv4', 'ip')
        mapping.field('indicator_ipv4_mask', 'integer')
        mapping.save(name)
    return name
def setup_indices():
    """Configure the politicians index and save it as an index template."""
    index = Index(f'{INDEX_NAME}-index')
    index.settings(number_of_shards=1, number_of_replicas=0)
    index.aliases(politicians={})
    index.document(Politicians)
    index.analyzer(analyzer('brazilian'))
    # Persist the configuration as a template matching all per-suffix indices.
    template = Politicians._index.as_template(INDEX_NAME, f'{INDEX_NAME}-*')
    template.save()
def _create_index():
    """Ensure the current indicators index exists (with its IPv4/date
    mapping) and return its name.

    Schema references:
    - https://github.com/csirtgadgets/massive-octo-spice/blob/develop/elasticsearch/observables.json
    - http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.bulk
    """
    idx = _current_index()
    es = connections.get_connection()
    if not es.indices.exists(idx):
        index = Index(idx)
        index.aliases(live={})
        index.doc_type(Indicator)
        index.create()

        # Explicit field mappings the dynamic mapper would get wrong.
        mapping = Mapping('indicator')
        mapping.field('indicator_ipv4', 'ip')
        mapping.field('indicator_ipv4_mask', 'integer')
        mapping.field('lasttime', 'date')
        mapping.save(idx)
    return idx
def _create_index(self):
    """Create this month's `indicators-YYYY.MM` index if missing; return its name.

    Schema references:
    - https://github.com/csirtgadgets/massive-octo-spice/blob/develop/elasticsearch/observables.json
    - http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.bulk
    """
    idx_name = 'indicators-{}'.format(datetime.utcnow().strftime('%Y.%m'))
    es = connections.get_connection()
    if not es.indices.exists(idx_name):
        idx = Index(idx_name)
        idx.aliases(live={})
        idx.doc_type(Indicator)
        idx.create()

        # Explicit mapping for the IPv4 fields.
        m = Mapping('indicator')
        m.field('indicator_ipv4', 'ip')
        m.field('indicator_ipv4_mask', 'integer')
        m.save(idx_name)
    return idx_name
def _create_index(self):
    """Return the current index name, creating the index when missing.

    Schema references:
    - https://github.com/csirtgadgets/massive-octo-spice/blob/develop/elasticsearch/observables.json
    - http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.bulk

    Each existence check issues a HEAD request, so the answer is trusted
    for two minutes between checks.
    """
    idx = self._current_index()
    if datetime.utcnow() - self.last_index_check < timedelta(minutes=2):
        return idx
    if not self.handle.indices.exists(idx):
        index = Index(idx)
        index.aliases(live={})
        index.doc_type(Indicator)
        index.settings(max_result_window=WINDOW_LIMIT)
        index.create()
        self.handle.indices.flush(idx)
    self.last_index_check = datetime.utcnow()
    return idx
def create_index(index_name, mapping, alias_names=()):
    """
    Creates an index, initialises it with a mapping, and optionally
    associates aliases with it.

    Note: if you need to perform multiple alias operations atomically, use
    start_alias_transaction() instead of passing aliases here.
    """
    index = Index(index_name, mapping.doc_type)
    for analyzer in ANALYZERS:
        index.analyzer(analyzer)
    index.settings(**settings.ES_INDEX_SETTINGS)
    index.mapping(mapping)
    # ES allows filter criteria on aliases, but we never use that feature,
    # so every alias gets an empty config.
    index.aliases(**{alias: {} for alias in alias_names})
    index.create()
def generate_template(index_name):
    """
    Build the index template describing the BuildResults document structure
    so it can be uploaded to an ElasticSearch instance.

    Args:
        index_name: index name to generate the template for; should be the
            index the module will upload to.

    Returns:
        dict representation of the index template.
    """
    document = _BuildResultsMetaDocument()
    index = Index(name=index_name)
    index.document(document)
    index.settings(refresh_interval="30s", number_of_shards="1", number_of_replicas="1")
    index.aliases(**{index_name: {}})
    template = index.as_template(
        template_name="template_" + index_name,
        pattern="%s-*" % index_name,
    )
    return template.to_dict()
def _create_index(self):
    """Return (and cache) the current index name, creating the index if needed.

    Schema references:
    - https://github.com/csirtgadgets/massive-octo-spice/blob/develop/elasticsearch/observables.json
    - http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.bulk

    The existence probe is a HEAD request, so the previous answer is
    reused for two minutes.
    """
    cache_fresh = (datetime.utcnow() - self.last_index_check) < timedelta(minutes=2)
    if self.last_index_value and cache_fresh:
        return self.last_index_value

    idx = self._current_index()
    if not self.handle.indices.exists(idx):
        index = Index(idx)
        index.aliases(live={})
        index.doc_type(Indicator)
        index.settings(max_result_window=WINDOW_LIMIT)
        index.create()
        self.handle.indices.flush(idx)

    self.last_index_check = datetime.utcnow()
    self.last_index_value = idx
    return idx
def _create_index(self, force=False):
    """Return the current index name, building the index when absent.

    Unless *force* is set, a cached answer is returned for one minute to
    avoid the HEAD request the existence check performs.
    """
    idx = self._current_index()
    recently_checked = (datetime.utcnow() - self.last_index_check) < timedelta(minutes=1)
    if recently_checked and not force:
        return idx
    if not self.handle().indices.exists(idx):
        logger.debug(f"building index: {idx}")
        index = Index(idx)
        index.aliases(live={})
        index.document(Indicator)
        index.settings(max_result_window=WINDOW_LIMIT,
                       number_of_shards=SHARDS,
                       number_of_replicas=REPLICAS)
        index.create()
        self.handle().indices.flush(idx)
    self.last_index_check = datetime.utcnow()
    return idx
def register(self, name=None, version=None, settings=None):
    """
    Register an index locally; `createall` is still required to persist
    it to Elasticsearch.

    Naming convention:

     -  the graph's name is used by default
     -  a "test" suffix is added under unit testing (to avoid clobbering real data)

    When *version* is given, the index is created under the fully
    versioned name and aliased to the unversioned name.
    """
    if version is not None:
        # Versioned index name plus an alias to the shortened name.
        index_name = IndexRegistry.name_for(self.graph, name=name, version=version)
        alias_name = IndexRegistry.name_for(self.graph, name=name)
    else:
        index_name = IndexRegistry.name_for(self.graph, name=name)
        alias_name = None

    if index_name in self.indexes:
        raise Exception(
            "Index already registered for name: {}".format(index_name))

    index = Index(name=index_name, using=self.graph.elasticsearch_client)
    if settings:
        index.settings(**settings)
    if alias_name is not None:
        index.aliases(**{alias_name: {}})

    self.indexes[index_name] = index
    return index
def setup_index(index_config, force=False, reindex=False):
    """
    Set up an index from a config dict.

    - If an index exists under the configured alias and force=False, the
      existing index is returned.
    - If an index exists under the alias and force=True, indices under the
      alias are deleted and a new index is created under the alias with the
      generated name.
    - If no index exists under the alias, a new one is created.

    Returns the Index object. (Previously the function returned None in
    every case even though its docstring promised the index.)
    """
    alias = index_config['alias'][0]
    if reindex:
        alias = alias + '_reindex'
    time_now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")
    name = '{}_{}'.format(index_config['alias'][0], time_now)
    index = Index(alias)
    if force or not index.exists():
        # If an index exists under the alias and force=True, delete any
        # indices with that alias.
        while index.exists():
            index.delete(ignore=404)
            index = Index(alias)
        # Create a new index with the generated name, aliased to `alias`.
        index = Index(name)
        index.aliases(**{alias: {}})
        # Register every configured document class on the new index.
        for document_config in index_config['documents']:
            module_str, class_str = document_config['class'].rsplit('.', 1)
            module = import_module(module_str)
            cls = getattr(module, class_str)
            index.doc_type(cls)
        index.create()
    return index
def setup_index(index_config, force=False, reindex=False):
    """
    Set up an index from a config dict.

    - If an index exists under the configured alias and force=False, the
      existing index is returned.
    - If an index exists under the alias and force=True, indices under the
      alias are deleted and a new index is created under the alias with the
      generated name.
    - If no index exists under the alias, a new one is created.

    Returns the Index object. (Previously the function returned None in
    every case even though its docstring promised the index.)
    """
    alias = index_config['alias']
    if reindex:
        alias += '-reindex'
    time_now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")
    index_name = '{}-{}'.format(alias, time_now)
    index = Index(alias, using=es_client)
    if force or not index.exists():
        # If an index exists under the alias and force=True, delete any
        # indices with that alias.
        while index.exists():
            Index(list(index.get_alias().keys())[0]).delete(ignore=404)
            index = Index(alias)
        # Create a new index with the generated name, aliased to `alias`.
        index = Index(index_name, using=es_client)
        index.aliases(**{alias: {}})
        # Register the configured document class and settings.
        module_str, class_str = index_config['document'].rsplit('.', 1)
        module = import_module(module_str)
        cls = getattr(module, class_str)
        index.document(cls)
        index.settings(**index_config['kwargs'])
        index.create()
    return index
def _create_index(self, force=False):
    """Ensure the current index exists and return its name.

    Schema references:
    - https://github.com/csirtgadgets/massive-octo-spice/blob/develop/elasticsearch/observables.json
    - http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.bulk

    Every existence check costs a HEAD request, so unless *force* is given
    the answer is cached for one minute.
    """
    idx = self._current_index()
    if not force and (datetime.utcnow() - self.last_index_check) < timedelta(minutes=1):
        return idx

    if not self.handle().indices.exists(idx):
        logger.debug(f"building index: {idx}")
        new_index = Index(idx)
        new_index.aliases(live={})
        new_index.document(Indicator)
        new_index.settings(max_result_window=WINDOW_LIMIT,
                           number_of_shards=SHARDS,
                           number_of_replicas=REPLICAS)
        new_index.create()
        self.handle().indices.flush(idx)

    self.last_index_check = datetime.utcnow()
    return idx
def register(self, name=None, version=None, settings=None):
    """
    Register an index locally (persist it later via `createall`).

    The index is named per convention: the graph's name is used by default,
    and a "test" suffix is appended under unit tests so real data is never
    clobbered.  A provided *version* yields a versioned index name plus an
    alias pointing at the unversioned name.
    """
    alias_name = None
    if version is None:
        index_name = IndexRegistry.name_for(self.graph, name=name)
    else:
        # Versioned index, aliased to the shortened (unversioned) name.
        index_name = IndexRegistry.name_for(self.graph, name=name, version=version)
        alias_name = IndexRegistry.name_for(self.graph, name=name)

    if index_name in self.indexes:
        raise Exception("Index already registered for name: {}".format(index_name))

    index = Index(name=index_name, using=self.graph.elasticsearch_client)
    if settings:
        index.settings(**settings)
    if alias_name is not None:
        index.aliases(**{alias_name: {}})

    self.indexes[index_name] = index
    return index