def __init__(self, config_file='config.cfg'):
    super(Elastic, self).__init__()
    self.percentage = 10.0
    self.minimum_occurrences = 250

    # The ConfigParser documentation points out that there's no way to force
    # default config options outside the "DEFAULT" section.
    config = ConfigParser()
    config.read(config_file)
    if not config.has_section('elastic'):
        config.add_section('elastic')

    for option, value in {'use_ssl': 'True', 'host': '127.0.0.1', 'version': '2',
                          'index': 'nxapi', 'doc_type': 'events'}.items():
        if not config.has_option('elastic', option):
            config.set('elastic', option, value)

    self.version = config.getint('elastic', 'version')
    self.index = config.get('elastic', 'index')
    use_ssl = config.getboolean('elastic', 'use_ssl')
    host = config.get('elastic', 'host')
    self.doc_type = config.get('elastic', 'doc_type')
    self.client = connections.create_connection(
        hosts=[host],
        use_ssl=use_ssl,
        index=self.index,
        version=self.version,
        doc_type=self.doc_type,
        timeout=30,
        retry_on_timeout=True
    )

    Event.init(index=self.index)
    index = Index(self.index, using=self.client)
    index.doc_type(Event)
    self.initialize_search()
def test_registered_doc_type_included_in_search():
    i = Index('i', using='alias')
    i.document(Post)

    s = i.search()

    assert s._doc_type == [Post]
def test_registered_doc_type_included_in_search():
    i = Index('i', using='alias')
    i.doc_type(Post)

    s = i.search()

    assert s._doc_type_map == {'post': Post}
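The two tests above target different elasticsearch-dsl releases: `Index.doc_type()` registers a class under a named mapping type (pre-6.x), while `Index.document()` is the newer, typeless registration. A minimal sketch of the newer style, assuming a trivial `Post` document (the index name 'blog-sketch' is illustrative):

from elasticsearch_dsl import Document, Index, Text

class Post(Document):
    title = Text()

i = Index('blog-sketch')
i.document(Post)  # new-style registration; replaces i.doc_type(Post)
s = i.search()    # the resulting Search is scoped to this index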
def es_delete_cmd(index_name):
    """Delete a specified index

    :arg index_name: name of index to delete

    """
    indexes = [name for name, count in get_indexes()]
    if index_name not in indexes:
        log.error('Index "%s" is not a valid index.', index_name)
        if not indexes:
            log.error('There are no valid indexes.')
        else:
            log.error('Valid indexes: %s', ', '.join(indexes))
        return

    ret = raw_input(
        'Are you sure you want to delete "%s"? (yes/no) ' % index_name
    )
    if ret != 'yes':
        return

    log.info('Deleting index "%s"...', index_name)
    index = Index(name=index_name, using='default')
    try:
        index.delete()
    except NotFoundError:
        pass
    log.info('Done!')
def create_index_if_does_not_exist(cls):
    index = Index(cls.INDEX_NAME)
    index.doc_type(cls)
    if not index.connection.indices.exists(cls.INDEX_NAME):
        index.create()
        time.sleep(1)  # It takes some time to create the index
def test_analyzers_returned_from_to_dict():
    random_analyzer_name = ''.join(choice(string.ascii_letters) for _ in range(100))
    random_analyzer = analyzer(
        random_analyzer_name, tokenizer="standard", filter="standard")
    index = Index('i', using='alias')
    index.analyzer(random_analyzer)

    assert index.to_dict()["settings"]["analysis"]["analyzer"][random_analyzer_name] == \
        {"filter": ["standard"], "type": "custom", "tokenizer": "standard"}
def test_index_template_can_have_order():
    i = Index('i-*')
    it = i.as_template('i', order=2)

    assert {"index_patterns": ["i-*"], "order": 2} == it.to_dict()
def test_aliases_returned_from_to_dict():
    random_alias = ''.join(choice(string.ascii_letters) for _ in range(100))
    alias_dict = {random_alias: {}}

    index = Index('i', using='alias')
    index.aliases(**alias_dict)

    assert index._aliases == index.to_dict()['aliases'] == alias_dict
def get_index(name, doc_types, *, using, shards=1, replicas=0, interval="1s"):
    index = Index(name, using=using)
    for doc_type in doc_types:
        index.doc_type(doc_type)
    index.settings(
        number_of_shards=shards,
        number_of_replicas=replicas,
        refresh_interval=interval,
    )
    return index
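A hedged usage sketch for get_index() above; ArticleDoc and the 'default' connection alias are assumptions for illustration:

# Assumes a DocType subclass ArticleDoc and a configured 'default' connection.
index = get_index('articles-v1', [ArticleDoc], using='default',
                  shards=2, replicas=1, interval='30s')
index.create()  # settings and registered doc types are applied on creation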
def test_aliases_add_to_object():
    random_alias = ''.join(choice(string.ascii_letters) for _ in range(100))
    alias_dict = {random_alias: {}}

    index = Index('i', using='alias')
    index.aliases(**alias_dict)

    assert index._aliases == alias_dict
def test_index_can_be_saved_even_with_settings(write_client):
    i = Index('test-blog', using=write_client)
    i.settings(number_of_shards=3, number_of_replicas=0)
    i.save()

    i.settings(number_of_replicas=1)
    i.save()

    assert '1' == i.get_settings()['test-blog']['settings']['index']['number_of_replicas']
def initialize_index(self, delete_if_exists=False):
    """
    Initialize index with mapping in ElasticSearch

    :param delete_if_exists: delete index, if exists
    :return: None
    """

    def update_index_settings():
        """
        Updates settings for Slovenian lemmatization of words. As far as we
        know, the elasticsearch-dsl library does not support custom filter
        settings.

        :return: None
        """
        analysis_settings = {
            "analysis": {
                "filter": {
                    "lemmagen_filter_sl": {
                        "type": "lemmagen",
                        "lexicon": "sl"
                    }
                },
                "analyzer": {
                    "lemmagen_sl": {
                        "type": "custom",
                        "tokenizer": "uax_url_email",
                        "filter": [
                            "lemmagen_filter_sl",
                            "lowercase"
                        ]
                    }
                }
            }
        }
        self.client.cluster.health(
            index=self.index_name, wait_for_status='green', request_timeout=2)
        self.client.indices.close(index=self.index_name)
        self.client.indices.put_settings(
            json.dumps(analysis_settings), index=self.index_name)
        self.client.indices.open(index=self.index_name)

    index = Index(self.index_name, using=self.client)
    if delete_if_exists and index.exists():
        index.delete()

    index.settings(
        # use a higher number in production
        number_of_replicas=0
    )
    # register models
    index.doc_type(Document)
    index.create()
    update_index_settings()  # set lemmatizer
def run(self, *args, **options):
    self.confirm(
        u"Are you really sure you want to delete the index '{0}' ?"
        .format(self.index_name)
    )
    index = Index(self.index_name)
    if not self.dry_run:
        index.delete()
    self.print_success(u"Index {0} deleted.".format(self.index_name))
def drop_index(silent=True):
    """Remove the ElasticSearch index."""
    index = Index(elasticsearch_config['index'])
    try:
        index.delete()
    except Exception as exc:
        if not silent:
            raise exc
def recreate_index(self):
    """
    Delete and then create a given index and set a default mapping.

    :param index: [string] name of the index. If None a default is used
    """
    submission = Index(self.index)
    submission.delete(ignore=404)

    ESSubmission.init()
def test_delete(write_client):
    write_client.indices.create(
        index='test-index',
        body={'settings': {'number_of_replicas': 0, 'number_of_shards': 1}}
    )

    i = Index('test-index', using=write_client)
    i.delete()
    assert not write_client.indices.exists(index='test-index')
class BaseSearchTestCase(TestCase):

    def setUp(self):
        from django.conf import settings
        SEARCH = getattr(settings, 'SEARCH')

        connections.create_connection('testing', **SEARCH['default']['connections'])
        self.index = Index(SEARCH['default']['index'], using='testing')
        # This is needed for test_documents, but has side effects in all running tests
        doctypes_list = (
            value for name, value
            in inspect.getmembers(documents)
            if not name.startswith('_') and
            inspect.isclass(value) and
            issubclass(value, DocType) and
            name != DocType.__name__
        )
        for doctype in doctypes_list:
            # Remove assigned index
            doctype._doc_type.index = None
            # Associate docs with test index
            self.index.doc_type(doctype)

        if self.index.exists():
            self.index.delete(ignore=404)
        self.index.create()

        self.search = Search(index=SEARCH['default']['index'])

    def tearDown(self):
        self.index.delete()
        queue = django_rq.get_queue()
        queue.empty()
def test_cloned_index_has_copied_settings_and_using():
    client = object()
    i = Index('my-index', using=client)
    i.settings(number_of_shards=1)

    i2 = i.clone('my-other-index')

    assert 'my-other-index' == i2._name
    assert client is i2._using
    assert i._settings == i2._settings
    assert i._settings is not i2._settings
def test_create_index_manually(self):
    out = io.StringIO()
    index_name = 'test_manually_created_index'
    call_command('create_index', index_name, stdout=out)
    self.assertIn("Created search index '{}'".format(index_name), out.getvalue())

    index = Index(index_name)
    self.assertTrue(index.exists())
    index.delete()
    self.assertFalse(index.exists())
def test_registered_doc_type_included_in_to_dict():
    i = Index('i', using='alias')
    i.document(Post)

    assert {
        'mappings': {
            'properties': {
                'title': {'type': 'text'},
                'published_from': {'type': 'date'},
            }
        }
    } == i.to_dict()
def test_create_index_usings_settings(self):
    out = io.StringIO()
    call_command('create_index', stdout=out)
    self.assertIn("Creating search indices from settings", out.getvalue())
    self.assertIn(
        "Created search index '{}'".format(self.settings['default']['index']),
        out.getvalue()
    )

    index = Index(self.settings['default']['index'])
    self.assertTrue(index.exists())
    index.delete()
    self.assertFalse(index.exists())
def test_conflicting_mapping_raises_error_in_index_to_dict():
    class A(document.Document):
        name = field.Text()

    class B(document.Document):
        name = field.Keyword()

    i = Index('i')
    i.document(A)
    i.document(B)

    with raises(ValueError):
        i.to_dict()
def applyConfig(self):
    try:
        print("Connecting to '%s', index '%s'" % (self.confESHost, self.confESIndex))
        res = connections.create_connection(hosts=[self.confESHost])
        idx = Index(self.confESIndex)
        idx.doc_type(DocHTTPRequestResponse)
        DocHTTPRequestResponse.init()
        try:
            idx.create()
        except:
            pass
    except Exception as e:
        JOptionPane.showMessageDialog(
            self.panel,
            "<html><p style='width: 300px'>Error while initializing ElasticSearch: %s</p></html>" % (str(e)),
            "Error",
            JOptionPane.ERROR_MESSAGE
        )
def create_index(hosts, index):
    i = set_hosts_index(hosts=hosts, index=index)
    logprint('debug', 'creating new index')
    i = Index(index)
    i.create()
    logprint('debug', 'registering doc types')
    i.doc_type(Author)
    i.doc_type(Page)
    i.doc_type(Source)
    logprint('debug', 'DONE')
def test_es_create_documents(self):
    # Index name required.
    with self.assertRaises(SystemExit):
        call_command('es_create_documents')

    # index_name not in settings.
    with self.assertRaises(SystemExit):
        call_command('es_create_documents', index_name='barfoo')

    # Index doesn't exist.
    with self.assertRaises(SystemExit):
        call_command('es_create_documents', index_name='foobar')

    index = Index('foobar')
    doc_type = Token.get_es_doc_type()
    index.doc_type(doc_type)
    index.create()
    self.refresh()

    # Disable auto indexing while creating objects.
    settings.TRAMPOLINE['OPTIONS']['disabled'] = True
    token = Token.objects.create(name="token")
    token_not_indexable = Token.objects.create(name='not_indexable')
    token_raise_exception = Token.objects.create(name='raise_exception')
    settings.TRAMPOLINE['OPTIONS']['disabled'] = False

    # Dry run.
    call_command('es_create_documents', index_name='foobar', dry_run=True)
    self.assertDocDoesntExist(token)
    self.assertDocDoesntExist(token_not_indexable)
    self.assertDocDoesntExist(token_raise_exception)

    call_command('es_create_documents', index_name='foobar', verbosity=3)
    self.assertDocExists(token)
    self.assertDocDoesntExist(token_not_indexable)
    self.assertDocDoesntExist(token_raise_exception)
def test_es_create_alias(self):
    # Index name required.
    with self.assertRaises(SystemExit):
        call_command('es_create_alias', target_name='foobar_target')

    # Target name required.
    with self.assertRaises(SystemExit):
        call_command('es_create_alias', index_name='foobar')

    # Index doesn't exist.
    with self.assertRaises(SystemExit):
        call_command('es_create_alias', index_name='foobar', target_name='foobar_target')

    index = Index('foobar_target')
    index.create()
    self.refresh()

    # Alias with same name as index.
    with self.assertRaises(SystemExit):
        call_command('es_create_alias', index_name='foobar_target', target_name='foobar_target')

    # Dry run.
    call_command('es_create_alias', index_name='foobar', target_name='foobar_target', dry_run=True)
    self.assertAliasDoesntExist(index='foobar_target', name='foobar')

    call_command('es_create_alias', index_name='foobar', target_name='foobar_target')
    self.assertAliasExists(index='foobar_target', name='foobar')
def test_registered_doc_type_included_in_to_dict():
    i = Index('i', using='alias')
    i.doc_type(Post)

    assert Post._doc_type.index == 'i'
    assert {
        'mappings': {
            'post': {
                'properties': {
                    'title': {'type': 'string'},
                    'published_from': {'type': 'date'},
                }
            }
        }
    } == i.to_dict()
def registerExtenderCallbacks(self, callbacks):
    self.callbacks = callbacks
    self.helpers = callbacks.getHelpers()
    callbacks.setExtensionName("Storing HTTP Requests/Responses into ElasticSearch")
    self.callbacks.registerHttpListener(self)
    self.callbacks.registerContextMenuFactory(self)
    self.out = callbacks.getStdout()

    res = connections.create_connection(hosts=[ES_host])
    idx = Index(ES_index)
    idx.doc_type(DocHTTPRequestResponse)
    try:
        idx.create()
    except:
        print("Index already exists")
def test_doc_type_can_be_set():
    i = Index('i', doc_type='t')
    m = Mapping('t')
    m.field('title', Text())
    i.mapping(m)

    assert {
        'mappings': {
            't': {'properties': {'title': {'type': 'text'}}}
        }
    } == i.to_dict()
def test_es_delete_alias(self):
    # Index name required.
    with self.assertRaises(SystemExit):
        call_command('es_delete_alias', target_name='foobar_target')

    # Target name required.
    with self.assertRaises(SystemExit):
        call_command('es_delete_alias', index_name='foobar')

    # Index doesn't exist.
    with self.assertRaises(SystemExit):
        call_command('es_delete_alias', index_name='foobar',
                     target_name='foobar_target', yes=True)

    index = Index('foobar_target')
    index.create()
    self.refresh()

    # Alias doesn't exist.
    with self.assertRaises(SystemExit):
        call_command('es_delete_alias', index_name='foobar',
                     target_name='foobar_target', yes=True)

    trampoline_config.connection.indices.put_alias(
        index='foobar_target', name='foobar')
    self.assertAliasExists(index='foobar_target', name='foobar')

    call_command('es_delete_alias', index_name='foobar',
                 target_name='foobar_target', yes=True)
    self.assertAliasDoesntExist(index='foobar_target', name='foobar')
def run(self, start_date=None, end_date=None, update_bookmark=True):
    """Calculate statistics aggregations."""
    # If no events have been indexed there is nothing to aggregate
    if not Index(self.event_index, using=self.client).exists():
        return

    lower_limit = start_date or self.get_bookmark()
    # Stop here if no bookmark could be estimated.
    if lower_limit is None:
        return

    upper_limit = min(
        end_date or datetime.datetime.max,  # ignore if `None`
        datetime.datetime.utcnow().replace(microsecond=0),
        datetime.datetime.combine(
            lower_limit + datetime.timedelta(self.batch_size),
            datetime.datetime.min.time())
    )
    while upper_limit <= datetime.datetime.utcnow():
        self.indices = set()
        self.new_bookmark = upper_limit.strftime(self.doc_id_suffix)
        bulk(self.client,
             self.agg_iter(lower_limit, upper_limit),
             stats_only=True,
             chunk_size=50)
        # Flush all indices which have been modified
        current_search_client.indices.flush(
            index=','.join(self.indices),
            wait_if_ongoing=True
        )
        if update_bookmark:
            self.set_bookmark()
        self.indices = set()
        lower_limit = lower_limit + datetime.timedelta(self.batch_size)
        upper_limit = min(
            end_date or datetime.datetime.max,  # ignore if `None`
            datetime.datetime.utcnow().replace(microsecond=0),
            lower_limit + datetime.timedelta(self.batch_size)
        )
        if lower_limit > upper_limit:
            break
def create_indexes(names, settings=None):
    """
    Create Elasticsearch indexes

    Args:
        names (list): A list of index names
        settings (dict): Index settings
    """
    for name in names:
        index = Index(name)
        try:
            if not index.exists():
                logger.debug("Creating Elasticsearch index: {0}".format(name))
                if settings:
                    index.put_settings(settings)
                index.create()
        except Exception as e:
            raise ElasticsearchError(
                "Elasticsearch error: {0}".format(e.__str__()))
class ElasticSearch:
    __logger = logging
    __client = Elasticsearch()
    __index_name = "merchant_services"
    __doc_type = MerchantServicesDocument
    __index = Index(__index_name)

    def search(self, query):
        self.__logger.debug("in: {}".format(query))
        s = Search(using=self.__client, index=self.__index_name,
                   doc_type=self.__doc_type)
        # q = Q('match', stem_terms=query)
        q = Q('match', terms=query)
        s = s.query(q)
        responses = s.execute()
        documents = []
        if responses.success():
            for hit in responses.hits:
                documents.append(
                    MerchantServices.objects.get(native_id=hit.native_id))
        self.__logger.debug("out: {}".format([
            'id: {}, name: {}'.format(doc.get_native_id(), doc.get_name())
            for doc in documents
        ]))
        return documents

    def bulk_indexing(self):
        self.__index.delete(ignore=404)
        self.__index.create()
        self.__client.indices.close(index=self.__index_name)
        self.__doc_type.init()
        self.__client.indices.open(index=self.__index_name)
        bulk(client=self.__client,
             actions=(b.indexing()
                      for b in MerchantServices.objects.all().iterator()))
def handle(self, *args, **options):
    from elasticsearch import Elasticsearch
    from elasticsearch_dsl import Index, Mapping

    ELASTICSEARCH_HOSTS = ['localhost']
    ELASTICSEARCH_INDEX = 'kindle2'
    es = Elasticsearch(ELASTICSEARCH_HOSTS)
    newindex = Index(ELASTICSEARCH_INDEX, using=es)
    if newindex.exists():
        exit('index already exists, choose a new name')
    mp = Mapping()
    mp.field('title', 'text')
    mp.field('creator', 'text')
    mp.field('publisher', 'text')
    mp.field('iclass', 'text')
    mp.field('isbn', 'text')
    mp.field('asin', 'keyword')
    newindex.mapping(mp)
    newindex.create()
def construct_query(self, system, file_path=None):
    files_index_name = Index(
        settings.ES_INDEX_PREFIX.format('files')).get_alias().keys()[0]

    if system == settings.AGAVE_STORAGE_SYSTEM:
        storage_prefix_query = Q({'prefix': {'path._exact': '/' + self.username}})
    else:
        storage_prefix_query = Q({'prefix': {'path._exact': '/'}})

    ngram_query = Q("query_string",
                    query=self.query_string,
                    fields=["name"],
                    minimum_should_match='80%',
                    default_operator='or')

    match_query = Q("query_string",
                    query=self.query_string,
                    fields=["name._exact", "name._pattern"],
                    default_operator='and')

    private_files_query = Q(
        'bool',
        must=[
            Q({'term': {'_index': files_index_name}}),
            Q({'term': {'system._exact': system}}),
            storage_prefix_query,
            (ngram_query | match_query)
        ],
        must_not=[Q({"prefix": {"path._exact": "/.Trash"}})]
    )
    return private_files_query
def customer_index(app):
    """Initialize the `Customer` doc type."""
    test_index = Index(uuid4().hex)
    test_index.create()
    app.cluster.health(wait_for_status='yellow')

    # monkey patch `auth_index`
    original_auth_index = auth_models.auth_index
    auth_models.auth_index = test_index

    Customer.init(index=test_index._name)
    Customer._doc_type.index = test_index._name

    yield test_index

    auth_models.auth_index = original_auth_index
    test_index.delete()
def handle(self, *args, **options):
    index = "kplc_interruptions"
    es = Elasticsearch(
        [{'host': settings.ES_SETTINGS['HOST'],
          'port': settings.ES_SETTINGS['PORT']}],
        index=index)
    kplc_index = Index(index, using=settings.ES_SETTINGS['ALIAS'])
    kplc_index.document(InterruptionPdfTextDoc)
    if kplc_index.exists():
        kplc_index.delete()
        # TODO: Use logger
        print('Deleted kplc interruptions index.')
    InterruptionPdfTextDoc.init()
    result = bulk(
        client=es,
        actions=(pdf.index()
                 for pdf in InterruptionPdfText.objects.all().iterator()))
    # TODO: Use logger
    print('Indexed kplc interruptions.')
    print(result)
def clean(self):
    cleaned_data = super().clean()
    param = cleaned_data.get("param")
    query = self.data['query']
    search_type = cleaned_data.get("search_type")

    if search_type == 'simple':
        try:
            elastic_field = SimpleSearchField.objects.get(
                pk=param).elastic_index_field
            query = prepare_simple_query(query, elastic_field.field_type)
        except SimpleSearchField.DoesNotExist:
            # "Invalid query parameter"
            raise forms.ValidationError("Невірний параметр запиту")
    else:
        inid_code = InidCodeSchedule.objects.filter(
            ipc_code=param,
            enable_search=1,
            elastic_index_field__isnull=False).first()
        if inid_code:
            elastic_field = inid_code.elastic_index_field
            query = prepare_advanced_query(query, elastic_field.field_type)
        else:
            # "Invalid query parameter"
            raise forms.ValidationError("Невірний параметр запиту")

    # Validate the query against Elasticsearch
    client = Elasticsearch(settings.ELASTIC_HOST, timeout=settings.ELASTIC_TIMEOUT)
    i = Index(settings.ELASTIC_INDEX_NAME, using=client).validate_query(body={
        'query': Q(
            'query_string',
            query=query,
            default_field=elastic_field.field_name,
            default_operator='AND'
        ).to_dict()
    })
    if not i['valid']:
        # "Invalid query"
        raise forms.ValidationError("Невірний запит")
def construct_query(self, system, file_path=None):
    files_index_name = Index('des-files').get_alias().keys()[0]

    if system == settings.AGAVE_STORAGE_SYSTEM:
        storage_prefix_query = Q({'prefix': {'path._exact': '/' + self.username}})
    else:
        storage_prefix_query = Q({'prefix': {'path._exact': '/'}})

    private_files_query = Q(
        'bool',
        must=[
            Q({'term': {'_index': files_index_name}}),
            Q({'term': {'system._exact': system}}),
            storage_prefix_query,
            Q("query_string", query=self.query_string, default_operator="and")
        ],
        must_not=[
            Q({"prefix": {"path._exact": "/.Trash"}})
        ]
    )
    return private_files_query
def create_index():
    try:
        create_connection()
        db = Index(INDEX_NAME)
        db.settings(**INDEX_SETTINGS)
        db.create()
    except Exception as e:
        ActivityLog.objects.create_log(
            None,
            level='C',
            view_name='elastic_search.es_core_config.create_index',
            message='Error in creating index in ElasticSearch with error message - %s' % e.message,
            traceback=traceback.format_exc())
        raise Exception(e)
    else:
        return db
def run(self, start_date=None, end_date=None, update_bookmark=True):
    """Calculate statistics aggregations."""
    # If no events have been indexed there is nothing to aggregate
    if not Index(self.event_index, using=self.client).exists():
        return

    lower_limit = self.bookmark_api.get_lower_limit(start_date)
    # Stop here if no bookmark could be estimated.
    if lower_limit is None:
        return

    upper_limit = self.bookmark_api.get_upper_limit(
        start_date, end_date, self.batch_size)
    while upper_limit <= datetime.datetime.utcnow() and self.has_events:
        self.indices = set()
        bulk(self.client,
             self.agg_iter(lower_limit, upper_limit),
             stats_only=True,
             chunk_size=50)
        # Flush all indices which have been modified
        current_search.flush_and_refresh(index='*')
        if update_bookmark and self.has_events:
            self.bookmark_api.set_bookmark(
                upper_limit.strftime(self.doc_id_suffix)
                or datetime.datetime.utcnow().strftime(self.doc_id_suffix))
        lower_limit = lower_limit + datetime.timedelta(self.batch_size)
        upper_limit = min(
            end_date or datetime.datetime.max,  # ignore if `None`
            datetime.datetime.utcnow().replace(microsecond=0),
            lower_limit + datetime.timedelta(self.batch_size))
        if lower_limit > upper_limit:
            break
def create_index(client, *, index_name, **kwargs):
    p = IngestClient(client)
    p.put_pipeline(id='document_attachment', body={
        'description': "Extract attachment information",
        'processors': [{
            "attachment": {
                "field": "source_file"
            }
        }]
    })

    index = Index(index_name, using=client)
    index.doc_type(Document)
    try:
        index.create()
    except RequestError:
        print(f"Index named '{index_name}' already exists", file=sys.stderr)
        sys.exit(1)
def create_indexes(names=None, settings=None):
    """
    Create Elasticsearch indexes

    Args:
        names (list): A list of index names
                      ["dmarc_aggregate", "dmarc_forensic"] by default
        settings (dict): Index settings
    """
    if names is None:
        names = ["dmarc_aggregate", "dmarc_forensic"]
    for name in names:
        index = Index(name)
        try:
            if not index.exists():
                logger.debug("Creating Elasticsearch index: {0}".format(name))
                if settings:
                    index.put_settings(settings)
                index.create()
        except Exception as e:
            raise ElasticsearchError(
                "Elasticsearch error: {0}".format(e.__str__()))
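A usage sketch for create_indexes() above; the settings dict follows the standard Elasticsearch index-settings body, and these particular values are illustrative:

# Create both default indexes with a single shard and no replicas.
create_indexes(settings={'number_of_shards': 1, 'number_of_replicas': 0})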
def test_cloned_index_has_analysis_attribute():
    """
    Regression test for Issue #582 in which `Index.clone()` was not copying
    over the `_analysis` attribute.
    """
    client = object()
    i = Index('my-index', using=client)

    random_analyzer_name = ''.join(choice(string.ascii_letters) for _ in range(100))
    random_analyzer = analyzer(
        random_analyzer_name, tokenizer="standard", filter="standard")

    i.analyzer(random_analyzer)

    i2 = i.clone('my-clone-index')

    assert i.to_dict()['settings']['analysis'] == i2.to_dict()['settings']['analysis']
def mitm_request(self, data):
    # Initialize ES connection and index
    res = connections.create_connection(hosts=[args.elasticsearch])
    idx = Index(args.index)
    idx.doc_type(DocHTTPRequestResponse)
    try:
        DocHTTPRequestResponse.init()
        idx.create()
    except:
        pass

    r = HTTPRequest(data)

    # determine url
    if self.is_connect:
        scheme = "https"
    else:
        scheme = "http"
    url = scheme + "://" + self.hostname
    if scheme == "http" and int(self.port) != 80 or \
            scheme == "https" and int(self.port) != 443:
        url += ":" + self.port
    url += self.path

    if args.verbose:
        print(url)

    self.doc = DocHTTPRequestResponse(
        host=self.hostname, port=int(self.port), protocol=scheme)
    self.doc.meta.index = args.index
    self.doc.request.url = url
    self.doc.request.requestline = r.requestline
    self.doc.request.method = r.command
    self.doc.host = self.hostname
    self.doc.port = int(self.port)
    self.doc.protocol = scheme

    return data
def register(self, name=None, version=None, settings=None):
    """
    Register an index locally.

    Note that `createall` is needed to save the index to Elasticsearch.

    The index will be named per convention such that:
    - The graph's name is used by default
    - The "test" suffix is added for unit testing (to avoid clobbering real data)

    If version is provided, it will be used to generate an alias
    (to the unversioned name).
    """
    if version is None:
        index_name = IndexRegistry.name_for(self.graph, name=name)
        alias_name = None
    else:
        # create index with full version, alias to shortened version
        index_name = IndexRegistry.name_for(self.graph, name=name, version=version)
        alias_name = IndexRegistry.name_for(self.graph, name=name)

    if index_name in self.indexes:
        raise Exception(
            "Index already registered for name: {}".format(index_name))

    index = Index(
        name=index_name,
        using=self.graph.elasticsearch_client,
    )
    if settings:
        index.settings(**settings)
    if alias_name is not None:
        index.aliases(**{alias_name: {}})

    self.indexes[index_name] = index
    return index
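A hedged usage sketch for register() above; how the registry and its graph are constructed is not shown in the snippet, so `registry` and the settings values are assumptions:

# Register a versioned index; an alias to the unversioned name is added.
index = registry.register(
    name='customer',
    version='v2',
    settings={'number_of_shards': 1, 'number_of_replicas': 0},
)
# Per the docstring, `createall` must still run to save it to Elasticsearch.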
def test_search_is_limited_to_index_name():
    i = Index('my-index')
    s = i.search()

    assert s._index == ['my-index']
def exists():
    return Index(APIDoc.Index.name).exists()
def refresh():
    index = Index(APIDoc.Index.name)
    index.refresh()
def delete():
    Index(APIDoc.Index.name).delete()
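The three helpers above wrap one index's lifecycle; a minimal sketch of a matching recreate step, assuming APIDoc is a Document subclass whose inner Index class names the index:

def recreate():
    index = Index(APIDoc.Index.name)
    if index.exists():
        index.delete()
    APIDoc.init()  # recreates the index with APIDoc's mapping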
    Should return list of family member names
    """
    family = getattr(self.general, "family", None)

    if family:
        for member in family:
            if hasattr(member, "family_name"):
                yield member.family_name
    else:
        for member in parse_raw_family_string(
                getattr(self.general, "family_raw", "")):
            if "family_name" in member:
                yield member["family_name"]


declarations_idx = Index(OLD_DECLARATION_INDEX)
declarations_idx.settings(
    number_of_shards=NUMBER_OF_SHARDS,
    number_of_replicas=NUMBER_OF_REPLICAS
)

declarations_idx.analyzer(namesAutocompleteAnalyzer)
declarations_idx.analyzer(namesAutocompleteSearchAnalyzer)


@declarations_idx.doc_type
class Declaration(DocType, AbstractDeclaration):
    """Declaration document.
    Assumes there's a dynamic mapping with all fields not indexed by default."""

    persons = Text(analyzer="ukrainian", copy_to="all")
    countries = Text(analyzer="ukrainian", copy_to="all")
    companies = Text(analyzer="ukrainian", copy_to="all")
def handle(self, *args, **options):
    text_analyzer = get_text_analyzer("german")
    elastic_index = Index("mst_debug")
    if not elastic_index.exists():
        elastic_index.create()
    elastic_index.close()
    elastic_index.analyzer(text_analyzer)
    elastic_index.save()
    elastic_index.open()
    elastic_index.flush()

    for word in options["words"]:
        analysis = elastic_index.analyze(
            body={"analyzer": "text_analyzer", "text": word})
        tokens = [i["token"] for i in analysis["tokens"]]
        self.stdout.write("{} {}\n".format(word, tokens))
def exists():
    return Index(INDEX_NAME).exists()
def delete():
    try:
        Index(INDEX_NAME).delete()
    except elasticsearch.exceptions.NotFoundError:
        log.info('Could not delete non-existent index, creating new index...')
def index_doc(doc, index_name="wiki-dumps"):
    global es
    index = Index(index_name)
    index.create()
def construct_query(self, system=None, file_path=None, **kwargs):
    project_query_fields = [
        "projectId", "title", "description", "doi", "publications", "pis", "name"
    ]

    published_index_name = list(
        Index(settings.ES_INDEX_PREFIX.format(
            'publications')).get_alias().keys())[0]
    legacy_index_name = list(
        Index(settings.ES_INDEX_PREFIX.format(
            'publications-legacy')).get_alias().keys())[0]

    filter_queries = []
    if kwargs.get('type_filters'):
        for type_filter in kwargs['type_filters']:
            if type_filter == 'nees':
                type_query = Q({'term': {'_index': legacy_index_name}})
            else:
                type_query = Q(
                    'term', **{'project.value.projectType._exact': type_filter})
            filter_queries.append(type_query)

    ds_user_query = Q({
        "nested": {
            "path": "users",
            "ignore_unmapped": True,
            "query": {
                "query_string": {
                    "query": self.query_string,
                    "fields": [
                        "users.first_name", "users.last_name", "user.username"
                    ],
                    "lenient": True
                }
            }
        }
    })
    nees_pi_query = Q({
        "nested": {
            "path": "pis",
            "ignore_unmapped": True,
            "query": {
                "query_string": {
                    "query": self.query_string,
                    "fields": ["pis.firstName", "pis.lastName"],
                    "lenient": True
                }
            }
        }
    })
    pub_query = Q('query_string',
                  query=self.query_string,
                  default_operator='and',
                  fields=project_query_fields)

    published_query = Q(
        'bool',
        must=[
            Q('bool', should=[ds_user_query, nees_pi_query, pub_query]),
            Q({'term': {'_index': legacy_index_name}}),
        ],
        must_not=[
            Q('term', status='unpublished'),
            Q('term', status='saved')
        ]
    )
    return published_query
def get(self, request):
    """GET handler."""
    q = request.GET.get('query_string')
    offset = int(request.GET.get('offset', 0))
    limit = int(request.GET.get('limit', 10))
    if limit > 500:
        return HttpResponseBadRequest("limit must not exceed 500")
    type_filter = request.GET.get('type_filter', 'all')

    doc_type_map = {
        Index(settings.ES_INDEX_PREFIX.format(
            'publications')).get_alias().keys()[0]: 'publication',
        Index(settings.ES_INDEX_PREFIX.format(
            'publications-legacy')).get_alias().keys()[0]: 'publication',
        Index(settings.ES_INDEX_PREFIX.format(
            'files')).get_alias().keys()[0]: 'file',
        Index(settings.ES_INDEX_PREFIX.format(
            'cms')).get_alias().keys()[0]: 'modelresult'
    }

    public_files_query = (CommunityDataSearchManager(request).construct_query()
                          | PublishedDataSearchManager(request).construct_query())
    publications_query = PublicationsSearchManager(request).construct_query()
    cms_query = es_query = CMSSearchManager(request).construct_query()

    if type_filter == 'public_files':
        es_query = Search().query(public_files_query)
    elif type_filter == 'published':
        es_query = Search().query(publications_query)
    elif type_filter == 'cms':
        es_query = Search().query(cms_query).highlight(
            'body',
            fragment_size=100).highlight_options(pre_tags=["<b>"],
                                                 post_tags=["</b>"],
                                                 require_field_match=False)
    elif type_filter == 'all':
        es_query = Search().query(
            public_files_query | publications_query | cms_query).highlight(
                'body',
                fragment_size=100).highlight_options(pre_tags=["<b>"],
                                                     post_tags=["</b>"],
                                                     require_field_match=False)

    try:
        res = es_query.execute()
    except (TransportError, ConnectionTimeout) as err:
        if getattr(err, 'status_code', 500) == 404:
            raise
        res = es_query.execute()

    out = {}
    hits = []

    for r in res:
        d = r.to_dict()
        d["doc_type"] = doc_type_map[r.meta.index]
        if hasattr(r.meta, 'highlight'):
            highlight = r.meta.highlight.to_dict()
            d["highlight"] = highlight
        if r.meta.doc_type == 'publication' and hasattr(r, 'users'):
            users = r.users
            pi = r.project.value.pi
            pi_user = filter(lambda x: x.username == pi, users)[0]
            d["piLabel"] = "{}, {}".format(pi_user.last_name,
                                           pi_user.first_name)
        hits.append(d)

    out['total_hits'] = res.hits.total.value
    out['hits'] = hits
    out['all_total'] = Search().query(
        public_files_query | publications_query | cms_query).count()
    out['public_files_total'] = Search().query(public_files_query).count()
    out['published_total'] = Search().query(publications_query).count()
    out['cms_total'] = Search().query(cms_query).count()

    return JsonResponse(out, safe=False)
# coding:utf-8

'''
@author = super_fazai
@File : search.py
@Time : 2017/8/11 10:41
@connect : [email protected]
'''

from elasticsearch_dsl import (
    DocType,
    Index,
)
from scrapy import Field


class Post():
    id = Field()


posts = Index('posts')


@posts.doc_type
class PostDocument(DocType):
    class Meta:
        model = Post
        fields = [
            'id',
        ]