async def related_word_extractor(parent_docid, doc_datetime, term, debug=False):
    """Highlight *term* inside document *parent_docid* and collect related words.

    :param parent_docid: ES ``_id`` of the document to search within
    :param doc_datetime: ISO datetime string (``YYYY-MM-DDT...``); the date part
        selects the daily index (dashes replaced with dots)
    :param term: query string matched against ``doc_title`` / ``doc_content``
    :param debug: forwarded to ``get_close_word``
    :return: ordered, de-duplicated list of related words longer than one char
    """
    es = Elasticsearch(['%s:%d' % (es_ip, es_port)])
    highlight_req = {
        "_source": [""],  # only highlights are needed, not the document body
        "query": {
            "bool": {
                "filter": [
                    {"term": {"_id": parent_docid}},
                    {
                        "query_string": {
                            "query": term,
                            "fields": ["doc_title", "doc_content"],
                            "default_operator": "AND"
                        }
                    }
                ]
            }
        },
        "highlight": {
            "fields": {
                "_all": {},
                "doc_title": {
                    "fragment_size": 30,
                    "number_of_fragments": 1,
                    "fragmenter": "simple"
                },
                "doc_content": {
                    "fragment_size": 30,
                    "number_of_fragments": 3,
                    "fragmenter": "simple"
                }
            }
        }
    }
    # Daily index name, e.g. "<prefix>-2019.01.31" from "2019-01-31T...".
    index = INDEX_DOCUMENTS + "-" + re.sub(
        "-", ".", doc_datetime[:doc_datetime.find("T")])
    related = []
    try:
        # fix: guarantee the client is closed even when search() raises
        result = await es.search(index=index, doc_type=TYPE_DOC,
                                 body=highlight_req)
        if result['hits']['total'] > 0:
            title_fragments = []
            content_fragments = []
            # The _id term filter yields at most one hit, so overwriting the
            # fragment lists per hit is equivalent to the original behavior.
            for hit in result['hits']['hits']:
                if 'doc_title' in hit['highlight']:
                    title_fragments = list(hit['highlight']['doc_title'])
                if 'doc_content' in hit['highlight']:
                    content_fragments = list(hit['highlight']['doc_content'])
            for fragment in (title_fragments + content_fragments):
                related += await get_close_word(fragment, debug)
    finally:
        es.close()
    # De-duplicate preserving first occurrence. dict.fromkeys is O(n); the
    # original sorted(set(...), key=related.index) was O(n^2) for the same
    # first-occurrence ordering. Keep only words longer than one character.
    return [word for word in dict.fromkeys(related) if len(word) > 1]
def client(es_params, index, loop):
    """Yield an Elasticsearch client for *index*, wiping prior state first.

    A blanket delete is issued before yielding; a missing index is not an
    error. The client is closed once the consumer is finished with it.
    """
    es_client = Elasticsearch([{'host': es_params['host']}], loop=loop)
    try:
        loop.run_until_complete(es_client.delete(index, '', ''))
    except NotFoundError:
        # nothing to clean up — the index did not exist yet
        pass
    yield es_client
    es_client.close()
def client(es_params, loop, repo_name, snapshot_name):
    """Yield an Elasticsearch client, removing snapshot state afterwards.

    Before yielding, all documents in INDEX are deleted (a missing index is
    ignored). After the consumer finishes, the snapshot and then its
    repository are removed best-effort, and the client is closed.
    """
    es_client = Elasticsearch([{'host': es_params['host']}], loop=loop)
    try:
        loop.run_until_complete(es_client.delete(INDEX, '', ''))
    except NotFoundError:
        pass
    yield es_client
    # cleaning up just in case — the snapshot must go before its repository
    try:
        loop.run_until_complete(
            es_client.snapshot.delete(repo_name, snapshot_name))
    except NotFoundError:
        pass
    try:
        loop.run_until_complete(
            es_client.snapshot.delete_repository(repo_name))
    except NotFoundError:
        pass
    es_client.close()
class ElasticsearchController:
    """Thin async wrapper around an Elasticsearch client.

    The event loop must be supplied via :meth:`set_loop` before
    :meth:`add_elasticsearch_connection` is called; :meth:`cleanup`
    closes the client when done.
    """

    def __init__(self):
        # Fix: every method (and get_elasticsearch_client) reads
        # ``_elasticsearch_client``; the original initialised a never-read
        # ``_elasticsearch`` instead, so accessing the client before a
        # connection was made raised AttributeError.
        self._elasticsearch_client = None
        self.elasticsearch_conn = False  # True once a client was created
        self._loop = None

    def get_loop(self):
        """Return the event loop previously set with :meth:`set_loop`."""
        return self._loop

    def get_elasticsearch_client(self):
        """Return the underlying client (``None`` until connected)."""
        return self._elasticsearch_client

    @timestamped
    async def get_value(self, index: str, id: str, doc_type='_all'):
        """
        Get record from elasticsearch using id and index
        :param index: used to search the named index for records
        :param id: used to find record in the index
        :param doc_type: unused; kept for interface compatibility
        :return: the record, or ``""`` when it does not exist
        """
        try:
            assert await self.exists(index, id) is True
            result = await self._elasticsearch_client.get(index, id)
        except AssertionError as e:
            logger.error(
                "Elasticsearch client doesn't exist when it should. " + str(e))
            result = ""
        # Fix: only decode byte payloads. The original unconditionally called
        # result.decode('utf-8'), which raises AttributeError on the ""
        # error-path value (str has no decode in Python 3).
        if isinstance(result, bytes):
            result = result.decode('utf-8')
        return result

    @timestamped
    async def create(self, index, doc_type, body, id=None):
        """
        Used to create new record in the elasticsearch database
        :param index: used to create in that specific index
        :param doc_type: specify elasticsearch document type
        :param body: actual body of the record to be created in the database
        :param id: optional explicit identifier for the new record
        :return: Json object
        """
        # Fix: forward the caller's id — the original hard-coded id=42,
        # silently ignoring the parameter.
        return await self._elasticsearch_client.create(index, doc_type,
                                                       body, id=id)

    async def exists(self, index, id):
        """
        Used to check if record exist in the elasticsearch database
        using id and index
        :param index: search index for record
        :param id: find record for the id
        :return: truthy when the record exists
        """
        return await self._elasticsearch_client.exists(index, id)

    @timestamped
    async def update(self, index, doc_type, id, body=None):
        """
        Used to update record in the elasticsearch database
        :param index: select index to update record in that index
        :param doc_type: specify elasticsearch document type
        :param id: Identifier for the record to be updated
        :param body: the actual body for the record
        :return: client response
        """
        return await self._elasticsearch_client.update(index, doc_type, id,
                                                       body=body)

    @timestamped
    async def search(self, index=None, doc_type=None, body=None):
        """
        Used to search for the record in the elasticsearch database
        :param index: used to search the index
        :param doc_type: elasticsearch document type
        :param body: query to be executed to match the result
        :return: client response
        """
        return await self._elasticsearch_client.search(index=index,
                                                       doc_type=doc_type,
                                                       body=body)

    @timestamped
    async def delete(self, index, doc_type, id):
        """
        Used to delete record from elasticsearch database
        :param index: specify the index for the record to be deleted
        :param doc_type: elasticsearch document type
        :param id: specify the id for the record to be deleted
        :return: client response
        """
        return await self._elasticsearch_client.delete(index, doc_type, id)

    def set_loop(self, loop):
        """Store the event loop used when creating the client."""
        self._loop = loop

    @timestamped
    async def add_elasticsearch_connection(self):
        """Create the client from ELASTICSEARCH_HOST/PORT env variables."""
        elasticsearch_host = None
        elasticsearch_port = None
        try:
            elasticsearch_host = os.getenv("ELASTICSEARCH_HOST",
                                           "redis.dev.muchneededllc.com")
            elasticsearch_port = str(os.getenv("ELASTICSEARCH_PORT", 9200))
        except OSError as e:
            logger.error(
                "Couldn't get environmental variables for elasticearch. "
                + str(e))
            exit(1)
        try:
            if self._loop is not None:
                address = ':'.join([elasticsearch_host, elasticsearch_port])
                self._elasticsearch_client = Elasticsearch([address],
                                                           loop=self._loop)
                logger.info(self._elasticsearch_client)
                self.elasticsearch_conn = True
                logger.debug("Created Elasticsearch Client.")
            else:
                logger.error(
                    "Couldn't create elasticsearch client because loop "
                    "hasn't been set."
                )
        except Exception as e:
            logger.error("couldn't open elasticsearch.")
            raise Exception(e)

    def cleanup(self):
        """Close the client if a connection was established."""
        if self.elasticsearch_conn:
            self._elasticsearch_client.close()
class ElasticSearchManager(DefaultSearchUtility):
    """Manage per-container Elasticsearch indexes, versions and migrations."""

    def __init__(self, settings=None, loop=None):
        # Fix: the original used a mutable default (``settings={}``).
        # The argument is accepted for interface compatibility but is not
        # stored — effective settings come from ``app_settings`` via the
        # ``settings`` property below.
        self.loop = loop
        self._conn = None
        self._migration_lock = None

    @property
    def bulk_size(self):
        """Bulk indexing batch size (default 50)."""
        return self.settings.get('bulk_size', 50)

    @property
    def settings(self):
        """Elasticsearch section of the application settings."""
        return app_settings.get('elasticsearch', {})

    @property
    def conn(self):
        """Lazily-created Elasticsearch client."""
        if self._conn is None:
            self._conn = Elasticsearch(
                loop=self.loop, **self.settings['connection_settings'])
        return self._conn

    @property
    def enabled(self):
        """True when at least one endpoint is configured."""
        return len(
            self.settings.get('connection_settings', {}).get(
                'endpoints', [])) > 0

    async def initialize(self, app):
        self.app = app
        self._migration_lock = asyncio.Lock()

    async def finalize(self, app):
        if self._conn is not None:
            self._conn.close()

    async def get_registry(self, container, request):
        """Return (and cache on the request) the container's registry."""
        if request is None:
            request = get_current_request()
        if hasattr(request, 'container_settings'):
            return request.container_settings
        annotations_container = IAnnotations(container)
        request.container_settings = await annotations_container.async_get(
            REGISTRY_DATA_KEY)
        return request.container_settings

    async def get_real_index_name(self, container, request=None):
        """Versioned index name: ``<index_name>_<version>``."""
        index_name = await self.get_index_name(container, request)
        version = await self.get_version(container, request)
        return index_name + '_' + str(version)

    async def get_index_name(self, container, request=None):
        """Registry-stored index name, or a prefixed default."""
        registry = await self.get_registry(container, request)
        try:
            result = registry['el_index_name']
        except KeyError:
            result = app_settings['elasticsearch'].get(
                'index_name_prefix', 'guillotina-') + container.id
        return result

    async def get_next_index_name(self, container, request=None):
        """Name of the in-progress migration index, or None."""
        registry = await self.get_registry(container, request)
        if ('el_next_index_version' not in registry or
                registry['el_next_index_version'] is None):
            return None
        index_name = await self.get_index_name(container, request)
        version = registry['el_next_index_version']
        return index_name + '_' + str(version)

    async def set_index_name(self, container, name, request=None):
        registry = await self.get_registry(container, request)
        registry['el_index_name'] = name
        registry._p_register()

    async def initialize_catalog(self, container):
        """(Re)create the container's index, alias and mappings."""
        if not self.enabled:
            return
        await self.remove_catalog(container)
        index_name = await self.get_index_name(container)
        real_index_name = await self.get_real_index_name(container)
        await safe_es_call(self.conn.indices.create, real_index_name)
        await safe_es_call(self.conn.indices.put_alias, index_name,
                           real_index_name)
        await safe_es_call(self.conn.indices.close, index_name)
        await safe_es_call(self.install_mappings_on_index, index_name)
        await self.conn.indices.open(index_name)
        await self.conn.cluster.health(wait_for_status='yellow')
        await self.set_index_name(container, index_name)

    async def remove_catalog(self, container):
        """Best-effort removal of the container's index and alias."""
        if not self.enabled:
            return
        index_name = await self.get_index_name(container)
        real_index_name = await self.get_real_index_name(container)
        await safe_es_call(self.conn.indices.close, real_index_name)
        await safe_es_call(self.conn.indices.delete_alias, real_index_name,
                           index_name)
        await safe_es_call(self.conn.indices.delete, real_index_name)
        await safe_es_call(self.conn.indices.delete, index_name)

    async def get_version(self, container, request=None):
        registry = await self.get_registry(container, request)
        try:
            version = registry['el_index_version']
        except KeyError:
            version = 1
        return version

    async def set_version(self, container, version, request=None, force=False):
        registry = await self.get_registry(container, request)
        if (not force and 'el_next_index_version' in registry and
                registry['el_next_index_version'] is not None):
            raise Exception(
                'Cannot change index while migration is in progress')
        registry['el_index_version'] = version
        registry._p_register()

    async def stats(self, container):
        index_name = await self.get_index_name(container)
        return await self.conn.indices.stats(index_name)

    async def install_mappings_on_index(self, index_name):
        """Apply default settings plus configured mappings to *index_name*."""
        mappings = get_mappings()
        index_settings = DEFAULT_SETTINGS.copy()
        index_settings.update(app_settings.get('index', {}))
        # Settings can only be changed while the index is closed.
        await self.conn.indices.close(index_name)
        await self.conn.indices.put_settings(index_settings, index_name)
        for key, value in mappings.items():
            await self.conn.indices.put_mapping(index_name, key, value)
        await self.conn.indices.open(index_name)

    async def activate_next_index(self, container, version, request=None,
                                  force=False):
        '''
        Next index support designates an index to also push
        delete and index calls to
        '''
        registry = await self.get_registry(container, request)
        if not force:
            try:
                assert registry['el_next_index_version'] is None
            except KeyError:
                pass
        registry['el_next_index_version'] = version
        registry._p_register()

    async def disable_next_index(self, container, request=None):
        '''
        Next index support designates an index to also push
        delete and index calls to
        '''
        registry = await self.get_registry(container, request)
        registry['el_next_index_version'] = None
        registry._p_register()

    async def apply_next_index(self, container, request=None):
        # make sure to reload the registry to make sure we have the latest
        # to write to
        if (request is not None and
                hasattr(request, 'container_settings') and
                REGISTRY_DATA_KEY in container.__annotations__):
            await request._txn.refresh(request.container_settings)
        registry = await self.get_registry(container, request)
        assert registry['el_next_index_version'] is not None
        await self.set_version(container, registry['el_next_index_version'],
                               request, force=True)
        registry['el_next_index_version'] = None
        registry._p_register()