class BaseSearchAdaptor(object): """ Adapter between the API and an Elasticsearch backend, for a single index and document type. """ def __init__(self, index_id, doc_type, query_class): # TODO(emfree): probably want to try to keep persistent connections # around, instead of creating a new one each time. self._connection = new_connection() self.index_id = index_id self.doc_type = doc_type self.query_engine = DSLQueryEngine(query_class) @wrap_es_errors def _index_document(self, object_repr, **kwargs): """ (Re)index a document for the object with API representation `object_repr`. Creates the actual index for the namespace if it doesn't already exist. """ assert self.index_id == object_repr['namespace_id'] index_args = dict( index=self.index_id, doc_type=self.doc_type, id=object_repr['id'], body=object_repr) index_args.update(**kwargs) try: self._connection.index(**index_args) except elasticsearch.exceptions.TransportError: log.error('Index failure', index=self.index_id, doc_type=self.doc_type, object_id=index_args['_id']) raise @wrap_es_errors def _bulk_index(self, objects, parent=None): index_args = [] for object_repr in objects: args = dict(_index=self.index_id, _type=self.doc_type, _id=object_repr['id'], _source=object_repr) if parent is not None: args.update(dict(_parent=object_repr[parent])) index_args.append(args) try: count, failures = bulk(self._connection, index_args) except elasticsearch.exceptions.TransportError: # TODO[k]: log here log.error('Bulk index failure', index=self.index_id, doc_type=self.doc_type, object_ids=[i['_id'] for i in index_args], failures=failures) raise return count @wrap_es_errors def search(self, query, max_results=100, offset=0, explain=True): """Perform a search and return the results.""" dsl_query = self.query_engine.generate_query(query) log.debug('search query', query=query, dsl_query=dsl_query) raw_results = self._connection.search( index=self.index_id, doc_type=self.doc_type, body=dsl_query, size=max_results, from_=offset, explain=explain) self._log_query(query, raw_results) api_results = self.query_engine.process_results(raw_results) return api_results @wrap_es_errors def get_mapping(self): return self._connection.indices.get_mapping(index=self.index_id, doc_type=self.doc_type) def _log_query(self, query, raw_results): """ Log query and result info, stripping out actual result bodies but keeping ids and metadata. """ log_results = copy.deepcopy(raw_results) for hit in log_results['hits']['hits']: del hit['_source'] log.debug('search query results', query=query, results=log_results)
class BaseSearchAdaptor(object): """ Base adaptor between the Nilas API and Elasticsearch for a single index and document type. Subclasses implement the document type specific logic. """ def __init__(self, index_id, doc_type, query_class, log): self.index_id = index_id self.doc_type = doc_type self.query_engine = DSLQueryEngine(query_class) self.log = log # TODO(emfree): probably want to try to keep persistent connections # around, instead of creating a new one each time. self._connection = new_connection() @wrap_es_errors def _index_document(self, object_repr, **kwargs): """ (Re)index a document for the object with Nilas API representation `object_repr`. """ assert self.index_id == object_repr['namespace_id'] index_args = dict( index=self.index_id, doc_type=self.doc_type, id=object_repr['id'], body=object_repr) index_args.update(**kwargs) try: self._connection.index(**index_args) except elasticsearch.exceptions.TransportError as e: self.log.error('Index failure', error=e.error, doc_type=self.doc_type, object_id=index_args['_id']) raise @wrap_es_errors def _bulk(self, objects, parent=None): """ Perform a batch of index operations rather than a single one. Arguments --------- objects: list of (op_type, object) tuples. op_type defines the index operation to perform ('index' for creates, updates and 'delete' for deletes) object is a dict of document attributes required for the operation. Returns ------- Count of index operations on success, raises SearchEngineError on failure. """ index_args = [] def raise_error(failure): for op_type, info in failure.iteritems(): if info.get('status') not in [None, 404]: return True return False for op, object_repr in objects: args = dict(_op_type=op, _index=self.index_id, _type=self.doc_type, _id=object_repr['id']) if op != 'delete': args.update(dict(_source=object_repr)) if parent is not None: args.update(dict(_parent=object_repr[parent])) index_args.append(args) try: count, failures = bulk(self._connection, index_args) except elasticsearch.exceptions.TransportError as e: self.log.error('Bulk index failure', error=e.error, doc_type=self.doc_type, object_ids=[i['_id'] for i in index_args]) raise SearchEngineError('Bulk index failure!') if count != len(objects): self.log.error('Bulk index failure', error='Not all indices created', doc_type=self.doc_type, object_ids=[i['_id'] for i in index_args], failures=failures) if any(raise_error(f) for f in failures): raise SearchEngineError('Bulk index failure!') return count @wrap_es_errors def search(self, query, sort, max_results=100, offset=0, explain=True): """ Perform a search and return the results. """ dsl_query = self.query_engine.generate_query(query) self.log.debug('search query', query=query, dsl_query=dsl_query) search_kwargs = dict(index=self.index_id, doc_type=self.doc_type, body=dsl_query, size=max_results, from_=offset, explain=explain) # Split this out to a Sort class with subclasses for # MessageSort/ThreadSort if we expand sorting to be more flexible. 
if sort != 'relevance': if self.doc_type == 'message': timestamp_field = 'date' if self.doc_type == 'thread': timestamp_field = 'last_message_timestamp' search_kwargs['sort'] = '{}:desc'.format(timestamp_field) raw_results = self._connection.search(**search_kwargs) self._log_query(query, raw_results) total, api_results = self.query_engine.process_results(raw_results) return dict(total=total, results=api_results) @wrap_es_errors def get_mapping(self): return self._connection.indices.get_mapping(index=self.index_id, doc_type=self.doc_type) def _log_query(self, query, raw_results): """ Log query and result info, stripping out actual result bodies but keeping ids and metadata. """ log_results = copy.deepcopy(raw_results) for hit in log_results['hits']['hits']: del hit['_source'] self.log.debug('search query results', query=query, results=log_results)
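

# Illustrative sketch only (not part of the adaptor's confirmed API): the
# `_bulk` method above expects a list of (op_type, object_repr) tuples. The
# document fields below are hypothetical message-like API representations.
def _example_bulk_payload():
    new_message = {'id': 'msg_1', 'namespace_id': 'ns_1',
                   'thread_id': 'thr_1', 'subject': 'Hello'}
    stale_message = {'id': 'msg_0', 'namespace_id': 'ns_1'}
    # 'index' covers both creates and updates; 'delete' only needs enough
    # attributes to identify the document being removed.
    return [('index', new_message), ('delete', stale_message)]

# A caller would then do something like (hypothetical):
#     adaptor._bulk(_example_bulk_payload(), parent='thread_id')
# where `parent` names the object attribute holding the parent document id.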
class BaseSearchAdaptor(object): """ Base adaptor between the Nilas API and Elasticsearch for a single index and document type. Subclasses implement the document type specific logic. """ def __init__(self, index_id, doc_type, query_class, log): self.index_id = index_id self.doc_type = doc_type self.query_engine = DSLQueryEngine(query_class) self.log = log # TODO(emfree): probably want to try to keep persistent connections # around, instead of creating a new one each time. self._connection = new_connection() @wrap_es_errors def _index_document(self, object_repr, **kwargs): """ (Re)index a document for the object with Nilas API representation `object_repr`. """ assert self.index_id == object_repr['namespace_id'] index_args = dict(index=self.index_id, doc_type=self.doc_type, id=object_repr['id'], body=object_repr) index_args.update(**kwargs) try: self._connection.index(**index_args) except elasticsearch.exceptions.TransportError as e: self.log.error('Index failure', error=e.error, doc_type=self.doc_type, object_id=index_args['_id']) raise @wrap_es_errors def _bulk(self, objects, parent=None): """ Perform a batch of index operations rather than a single one. Arguments --------- objects: list of (op_type, object) tuples. op_type defines the index operation to perform ('index' for creates, updates and 'delete' for deletes) object is a dict of document attributes required for the operation. Returns ------- Count of index operations on success, raises SearchEngineError on failure. """ index_args = [] def raise_error(failure): for op_type, info in failure.iteritems(): if info.get('status') not in [None, 404]: return True return False for op, object_repr in objects: args = dict(_op_type=op, _index=self.index_id, _type=self.doc_type, _id=object_repr['id']) if op != 'delete': args.update(dict(_source=object_repr)) if parent is not None: args.update(dict(_parent=object_repr[parent])) index_args.append(args) try: count, failures = bulk(self._connection, index_args) except elasticsearch.exceptions.TransportError as e: self.log.error('Bulk index failure', error=e.error, doc_type=self.doc_type, object_ids=[i['_id'] for i in index_args]) raise SearchEngineError('Bulk index failure!') if count != len(objects): self.log.error('Bulk index failure', error='Not all indices created', doc_type=self.doc_type, object_ids=[i['_id'] for i in index_args], failures=failures) if any(raise_error(f) for f in failures): raise SearchEngineError('Bulk index failure!') return count @wrap_es_errors def search(self, query, sort, max_results=100, offset=0, explain=True): """ Perform a search and return the results. """ dsl_query = self.query_engine.generate_query(query) self.log.debug('search query', query=query, dsl_query=dsl_query) search_kwargs = dict(index=self.index_id, doc_type=self.doc_type, body=dsl_query, size=max_results, from_=offset, explain=explain) # Split this out to a Sort class with subclasses for # MessageSort/ThreadSort if we expand sorting to be more flexible. 
if sort != 'relevance': if self.doc_type == 'message': timestamp_field = 'date' if self.doc_type == 'thread': timestamp_field = 'last_message_timestamp' search_kwargs['sort'] = '{}:desc'.format(timestamp_field) raw_results = self._connection.search(**search_kwargs) self._log_query(query, raw_results) total, api_results = self.query_engine.process_results(raw_results) return dict(total=total, results=api_results) @wrap_es_errors def get_mapping(self): return self._connection.indices.get_mapping(index=self.index_id, doc_type=self.doc_type) def _log_query(self, query, raw_results): """ Log query and result info, stripping out actual result bodies but keeping ids and metadata. """ log_results = copy.deepcopy(raw_results) for hit in log_results['hits']['hits']: del hit['_source'] self.log.debug('search query results', query=query, results=log_results)
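

# Illustrative sketch only: one way a caller might drive search() above.
# The namespace id, query shape, `message_query_class`, and the structured
# `log` object are assumptions for the example, not confirmed values.
def _example_search(namespace_id, message_query_class, log):
    adaptor = BaseSearchAdaptor(index_id=namespace_id, doc_type='message',
                                query_class=message_query_class, log=log)
    # Any sort value other than 'relevance' falls back to sorting by the
    # per-doc-type timestamp field ('date' for messages), descending.
    return adaptor.search({'subject': 'invoice'}, sort='timestamp',
                          max_results=25, offset=0)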