def _build_response(result):
    # Note: relies on `cls`, `raise_on_error` and `missing` from an
    # enclosing mget()-style scope; it is meant to be a nested helper.
    objs, error_docs, missing_docs = [], [], []
    for doc in result['docs']:
        if doc.get('found'):
            if error_docs or missing_docs:
                # We're going to raise an exception anyway, so avoid an
                # expensive call to cls.from_es().
                continue
            objs.append(cls.from_es(doc))
        elif doc.get('error'):
            if raise_on_error:
                error_docs.append(doc)
            if missing == 'none':
                objs.append(None)
        # The doc didn't cause an error, but the doc also wasn't found.
        elif missing == 'raise':
            missing_docs.append(doc)
        elif missing == 'none':
            objs.append(None)

    if error_docs:
        error_ids = [doc['_id'] for doc in error_docs]
        message = 'Required routing/parent not provided for documents %s.'
        message %= ', '.join(error_ids)
        raise RequestError(400, message, error_docs)
    if missing_docs:
        missing_ids = [doc['_id'] for doc in missing_docs]
        message = 'Documents %s not found.' % ', '.join(missing_ids)
        raise NotFoundError(404, message, missing_docs)
    return objs

def execute_raw_feeds_query(self, query_dict):
    """
    Executes a raw query against the feed index
    :param query_dict: The raw ElasticSearch query as a dict
    :return: The raw ElasticSearch response
    """
    connection = self.create_connection()
    if connection is not None:
        es_result = connection.search(index=self.feed_index, body=query_dict)
        return es_result
    else:
        raise RequestError("Cannot connect to ElasticSearch")

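# A minimal usage sketch (not from the source): any valid ElasticSearch
# query body can be passed straight through. `manager` is a hypothetical
# instance of the class that defines execute_raw_feeds_query().
raw_query = {"query": {"match_all": {}}, "size": 10}
result = manager.execute_raw_feeds_query(raw_query)
for hit in result["hits"]["hits"]:
    print(hit["_source"])
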
@classmethod
def mget(cls, docs, using=None, index=None, raise_on_error=True,
         missing='none', **kwargs):
    if missing not in ('raise', 'skip', 'none'):
        raise ValueError("'missing' must be 'raise', 'skip', or 'none'.")
    es = connections.get_connection(using or cls._doc_type.using)
    body = {
        'docs': [
            doc if isinstance(doc, dict) else {'_id': doc}
            for doc in docs
        ]
    }
    results = es.mget(body, index=index or cls._doc_type.index,
                      doc_type=cls._doc_type.name, **kwargs)

    objs, error_docs, missing_docs = [], [], []
    for doc in results['docs']:
        if doc.get('found'):
            if error_docs or missing_docs:
                # We're going to raise an exception anyway, so avoid an
                # expensive call to cls.from_es().
                continue
            objs.append(cls.from_es(doc))
        elif doc.get('error'):
            if raise_on_error:
                error_docs.append(doc)
            if missing == 'none':
                objs.append(None)
        # The doc didn't cause an error, but the doc also wasn't found.
        elif missing == 'raise':
            missing_docs.append(doc)
        elif missing == 'none':
            objs.append(None)

    if error_docs:
        error_ids = [doc['_id'] for doc in error_docs]
        message = 'Required routing/parent not provided for documents %s.'
        message %= ', '.join(error_ids)
        raise RequestError(400, message, error_docs)
    if missing_docs:
        missing_ids = [doc['_id'] for doc in missing_docs]
        message = 'Documents %s not found.' % ', '.join(missing_ids)
        raise NotFoundError(404, message, missing_docs)
    return objs

def get_network(self, actor_id):
    """
    Creates an array of the current network links for an actor
    :param actor_id: The actor whose network links are fetched
    :return: Array of link dicts (the ``_source`` of each hit)
    """
    result = []
    connection = self.create_connection()
    if connection is not None:
        es_result = connection.search(index=self.network_index,
                                      body=self.get_search_dict(actor_id))
        if es_result["hits"]["total"] > 0:
            for hit in es_result["hits"]["hits"]:
                result.append(hit["_source"])
        return result
    else:
        raise RequestError("Cannot connect to ElasticSearch")

def mget(self, body, index, doc_type='_all', params=None, headers=None):
    ids = body.get('ids')
    results = []
    for id in ids:
        try:
            results.append(self.get(index, id, doc_type=doc_type,
                                    params=params, headers=headers))
        except Exception:
            # Ids that cannot be fetched are silently skipped, mirroring
            # the real API, which reports them as not found.
            pass
    if not results:
        raise RequestError(
            400,
            'action_request_validation_exception',
            'Validation Failed: 1: no documents to get;'
        )
    return {'docs': results}

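# A minimal shape sketch (hypothetical names/values): the fake mget() above
# expects the ids under a top-level 'ids' key and returns the real API's
# {'docs': [...]} envelope. `fake_es` stands for a mock client instance.
body = {'ids': ['doc-1', 'doc-2']}
response = fake_es.mget(body, index='feeds')
found_ids = [doc['_id'] for doc in response['docs']]
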
def link_network_exists(self, link_object):
    """
    Check whether a link object already exists in the network index
    :param link_object: The link object whose existence is checked
    :return: True if it exists, otherwise False
    """
    if not isinstance(link_object, Link):
        raise LinkObjectError()
    connection = self.create_connection()
    if connection is not None:
        res = connection.search(index=self.network_index,
                                body=link_object.get_search_dict())
        if res["hits"]["total"] > 0:
            return True
    else:
        raise RequestError("Cannot connect to ElasticSearch")
    return False

def add_activity_feed(self, activity_object):
    """
    Adds an activity to the feed index
    :param activity_object: The activity object being added to the index
    :return: The unique ID given to the activity
    """
    if not isinstance(activity_object, Activity):
        raise ActivityObjectError()
    connection = self.create_connection()
    if connection is not None:
        unique_id = str(uuid.uuid4())
        connection.index(
            index=self.feed_index,
            id=unique_id,
            body=activity_object.get_dict(),
        )
        return unique_id
    else:
        raise RequestError("Cannot connect to ElasticSearch")

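# A minimal usage sketch (hypothetical names/arguments): `manager` is an
# instance of the class defining add_activity_feed(), and the `Activity`
# constructor arguments are assumptions, not the library's real signature.
activity = Activity(actor="user:42", verb="posted", object="photo:7")
activity_id = manager.add_activity_feed(activity)  # returns a UUID string
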
def remove_network_link(self, link_object):
    """
    Removes a link from the network
    :param link_object: The Link object being removed from the index.
    :return: Bool
    """
    if not isinstance(link_object, Link):
        raise LinkObjectError()
    if self.link_network_exists(link_object):
        connection = self.create_connection()
        if connection is not None:
            connection.delete_by_query(
                index=self.network_index,
                body=link_object.get_search_dict(),
            )
            return True
        else:
            raise RequestError("Cannot connect to ElasticSearch")
    else:
        raise LinkNotExistError()

def test_register_doi_task_doesnt_retry_if_indexing_error(
        create_record, mocker):
    """Test that a register_doi failure caused on our side is not retried."""
    patched_client = mocker.patch(
        'cd2h_repo_project.modules.doi.tasks.DataCiteMDSClient')()
    patched_retry = mocker.patch(
        'cd2h_repo_project.modules.doi.tasks.register_doi.retry')
    # Because publish() triggers the task, we need to perform some of the
    # steps of publish() without calling publish()
    record = create_record(published=False)
    mint_pids_for_record(record.id, record)
    doi_pid = PersistentIdentifier.get(pid_type='doi', pid_value=record['id'])
    patched_client.metadata_post.side_effect = RequestError()

    register_doi(record['id'])

    number_retries = len(patched_retry.mock_calls)
    assert number_retries == 0
    assert not record['doi']

def get_feeds(self, aggregator):
    """
    Return an array of feeds. The structure of the elements will depend on
    the aggregator
    :param aggregator: Aggregator instance (a BaseAggregator subclass)
    :return: Array of feeds, or an empty array if the actor_id does not
             have any network links
    """
    if not isinstance(aggregator, BaseAggregator):
        raise AggregatorObjectError()
    connection = self.create_connection()
    if connection is not None:
        aggregator.connection = connection
        aggregator.feed_index = self.feed_index
        aggregator.network_array = self.get_network(aggregator.actor_id)
        aggregator.set_query_dict()
        if aggregator.query_dict is not None:
            aggregator.set_aggregation_section()
            aggregator.query_feeds()
            return aggregator.get_feeds()
        else:
            return []
    else:
        raise RequestError("Cannot connect to ElasticSearch")

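# A minimal usage sketch (hypothetical names): `ChronologicalAggregator`
# stands in for any concrete BaseAggregator subclass, and `manager` for an
# instance of the class defining get_feeds(); the constructor is assumed.
aggregator = ChronologicalAggregator(actor_id="user:42")
for feed in manager.get_feeds(aggregator):
    print(feed)
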
def add_network_link(self, link_object):
    """
    Adds a link to the network index
    :param link_object: The Link object being added to the index
    :return: The unique ID given to the link
    """
    if not isinstance(link_object, Link):
        raise LinkObjectError()
    if not self.link_network_exists(link_object):
        connection = self.create_connection()
        if connection is not None:
            unique_id = str(uuid.uuid4())
            connection.index(
                index=self.network_index,
                id=unique_id,
                body=link_object.get_dict(),
            )
            return unique_id
        else:
            raise RequestError("Cannot connect to ElasticSearch")
    else:
        raise LinkExistError()

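# A minimal usage sketch (hypothetical constructor arguments): a `Link`
# models an edge in the network index; adding a duplicate raises
# LinkExistError and removing a missing one raises LinkNotExistError.
link = Link(actor_id="user:42", linked_activity_id="user:7")
if not manager.link_network_exists(link):
    link_id = manager.add_network_link(link)  # returns a UUID string
manager.remove_network_link(link)             # returns True on success
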
@classmethod
def mget(cls, docs, using=None, index=None, raise_on_error=True,
         missing='none', **kwargs):
    r"""
    Retrieve multiple documents by their ``id``\s. Returns a list of
    instances in the same order as requested.

    :arg docs: list of ``id``\s of the documents to be retrieved or a list
        of document specifications as per
        https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html
    :arg index: elasticsearch index to use, if the ``Document`` is
        associated with an index this can be omitted.
    :arg using: connection alias to use, defaults to ``'default'``
    :arg missing: what to do when one of the documents requested is not
        found. Valid options are ``'none'`` (use ``None``), ``'raise'``
        (raise ``NotFoundError``) or ``'skip'`` (ignore the missing
        document).

    Any additional keyword arguments will be passed to
    ``Elasticsearch.mget`` unchanged.
    """
    if missing not in ('raise', 'skip', 'none'):
        raise ValueError("'missing' must be 'raise', 'skip', or 'none'.")
    es = cls._get_connection(using)
    body = {
        'docs': [
            doc if isinstance(doc, collections_abc.Mapping) else {'_id': doc}
            for doc in docs
        ]
    }
    results = es.mget(body, index=cls._default_index(index), **kwargs)

    objs, error_docs, missing_docs = [], [], []
    for doc in results['docs']:
        if doc.get('found'):
            if error_docs or missing_docs:
                # We're going to raise an exception anyway, so avoid an
                # expensive call to cls.from_es().
                continue
            objs.append(cls.from_es(doc))
        elif doc.get('error'):
            if raise_on_error:
                error_docs.append(doc)
            if missing == 'none':
                objs.append(None)
        # The doc didn't cause an error, but the doc also wasn't found.
        elif missing == 'raise':
            missing_docs.append(doc)
        elif missing == 'none':
            objs.append(None)

    if error_docs:
        error_ids = [doc['_id'] for doc in error_docs]
        message = 'Required routing not provided for documents %s.'
        message %= ', '.join(error_ids)
        raise RequestError(400, message, error_docs)
    if missing_docs:
        missing_ids = [doc['_id'] for doc in missing_docs]
        message = 'Documents %s not found.' % ', '.join(missing_ids)
        raise NotFoundError(404, message, {'docs': missing_docs})
    return objs

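# A minimal usage sketch for the Document.mget() above, assuming a Document
# subclass named `Post` (hypothetical) that is bound to an index:
posts = Post.mget([1, 2, 3], missing='skip')  # drop ids that were not found
maybe_posts = Post.mget([1, 2, 3])            # None placeholders (default)
try:
    Post.mget([1, 2, 3], missing='raise')
except NotFoundError:
    pass  # at least one requested id was absent
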
def __init__(
    self,
    feed_index="feeds",
    network_index="network",
    host="localhost",
    port=9200,
    url_prefix=None,
    use_ssl=False,
    number_of_shards_in_feeds=5,
    number_of_replicas_in_feeds=1,
    number_of_shards_in_network=5,
    number_of_replicas_in_network=1,
    delete_feeds_if_exists=False,
    delete_network_if_exists=False,
    max_link_size=1000,
):
    """
    The constructor of the Manager. It creates the feeds and network
    indices if they don't exist. See
    https://www.elastic.co/guide/en/elasticsearch/reference/current/_basic_concepts.html#getting-started-shards-and-replicas
    for more information about shards and replicas
    :param feed_index: The name of the feed index. "feeds" by default
    :param network_index: The name of the network index. "network" by default
    :param host: ElasticSearch host name. "localhost" by default
    :param port: ElasticSearch port. 9200 by default
    :param url_prefix: URL prefix. None by default
    :param use_ssl: Use SSL to connect to ElasticSearch. False by default
    :param number_of_shards_in_feeds: Number of shards for the feeds index. 5 by default
    :param number_of_replicas_in_feeds: Number of replicas for the feeds index. 1 by default
    :param number_of_shards_in_network: Number of shards for the network index. 5 by default
    :param number_of_replicas_in_network: Number of replicas for the network index. 1 by default
    :param delete_feeds_if_exists: Delete the feeds index if it already exists. False by default
    :param delete_network_if_exists: Delete the network index if it already exists. False by default
    :param max_link_size: Maximum number of links to fetch from an actor. 1000 by default
    """
    self.host = host
    self.port = port
    self.url_prefix = url_prefix
    self.use_ssl = use_ssl
    self.feed_index = feed_index
    self.network_index = network_index
    self._max_link_size = max_link_size
    connection = self.create_connection()
    if connection is not None:
        if not connection.indices.exists(feed_index):
            try:
                connection.indices.create(
                    feed_index,
                    body=_get_feed_index_definition(
                        number_of_shards_in_feeds,
                        number_of_replicas_in_feeds),
                )
            except RequestError as e:
                if e.status_code == 400:
                    if e.error.find("already_exists") >= 0:
                        if delete_feeds_if_exists:
                            self.delete_feeds_index()
                            connection.indices.create(
                                feed_index,
                                body=_get_feed_index_definition(
                                    number_of_shards_in_feeds,
                                    number_of_replicas_in_feeds,
                                ),
                            )
                        else:
                            pass
                    else:
                        raise e
                else:
                    raise e
        if not connection.indices.exists(network_index):
            try:
                connection.indices.create(
                    network_index,
                    body=_get_network_index_definition(
                        number_of_shards_in_network,
                        number_of_replicas_in_network),
                )
            except RequestError as e:
                if e.status_code == 400:
                    if e.error.find("already_exists") >= 0:
                        if delete_network_if_exists:
                            self.delete_network_index()
                            connection.indices.create(
                                network_index,
                                body=_get_network_index_definition(
                                    number_of_shards_in_network,
                                    number_of_replicas_in_network,
                                ),
                            )
                        else:
                            pass
                    else:
                        raise e
                else:
                    raise e
    else:
        raise RequestError("Cannot connect to ElasticSearch")

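# A minimal usage sketch, assuming the enclosing class is called `Manager`
# (the source only shows its __init__): creates/verifies both indices on a
# local ElasticSearch node.
manager = Manager(
    feed_index="feeds",
    network_index="network",
    host="localhost",
    port=9200,
    delete_feeds_if_exists=False,
)
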
def bulk(self, body, index=None, doc_type=None, params=None, headers=None):
    items = []
    errors = False
    for raw_line in body.splitlines():
        if len(raw_line.strip()) > 0:
            line = json.loads(raw_line)
            if any(action in line
                   for action in ['index', 'create', 'update', 'delete']):
                action = next(iter(line.keys()))
                version = 1
                index = line[action].get('_index') or index
                doc_type = line[action].get(
                    '_type', "_doc")  # _type is deprecated in 7.x
                if action in ['delete', 'update'] \
                        and not line[action].get("_id"):
                    raise RequestError(
                        400, 'action_request_validation_exception',
                        'missing id')
                document_id = line[action].get('_id', get_random_id())
                if action == 'delete':
                    status, result, error = self._validate_action(
                        action, index, document_id, doc_type, params=params)
                    item = {
                        action: {
                            '_type': doc_type,
                            '_id': document_id,
                            '_index': index,
                            '_version': version,
                            'status': status,
                        }
                    }
                    if error:
                        errors = True
                        item[action]["error"] = result
                    else:
                        self.delete(index, document_id, doc_type=doc_type,
                                    params=params)
                        item[action]["result"] = result
                    items.append(item)
                if index not in self.__documents_dict:
                    self.__documents_dict[index] = list()
            else:
                if 'doc' in line and action == 'update':
                    source = line['doc']
                else:
                    source = line
                status, result, error = self._validate_action(
                    action, index, document_id, doc_type, params=params)
                item = {
                    action: {
                        '_type': doc_type,
                        '_id': document_id,
                        '_index': index,
                        '_version': version,
                        'status': status,
                    }
                }
                if not error:
                    item[action]["result"] = result
                    if self.exists(index, document_id, doc_type=doc_type,
                                   params=params):
                        doc = self.get(index, document_id, doc_type=doc_type,
                                       params=params)
                        version = doc['_version'] + 1
                        self.delete(index, document_id, doc_type=doc_type,
                                    params=params)
                    self.__documents_dict[index].append({
                        '_type': doc_type,
                        '_id': document_id,
                        '_source': source,
                        '_index': index,
                        '_version': version
                    })
                else:
                    errors = True
                    item[action]["error"] = result
                items.append(item)
    return {'errors': errors, 'items': items}

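# A minimal shape sketch (hypothetical values): bulk() above consumes a
# newline-delimited string alternating action metadata and source documents,
# matching the real _bulk API. `fake_es` stands for a mock client instance.
bulk_body = '\n'.join([
    '{"index": {"_index": "users", "_id": "1"}}',
    '{"name": "Alice"}',
    '{"delete": {"_index": "users", "_id": "2"}}',
])
response = fake_es.bulk(bulk_body)
assert set(response) == {'errors', 'items'}
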