Beispiel #1
0
        def _build_response(result):
            """Convert a raw multi-get response into a list of objects.

            ``cls``, ``raise_on_error`` and ``missing`` are not parameters;
            they are read from the enclosing scope.

            :arg result: response dict whose ``'docs'`` entry lists one dict
                per requested document.
            :return: list with ``cls.from_es(doc)`` for every found document
                and, when ``missing == 'none'``, ``None`` for every document
                that errored or was not found.
            :raises RequestError: if ``raise_on_error`` is true and at least
                one document carries an ``'error'`` entry.
            :raises NotFoundError: if ``missing == 'raise'`` and at least one
                document was neither found nor errored.
            """
            hydrated, failed, absent = [], [], []
            for hit in result['docs']:
                if hit.get('found'):
                    # Once a failure has been recorded an exception is
                    # certain, so skip the expensive cls.from_es() call.
                    if not (failed or absent):
                        hydrated.append(cls.from_es(hit))
                    continue

                if hit.get('error'):
                    if raise_on_error:
                        failed.append(hit)
                    if missing == 'none':
                        hydrated.append(None)
                    continue

                # Neither found nor errored: the document is simply missing.
                if missing == 'raise':
                    absent.append(hit)
                elif missing == 'none':
                    hydrated.append(None)

            if failed:
                failed_ids = ', '.join([hit['_id'] for hit in failed])
                raise RequestError(
                    400,
                    'Required routing/parent not provided for documents %s.'
                    % failed_ids,
                    failed,
                )
            if absent:
                absent_ids = ', '.join([hit['_id'] for hit in absent])
                raise NotFoundError(
                    404, 'Documents %s not found.' % absent_ids, absent)
            return hydrated
Beispiel #2
0
 def execute_raw_feeds_query(self, query_dict):
     """
     Run a raw query against the feed index.
     :param query_dict: Search body handed to the connection's ``search`` call unchanged
     :return: The search response exactly as returned by the connection
     :raises RequestError: If ``create_connection`` returns None
     """
     es = self.create_connection()
     if es is None:
         raise RequestError("Cannot connect to ElasticSearch")
     return es.search(index=self.feed_index, body=query_dict)
Beispiel #3
0
    def mget(cls,
             docs,
             using=None,
             index=None,
             raise_on_error=True,
             missing='none',
             **kwargs):
        """Fetch several documents in one multi-get request.

        :arg docs: iterable of document ids, or of dicts used as document
            specifications as-is.
        :arg using: connection alias; falls back to ``cls._doc_type.using``.
        :arg index: index name; falls back to ``cls._doc_type.index``.
        :arg raise_on_error: when true, raise ``RequestError`` if any
            returned document carries an ``'error'`` entry.
        :arg missing: ``'none'`` (put ``None`` in the result), ``'raise'``
            (raise ``NotFoundError``) or ``'skip'`` (leave the document out).
        :return: list of ``cls.from_es(doc)`` results, in response order.
        :raises ValueError: if ``missing`` is not one of the three options.

        Extra keyword arguments are forwarded to the connection's ``mget``.
        """
        if missing not in ('raise', 'skip', 'none'):
            raise ValueError("'missing' must be 'raise', 'skip', or 'none'.")

        client = connections.get_connection(using or cls._doc_type.using)
        requested = [
            entry if isinstance(entry, dict) else {'_id': entry}
            for entry in docs
        ]
        response = client.mget(
            {'docs': requested},
            index=index or cls._doc_type.index,
            doc_type=cls._doc_type.name,
            **kwargs
        )

        hydrated, failed, absent = [], [], []
        for hit in response['docs']:
            if hit.get('found'):
                # An exception is already certain once a failure has been
                # recorded, so skip the expensive cls.from_es() call.
                if not (failed or absent):
                    hydrated.append(cls.from_es(hit))
                continue

            if hit.get('error'):
                if raise_on_error:
                    failed.append(hit)
                if missing == 'none':
                    hydrated.append(None)
                continue

            # Neither found nor errored: the document is simply missing.
            if missing == 'raise':
                absent.append(hit)
            elif missing == 'none':
                hydrated.append(None)

        if failed:
            failed_ids = ', '.join([hit['_id'] for hit in failed])
            raise RequestError(
                400,
                'Required routing/parent not provided for documents %s.'
                % failed_ids,
                failed,
            )
        if absent:
            absent_ids = ', '.join([hit['_id'] for hit in absent])
            raise NotFoundError(
                404, 'Documents %s not found.' % absent_ids, absent)
        return hydrated
Beispiel #4
0
 def get_network(self, actor_id):
     """
     Collect the network documents returned by the actor's search query.
     :param actor_id: Passed to ``self.get_search_dict`` to build the search body
     :return: List with the ``_source`` of every hit; empty when nothing matched
     :raises RequestError: If ``create_connection`` returns None
     """
     connection = self.create_connection()
     if connection is None:
         raise RequestError("Cannot connect to ElasticSearch")
     es_result = connection.search(index=self.network_index,
                                   body=self.get_search_dict(actor_id))
     # Walk the hits directly instead of gating on hits.total: Elasticsearch
     # 7+ reports the total as an object ({"value": ..., "relation": ...}),
     # and comparing that dict with 0 raises TypeError on Python 3. An empty
     # hit list already yields an empty result, so the gate was redundant.
     return [hit["_source"] for hit in es_result["hits"]["hits"]]
Beispiel #5
0
 def mget(self, body, index, doc_type='_all', params=None, headers=None):
     """
     Fetch several documents by id, one ``self.get`` call per id.

     :param body: Mapping whose ``'ids'`` entry lists the document ids
     :param index: Index name forwarded to ``self.get``
     :param doc_type: Document type forwarded to ``self.get``
     :param params: Forwarded to ``self.get``
     :param headers: Forwarded to ``self.get``
     :return: ``{'docs': [...]}`` with the documents that could be fetched
     :raises RequestError: If no document could be fetched at all
     """
     # A body without 'ids' is treated as an empty request so it fails the
     # validation below rather than with a TypeError from iterating None.
     ids = body.get('ids') or []
     results = []
     for doc_id in ids:
         try:
             results.append(self.get(index, doc_id, doc_type=doc_type,
                                     params=params, headers=headers))
         except Exception:
             # Best effort: documents that cannot be fetched are left out.
             # Only Exception is caught so KeyboardInterrupt and SystemExit
             # still propagate.
             continue
     if not results:
         raise RequestError(
             400,
             'action_request_validation_exception',
             'Validation Failed: 1: no documents to get;'
         )
     return {'docs': results}
Beispiel #6
0
 def link_network_exists(self, link_object):
     """
     Check whether a link object already exists in the network index
     :param link_object: The link object to check if exists
     :return: True if exists otherwise False
     :raises LinkObjectError: If ``link_object`` is not a ``Link``
     :raises RequestError: If ``create_connection`` returns None
     """
     if not isinstance(link_object, Link):
         raise LinkObjectError()
     connection = self.create_connection()
     if connection is None:
         raise RequestError("Cannot connect to ElasticSearch")
     res = connection.search(index=self.network_index,
                             body=link_object.get_search_dict())
     total = res["hits"]["total"]
     # Elasticsearch 7+ reports hits.total as {"value": n, "relation": ...}
     # rather than a plain integer; comparing that dict with 0 would raise
     # TypeError on Python 3, so unwrap it first.
     if isinstance(total, dict):
         total = total.get("value", 0)
     return total > 0
Beispiel #7
0
 def add_activity_feed(self, activity_object):
     """
     Index an activity in the feed index under a freshly generated UUID.
     :param activity_object: The Activity whose ``get_dict()`` output is indexed
     :return: The generated unique ID, as a string
     :raises ActivityObjectError: If ``activity_object`` is not an ``Activity``
     :raises RequestError: If ``create_connection`` returns None
     """
     if not isinstance(activity_object, Activity):
         raise ActivityObjectError()
     es = self.create_connection()
     if es is None:
         raise RequestError("Cannot connect to ElasticSearch")
     activity_id = str(uuid.uuid4())
     es.index(
         index=self.feed_index,
         id=activity_id,
         body=activity_object.get_dict(),
     )
     return activity_id
Beispiel #8
0
 def remove_network_link(self, link_object):
     """
     Delete an existing link from the network index.
     :param link_object: The Link whose ``get_search_dict()`` selects the documents to delete
     :return: True once the delete-by-query call has been issued
     :raises LinkObjectError: If ``link_object`` is not a ``Link``
     :raises LinkNotExistError: If ``link_network_exists`` reports no such link
     :raises RequestError: If ``create_connection`` returns None
     """
     if not isinstance(link_object, Link):
         raise LinkObjectError()
     if not self.link_network_exists(link_object):
         raise LinkNotExistError()
     es = self.create_connection()
     if es is None:
         raise RequestError("Cannot connect to ElasticSearch")
     es.delete_by_query(
         index=self.network_index,
         body=link_object.get_search_dict(),
     )
     return True
Beispiel #9
0
def test_register_doi_task_doesnt_retry_if_indexing_error(
        create_record, mocker):
    """Check register_doi does not retry when metadata_post raises RequestError."""
    datacite_client = mocker.patch(
        'cd2h_repo_project.modules.doi.tasks.DataCiteMDSClient')()
    retry_mock = mocker.patch(
        'cd2h_repo_project.modules.doi.tasks.register_doi.retry')
    # publish() would trigger the task itself, so only the publish() steps
    # needed here are replayed by hand.
    record = create_record(published=False)
    mint_pids_for_record(record.id, record)
    # Return value is unused; presumably this lookup only confirms that the
    # DOI PID was minted -- TODO confirm.
    PersistentIdentifier.get(pid_type='doi', pid_value=record['id'])

    datacite_client.metadata_post.side_effect = RequestError()

    register_doi(record['id'])

    assert len(retry_mock.mock_calls) == 0
    assert not record['doi']
Beispiel #10
0
 def get_feeds(self, aggregator):
     """
     Configure the aggregator, run its query and return the feeds it produces.
     The structure of the returned elements depends on the aggregator.
     :param aggregator: BaseAggregator instance
     :return: Result of ``aggregator.get_feeds()``, or an empty list when the aggregator built no query
     :raises AggregatorObjectError: If ``aggregator`` is not a ``BaseAggregator``
     :raises RequestError: If ``create_connection`` returns None
     """
     if not isinstance(aggregator, BaseAggregator):
         raise AggregatorObjectError()
     es = self.create_connection()
     if es is None:
         raise RequestError("Cannot connect to ElasticSearch")

     # Hand the aggregator everything it needs before it builds its query.
     aggregator.connection = es
     aggregator.feed_index = self.feed_index
     aggregator.network_array = self.get_network(aggregator.actor_id)
     aggregator.set_query_dict()
     if aggregator.query_dict is None:
         return []

     aggregator.set_aggregation_section()
     aggregator.query_feeds()
     return aggregator.get_feeds()
Beispiel #11
0
 def add_network_link(self, link_object):
     """
     Index a new link in the network index under a freshly generated UUID.
     :param link_object: The Link whose ``get_dict()`` output is indexed
     :return: The generated unique ID, as a string
     :raises LinkObjectError: If ``link_object`` is not a ``Link``
     :raises LinkExistError: If ``link_network_exists`` reports the link is already present
     :raises RequestError: If ``create_connection`` returns None
     """
     if not isinstance(link_object, Link):
         raise LinkObjectError()
     if self.link_network_exists(link_object):
         raise LinkExistError()
     es = self.create_connection()
     if es is None:
         raise RequestError("Cannot connect to ElasticSearch")
     link_id = str(uuid.uuid4())
     es.index(
         index=self.network_index,
         id=link_id,
         body=link_object.get_dict(),
     )
     return link_id
Beispiel #12
0
    def mget(cls, docs, using=None, index=None, raise_on_error=True,
             missing='none', **kwargs):
        r"""
        Fetch several documents by their ``id``\s in a single multi-get call.
        Returns a list of instances in the same order as requested.

        :arg docs: list of ``id``\s of the documents to be retrieved or a list
            of document specifications as per
            https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html
        :arg index: elasticsearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to ``'default'``
        :arg raise_on_error: when true, raise ``RequestError`` if any of the
            returned documents carries an ``'error'`` entry.
        :arg missing: what to do when one of the documents requested is not
            found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise
            ``NotFoundError``) or ``'skip'`` (ignore the missing document).

        Any additional keyword arguments will be passed to
        ``Elasticsearch.mget`` unchanged.
        """
        if missing not in ('raise', 'skip', 'none'):
            raise ValueError("'missing' must be 'raise', 'skip', or 'none'.")

        client = cls._get_connection(using)
        specs = [
            item if isinstance(item, collections_abc.Mapping) else {'_id': item}
            for item in docs
        ]
        response = client.mget(
            {'docs': specs},
            index=cls._default_index(index),
            **kwargs
        )

        hydrated, failed, absent = [], [], []
        for hit in response['docs']:
            if hit.get('found'):
                # An exception is already certain once a failure has been
                # recorded, so skip the expensive cls.from_es() call.
                if not (failed or absent):
                    hydrated.append(cls.from_es(hit))
                continue

            if hit.get('error'):
                if raise_on_error:
                    failed.append(hit)
                if missing == 'none':
                    hydrated.append(None)
                continue

            # Neither found nor errored: the document is simply missing.
            if missing == 'raise':
                absent.append(hit)
            elif missing == 'none':
                hydrated.append(None)

        if failed:
            failed_ids = ', '.join([hit['_id'] for hit in failed])
            raise RequestError(
                400,
                'Required routing not provided for documents %s.' % failed_ids,
                failed,
            )
        if absent:
            absent_ids = ', '.join([hit['_id'] for hit in absent])
            raise NotFoundError(
                404, 'Documents %s not found.' % absent_ids, {'docs': absent})
        return hydrated
Beispiel #13
0
    def __init__(
        self,
        feed_index="feeds",
        network_index="network",
        host="localhost",
        port=9200,
        url_prefix=None,
        use_ssl=False,
        number_of_shards_in_feeds=5,
        number_of_replicas_in_feeds=1,
        number_of_shards_in_network=5,
        number_of_replicas_in_network=1,
        delete_feeds_if_exists=False,
        delete_network_if_exists=False,
        max_link_size=1000,
    ):
        """
        The constructor of the Manager. It creates the feeds and network indices if they don't exist. See
        https://www.elastic.co/guide/en/elasticsearch/reference/current/_basic_concepts.html#getting-started-shards-and-replicas
        for more information about shards and replicas
        :param feed_index: The name of the feed index. "feeds" by default
        :param network_index: The name of the network index. "network" by default
        :param host: ElasticSearch host name. "localhost" by default
        :param port: ElasticSearch port. 9200 by default
        :param url_prefix: URL prefix. None by default
        :param use_ssl: Use SSL to connect to ElasticSearch. False by default
        :param number_of_shards_in_feeds: Number of shards for the feeds index. 5 by default
        :param number_of_replicas_in_feeds: Number of replicas for the feeds index. 1 by default
        :param number_of_shards_in_network: Number of shards for the network index. 5 by default
        :param number_of_replicas_in_network: Number of replicas for the network index. 1 by default
        :param delete_feeds_if_exists: Delete and recreate the feeds index if it already exists. False by default
        :param delete_network_if_exists: Delete and recreate the network index if it already exists. False by default
        :param max_link_size: Maximum number of links to fetch from an actor
        :raises RequestError: If no connection can be created, or if index creation fails for any
            reason other than the index already existing
        """
        self.host = host
        self.port = port
        self.url_prefix = url_prefix
        self.use_ssl = use_ssl
        self.feed_index = feed_index
        self.network_index = network_index
        self._max_link_size = max_link_size

        connection = self.create_connection()
        if connection is None:
            raise RequestError("Cannot connect to ElasticSearch")

        # (index name, lazy definition builder, delete flag, delete callable).
        # Feeds first, then network. The definition builders are lambdas so
        # a definition is only built when an index is actually created.
        index_plans = (
            (
                feed_index,
                lambda: _get_feed_index_definition(
                    number_of_shards_in_feeds,
                    number_of_replicas_in_feeds),
                delete_feeds_if_exists,
                self.delete_feeds_index,
            ),
            (
                network_index,
                lambda: _get_network_index_definition(
                    number_of_shards_in_network,
                    number_of_replicas_in_network),
                delete_network_if_exists,
                self.delete_network_index,
            ),
        )
        for name, build_definition, delete_if_exists, delete_index in index_plans:
            if connection.indices.exists(name):
                if not delete_if_exists:
                    continue
                # Honour the delete flag for an index that is already there.
                # Previously the flag only took effect when creation failed
                # with "already_exists", which cannot happen once exists()
                # has reported the index as present.
                delete_index()
            try:
                connection.indices.create(name, body=build_definition())
            except RequestError as e:
                already_exists = (
                    e.status_code == 400
                    and e.error.find("already_exists") >= 0
                )
                if not already_exists:
                    raise
                # The index appeared between the exists() check and the
                # create() call; replace it only if the caller asked for it.
                if delete_if_exists:
                    delete_index()
                    connection.indices.create(name, body=build_definition())
    def bulk(self, body, index=None, doc_type=None, params=None, headers=None):
        """Process a newline-delimited bulk request against the in-memory store.

        ``body`` is split into lines; blank lines are skipped and every other
        line is parsed as JSON. A line containing one of the keys ``index``,
        ``create``, ``update`` or ``delete`` is treated as an action line. Any
        other line is treated as the source document for the most recently
        seen action line.

        :param body: string holding one JSON object per line.
        :param index: default index name, used when an action line carries no
            ``_index``.
        :param doc_type: overwritten by each action line's ``_type`` (or
            ``"_doc"``) before it is used.
        :param params: forwarded to ``_validate_action``, ``exists``, ``get``
            and ``delete``.
        :param headers: accepted but not used by this method.
        :return: ``{'errors': bool, 'items': list}`` where ``items`` has one
            entry per processed action.
        :raises RequestError: if a ``delete`` or ``update`` action line has no
            ``_id``.
        """
        items = []
        errors = False

        for raw_line in body.splitlines():
            if len(raw_line.strip()) > 0:
                line = json.loads(raw_line)

                # Action line: remember its metadata for the source line that
                # follows (delete is handled right away, see below).
                if any(action in line
                       for action in ['index', 'create', 'update', 'delete']):
                    # The action name is taken from the first key of the line.
                    action = next(iter(line.keys()))

                    version = 1
                    # An explicit _index wins; otherwise keep the current
                    # value (the argument, or an earlier line's _index).
                    index = line[action].get('_index') or index
                    doc_type = line[action].get(
                        '_type', "_doc")  # _type is deprecated in 7.x

                    if action in ['delete', 'update'
                                  ] and not line[action].get("_id"):
                        raise RequestError(
                            400, 'action_request_validation_exception',
                            'missing id')

                    # Fall back to a generated id when the action has none.
                    document_id = line[action].get('_id', get_random_id())

                    # delete is executed immediately on its action line.
                    if action == 'delete':
                        status, result, error = self._validate_action(
                            action,
                            index,
                            document_id,
                            doc_type,
                            params=params)
                        item = {
                            action: {
                                '_type': doc_type,
                                '_id': document_id,
                                '_index': index,
                                '_version': version,
                                'status': status,
                            }
                        }
                        if error:
                            errors = True
                            item[action]["error"] = result
                        else:
                            self.delete(index,
                                        document_id,
                                        doc_type=doc_type,
                                        params=params)
                            item[action]["result"] = result
                        items.append(item)

                    # Make sure the per-index document list exists before any
                    # source line appends to it.
                    if index not in self.__documents_dict:
                        self.__documents_dict[index] = list()
                else:
                    # Source line: relies on action, index, doc_type,
                    # document_id and version set by the preceding action line.
                    # NOTE(review): a source line arriving before any action
                    # line would hit unassigned locals -- confirm callers
                    # always send well-formed bulk bodies.
                    if 'doc' in line and action == 'update':
                        source = line['doc']
                    else:
                        source = line
                    status, result, error = self._validate_action(
                        action, index, document_id, doc_type, params=params)
                    item = {
                        action: {
                            '_type': doc_type,
                            '_id': document_id,
                            '_index': index,
                            '_version': version,
                            'status': status,
                        }
                    }
                    if not error:
                        item[action]["result"] = result
                        # Replace an existing document: bump its version and
                        # remove the old copy before storing the new one.
                        if self.exists(index,
                                       document_id,
                                       doc_type=doc_type,
                                       params=params):
                            doc = self.get(index,
                                           document_id,
                                           doc_type=doc_type,
                                           params=params)
                            version = doc['_version'] + 1
                            self.delete(index,
                                        document_id,
                                        doc_type=doc_type,
                                        params=params)

                        # The item built above keeps the version from the
                        # action line; only the stored document gets the
                        # bumped value.
                        self.__documents_dict[index].append({
                            '_type': doc_type,
                            '_id': document_id,
                            '_source': source,
                            '_index': index,
                            '_version': version
                        })
                    else:
                        errors = True
                        item[action]["error"] = result
                    items.append(item)
        return {'errors': errors, 'items': items}