Example #1
    def find_one(self, endpoint_name, req, **lookup):
        """Find single item.

        :param endpoint_name: resource name
        :param req: parsed request
        :param lookup: additional filter
        """
        backend = self._backend(endpoint_name)
        item = backend.find_one(endpoint_name, req=req, **lookup)
        search_backend = self._lookup_backend(endpoint_name, fallback=True)
        if search_backend:
            # set the parent for the parent-child relationship in elasticsearch
            self._set_parent(endpoint_name, item, lookup)
            item_search = search_backend.find_one(endpoint_name,
                                                  req=req,
                                                  **lookup)
            if item is None and item_search:
                item = item_search
                logger.warn(item_msg('item is only in elastic', item))
            elif item_search is None and item:
                logger.warn(item_msg('item is only in mongo', item))
                try:
                    logger.info(item_msg('trying to add item to elastic',
                                         item))
                    search_backend.insert(endpoint_name, [item])
                except RequestError as e:
                    logger.error(
                        item_msg(
                            'failed to add item into elastic error={}'.format(
                                str(e)), item))
        return item
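A hedged usage sketch of the behaviour above (the resource name and id are illustrative, not taken from the code): mongo is consulted first, elastic acts as a fallback, and an item found only in mongo is re-indexed into elastic on the fly.

    # hypothetical call through the service that owns find_one above
    item = backend_service.find_one('archive', req=None, _id='some-item-id')
    if item is None:
        # the item exists in neither mongo nor elastic
        pass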
Example #2
    def _validate(self, doc, **kwargs):
        use_headline = kwargs and 'headline' in kwargs
        validators = self._get_validators(doc)
        for validator in validators:
            validation_schema = self._get_validator_schema(validator)
            self._sanitize_fields(doc['validate'], validator)
            self._process_media(doc['validate'], validation_schema)
            self._process_sms(doc['validate'], validation_schema)
            v = SchemaValidator()
            v.allow_unknown = True
            try:
                v.validate(doc['validate'], validation_schema)
            except TypeError as e:
                logger.exception('Invalid validator schema value "%s"' % str(e))
            error_list = v.errors
            response = []
            for e in error_list:
                messages = []
                # Ignore dateline if item is corrected because it can't be changed after the item is published
                if doc.get('act', None) == 'correct' and e == 'dateline':
                    continue
                elif doc.get('act', None) == 'kill' and doc['validate'].get('profile', None) and \
                        e in ('headline', 'abstract', 'body_html'):
                    continue
                elif e == 'extra':
                    for field in error_list[e]:
                        display_name = self._get_vocabulary_display_name(field)
                        if 'required' in error_list[e][field]:
                            messages.append(
                                REQUIRED_ERROR.format(display_name))
                        else:
                            messages.append('{} {}'.format(
                                display_name, error_list[e][field]))
                elif error_list[e] == 'required field' or type(error_list[e]) is dict or \
                        type(error_list[e]) is list:
                    messages.append(REQUIRED_ERROR.format(e.upper()))
                elif 'min length is 1' == error_list[
                        e] or 'null value not allowed' in error_list[e]:
                    messages.append(REQUIRED_ERROR.format(e.upper()))
                elif 'min length is' in error_list[e]:
                    messages.append('{} is too short'.format(e.upper()))
                elif 'max length is' in error_list[e]:
                    messages.append('{} is too long'.format(e.upper()))
                else:
                    messages.append('{} {}'.format(e.upper(), error_list[e]))

                for message in messages:
                    if use_headline:
                        headline = '{}: {}'.format(
                            doc['validate'].get('headline',
                                                doc['validate'].get('_id')),
                            message)
                        response.append(headline)
                    else:
                        response.append(message)
            return response
        else:
            logger.warn('validator was not found for {}'.format(doc['act']))
            return []
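The validator expects the publish action under `act` and the item itself under `validate`; a hypothetical invocation (the service name and field values are illustrative) might look like:

    doc = {
        'act': 'publish',  # or 'correct' / 'kill'
        'validate': {'_id': 'abc123', 'headline': 'Short headline', 'body_html': ''},
    }
    # returns a list of human readable messages; with headline=True each one
    # is prefixed with the item headline (or _id when there is no headline)
    errors = validate_service._validate(doc, headline=True)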
Example #3
    def _change_request(self, endpoint_name, id, updates, original):
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)

        try:
            backend.update(endpoint_name, id, updates, original)
        except eve.io.base.DataLayer.OriginalChangedError:
            if not backend.find_one(endpoint_name, req=None, _id=id) and search_backend:
                # item is in elastic, not in mongo - not good
                logger.warn("Item is missing in mongo resource=%s id=%s".format(endpoint_name, id))
                self.remove_from_search(endpoint_name, id)
                raise SuperdeskApiError.notFoundError()
            else:
                # item is there, but no change was done - ok
                logger.error('Item : {} not updated in collection {}. '
                             'Updates are : {}'.format(id, endpoint_name, updates))
                return updates

        if search_backend:
            doc = backend.find_one(endpoint_name, req=None, _id=id)
            if not doc:  # there is no doc in mongo, remove it from elastic
                logger.warn("Item is missing in mongo resource=%s id=%s".format(endpoint_name, id))
                self.remove_from_search(endpoint_name, id)
                raise SuperdeskApiError.notFoundError()
            search_backend.update(endpoint_name, id, doc)

        return updates
Example #4
    def _update(self, provider, update):
        updated = utcnow()

        last_updated = provider.get('last_updated')
        ttl_minutes = app.config['INGEST_EXPIRY_MINUTES']
        if not last_updated or last_updated < updated - datetime.timedelta(minutes=ttl_minutes):
            last_updated = updated - datetime.timedelta(minutes=ttl_minutes)

        self.provider = provider
        provider_config = provider.get('config')
        if not provider_config:
            provider_config = {}
            provider['config'] = provider_config

        provider_config.setdefault('url', 'http://rmb.reuters.com/rmd/rest/xml')
        provider_config.setdefault('auth_url', 'https://commerce.reuters.com/rmd/rest/xml/login')
        self.URL = provider_config.get('url')

        for channel in self._get_channels():
            ids = self._get_article_ids(channel, last_updated, updated)
            for id in ids:
                try:
                    items = self.fetch_ingest(id)
                    if items:
                        yield items
                # if there was an exception processing one of the bunch, log it and continue
                except Exception as ex:
                    logger.warn('Reuters item {} has not been retrieved'.format(id))
                    logger.exception(ex)
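`_update` is a generator: it yields one batch of items per fetched article id, so the caller (normally the ingest framework) drains it in a loop. A minimal consumption sketch, with a hypothetical service instance and processing step:

    for items in reuters_service._update(provider, update={}):
        for item in items:
            process_ingest_item(item)  # placeholder for the real ingest step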
Example #5
    def delete(self, endpoint_name, lookup):
        """Delete method to delete by using mongo query syntax.

        :param endpoint_name: Name of the endpoint
        :param lookup: User mongo query syntax. example 1. ``{'_id':123}``, 2. ``{'item_id': {'$in': [123, 234]}}``
        :returns: Returns the mongo remove command response. {'n': 12, 'ok': 1}
        """
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)
        docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
        ids = [doc[config.ID_FIELD] for doc in docs]
        removed_ids = ids
        logger.info("total documents to be removed {}".format(len(ids)))
        if search_backend and ids:
            removed_ids = []
            # remove it from the search backend first so it won't show up; once that is done, remove it from mongo
            for _id in ids:
                try:
                    self.remove_from_search(endpoint_name, _id)
                    removed_ids.append(_id)
                except NotFoundError:
                    logger.warning('item missing from elastic _id=%s' % (_id, ))
                    removed_ids.append(_id)
                except Exception:
                    logger.exception('item cannot be removed from elastic _id=%s' % (_id, ))
        backend.remove(endpoint_name, {config.ID_FIELD: {'$in': removed_ids}})
        logger.info("Removed {} documents from {}.".format(len(ids), endpoint_name))
        if not ids:
            logger.warn("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))
Example #6
    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        :param endpoint: API endpoint
        :type endpoint: str
        :param payload: request payload
        :type payload: dict
        """

        if payload is None:
            payload = {}

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                response = self.session.get(url,
                                            params=payload,
                                            timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    logger.warn(
                        'Reuters API timeout retrying, retries {}'.format(
                            retries))
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider)
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider)
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider)
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(error, self.provider)

            if response.status_code == 404:
                raise LookupError(
                    _('Not found {payload}').format(payload=payload))

            break

        try:
            return etree.fromstring(
                response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)
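The loop above retries only on timeouts and gives up after three retries. The same bounded-retry pattern in isolation, with a hypothetical `fetch` callable standing in for `session.get`:

    import requests

    def get_with_retries(fetch, max_retries=3):
        retries = 0
        while True:
            try:
                return fetch()
            except requests.exceptions.Timeout:
                if retries < max_retries:
                    retries += 1
                    continue  # retry the request
                raise  # retries exhausted, surface the timeout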
Example #7
    def _validate(self, doc):
        validator = self._get_validator(doc)

        if validator is None:
            logger.warn('Validator was not found for type:{}'.format(
                doc[ITEM_TYPE]))
            return []

        validation_schema = self._get_validator_schema(
            validator, doc.get('validate_on_post'))

        v = SchemaValidator()
        v.allow_unknown = True

        try:
            v.validate(doc['validate'], validation_schema)
        except TypeError as e:
            logger.exception('Invalid validator schema value "%s"' % str(e))

        error_list = v.errors
        response = []
        for field in error_list:
            error = error_list[field]

            # If error is a list, only return the first error
            if isinstance(error, list):
                error = error[0]

            if error == 'empty values not allowed' or error == 'required field':
                response.append(REQUIRED_ERROR.format(field.upper()))
            else:
                response.append('{} {}'.format(field.upper(), error))

        return response
Example #8
    def find_one(self, endpoint_name, req, **lookup):
        """Find single item.

        :param endpoint_name: resource name
        :param req: parsed request
        :param lookup: additional filter
        """
        backend = self._backend(endpoint_name)
        item = backend.find_one(endpoint_name, req=req, **lookup)
        search_backend = self._lookup_backend(endpoint_name, fallback=True)
        if search_backend:
            # set the parent for the parent-child relationship in elasticsearch
            self._set_parent(endpoint_name, item, lookup)
            item_search = search_backend.find_one(endpoint_name, req=req, **lookup)
            if item is None and item_search:
                item = item_search
                logger.warn(item_msg('item is only in elastic', item))
            elif item_search is None and item:
                logger.warn(item_msg('item is only in mongo', item))
                try:
                    logger.info(item_msg('trying to add item to elastic', item))
                    search_backend.insert(endpoint_name, [item])
                except RequestError as e:
                    logger.error(item_msg('failed to add item into elastic error={}'.format(str(e)), item))
        return item
Example #9
    def _change_request(self, endpoint_name, id, updates, original):
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)

        try:
            backend.update(endpoint_name, id, updates, original)
        except eve.io.base.DataLayer.OriginalChangedError:
            if not backend.find_one(endpoint_name, req=None, _id=id) and search_backend:
                # item is in elastic, not in mongo - not good
                logger.warn("Item is missing in mongo resource=%s id=%s".format(endpoint_name, id))
                item = search_backend.find_one(endpoint_name, req=None, _id=id)
                if item:
                    self.remove_from_search(endpoint_name, item)
                raise SuperdeskApiError.notFoundError()
            else:
                # item is there, but no change was done - ok
                logger.error('Item : {} not updated in collection {}. '
                             'Updates are : {}'.format(id, endpoint_name, updates))
                return updates

        if search_backend:

            doc = backend.find_one(endpoint_name, req=None, _id=id)
            if not doc:  # there is no doc in mongo, remove it from elastic
                logger.warn("Item is missing in mongo resource=%s id=%s".format(endpoint_name, id))
                item = search_backend.find_one(endpoint_name, req=None, _id=id)
                if item:
                    self.remove_from_search(endpoint_name, item)
                raise SuperdeskApiError.notFoundError()
            search_backend.update(endpoint_name, id, doc)

        return updates
Example #10
 def delete_docs(self, endpoint_name, docs):
     """Delete using list of documents."""
     backend = self._backend(endpoint_name)
     search_backend = self._lookup_backend(endpoint_name)
     ids = [doc[config.ID_FIELD] for doc in docs]
     removed_ids = ids
     logger.info("total documents to be removed {}".format(len(ids)))
     if search_backend and ids:
         removed_ids = []
         # remove it from the search backend first so it won't show up; once that is done, remove it from mongo
         for doc in docs:
             try:
                 self.remove_from_search(endpoint_name, doc)
                 removed_ids.append(doc[config.ID_FIELD])
             except NotFoundError:
                 logger.warning('item missing from elastic _id=%s' %
                                (doc[config.ID_FIELD], ))
                 removed_ids.append(doc[config.ID_FIELD])
             except Exception:
                 logger.exception(
                     'item cannot be removed from elastic _id=%s' %
                     (doc[config.ID_FIELD], ))
     if len(removed_ids):
         backend.remove(endpoint_name,
                        {config.ID_FIELD: {
                            '$in': removed_ids
                        }})
         logger.info("Removed %d documents from %s.", len(removed_ids),
                     endpoint_name)
     else:
         logger.warn("No documents for %s resource were deleted.",
                     endpoint_name)
     return removed_ids
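`delete_docs` returns the ids it actually removed, which lets a caller detect partial failures; a hedged usage sketch with illustrative documents:

    docs = [{'_id': 'a1'}, {'_id': 'a2'}]
    removed = backend_service.delete_docs('archive', docs)
    if len(removed) < len(docs):
        # some documents could not be removed from elastic and were left in mongo
        pass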
Example #11
    def delete(self, endpoint_name, lookup):
        """Delete method to delete by using mongo query syntax.

        :param endpoint_name: Name of the endpoint
        :param lookup: User mongo query syntax. example 1. ``{'_id':123}``, 2. ``{'item_id': {'$in': [123, 234]}}``
        :returns: Returns the mongo remove command response. {'n': 12, 'ok': 1}
        """
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)
        docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
        ids = [doc[config.ID_FIELD] for doc in docs]
        removed_ids = ids
        logger.info("total documents to be removed {}".format(len(ids)))
        if search_backend and ids:
            removed_ids = []
            # remove it from the search backend first so it won't show up; once that is done, remove it from mongo
            for _id in ids:
                try:
                    self.remove_from_search(endpoint_name, _id)
                    removed_ids.append(_id)
                except NotFoundError:
                    logger.warning('item missing from elastic _id=%s' % (_id, ))
                    removed_ids.append(_id)
                except Exception:
                    logger.exception('item cannot be removed from elastic _id=%s' % (_id, ))
        backend.remove(endpoint_name, {config.ID_FIELD: {'$in': removed_ids}})
        logger.info("Removed {} documents from {}.".format(len(ids), endpoint_name))
        if not ids:
            logger.warn("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))
Example #12
    def _update(self, provider, update):
        updated = utcnow()

        last_updated = provider.get("last_updated")
        ttl_minutes = app.config["INGEST_EXPIRY_MINUTES"]
        if not last_updated or last_updated < updated - datetime.timedelta(minutes=ttl_minutes):
            last_updated = updated - datetime.timedelta(minutes=ttl_minutes)

        self.provider = provider
        provider_config = provider.get("config")
        if not provider_config:
            provider_config = {}
            provider["config"] = provider_config

        provider_config.setdefault("url", "http://rmb.reuters.com/rmd/rest/xml")
        provider_config.setdefault("auth_url", "https://commerce.reuters.com/rmd/rest/xml/login")
        self.URL = provider_config.get("url")

        for channel in self._get_channels():
            ids = self._get_article_ids(channel, last_updated, updated)
            for id in ids:
                try:
                    items = self.fetch_ingest(id)
                    if items:
                        yield items
                # if there was an exception processing one of the bunch, log it and continue
                except Exception as ex:
                    logger.warn("Reuters item {} has not been retrieved".format(id))
                    logger.exception(ex)
Example #13
    def _change_request(self,
                        endpoint_name,
                        id,
                        updates,
                        original,
                        change_request=False,
                        push_notification=True):
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)

        try:
            if change_request:  # allows using mongo operations other than $set
                backend._change_request(endpoint_name, id, updates, original)
            else:
                backend.update(endpoint_name, id, updates, original)
            if push_notification:
                self._push_resource_notification("updated",
                                                 endpoint_name,
                                                 _id=str(id),
                                                 fields=get_diff_keys(
                                                     updates, original))
        except eve.io.base.DataLayer.OriginalChangedError:
            if not backend.find_one(endpoint_name, req=None,
                                    _id=id) and search_backend:
                # item is in elastic, not in mongo - not good
                logger.warn(
                    "Item is missing in mongo resource={} id={}".format(
                        endpoint_name, id))
                item = search_backend.find_one(endpoint_name, req=None, _id=id)
                if item:
                    self.remove_from_search(endpoint_name, item)
                raise SuperdeskApiError.notFoundError()
            else:
                # item is there, but no change was done
                logger.error(
                    "Item was not updated in mongo, it has changed from the original.",
                    extra=dict(
                        id=id,
                        resource=endpoint_name,
                        updates=updates,
                        original=original,
                    ),
                )
                return updates

        if search_backend:
            doc = backend.find_one(endpoint_name, req=None, _id=id)
            if not doc:  # there is no doc in mongo, remove it from elastic
                logger.warn(
                    "Item is missing in mongo resource={} id={}".format(
                        endpoint_name, id))
                item = search_backend.find_one(endpoint_name, req=None, _id=id)
                if item:
                    self.remove_from_search(endpoint_name, item)
                raise SuperdeskApiError.notFoundError()
            search_backend.update(endpoint_name, id, doc)

        return updates
Example #14
    def _validate(self, doc, **kwargs):
        use_headline = kwargs and 'headline' in kwargs
        validators = self._get_validators(doc)
        for validator in validators:
            validation_schema = self._get_validator_schema(validator)
            self._sanitize_fields(doc['validate'], validator)
            self._set_default_subject_scheme(doc['validate'])
            self._process_media(doc['validate'], validation_schema)
            self._process_sms(doc['validate'], validation_schema)
            v = SchemaValidator()
            v.allow_unknown = True
            try:
                v.validate(doc['validate'], validation_schema)
            except TypeError as e:
                logger.exception('Invalid validator schema value "%s"' % str(e))
            error_list = v.errors
            response = []
            for e in error_list:
                messages = []
                # Ignore dateline if item is corrected because it can't be changed after the item is published
                if doc.get('act', None) == 'correct' and e == 'dateline':
                    continue
                elif doc.get('act', None) == 'kill' and doc['validate'].get('profile', None) and \
                        e in ('headline', 'abstract', 'body_html'):
                    continue
                elif e == 'extra':
                    for field in error_list[e]:
                        display_name = self._get_vocabulary_display_name(field)
                        if 'required' in error_list[e][field]:
                            messages.append(REQUIRED_ERROR.format(display_name))
                        else:
                            messages.append('{} {}'.format(display_name, error_list[e][field]))
                elif error_list[e] == 'required field' or type(error_list[e]) is dict or \
                        type(error_list[e]) is list:
                    messages.append(REQUIRED_ERROR.format(e.upper()))
                elif 'min length is 1' == error_list[e] or 'null value not allowed' in error_list[e]:
                    messages.append(REQUIRED_ERROR.format(e.upper()))
                elif 'min length is' in error_list[e]:
                    messages.append('{} is too short'.format(e.upper()))
                elif 'max length is' in error_list[e]:
                    messages.append('{} is too long'.format(e.upper()))
                else:
                    messages.append('{} {}'.format(e.upper(), error_list[e]))

                for message in messages:
                    if use_headline:
                        headline = '{}: {}'.format(doc['validate'].get('headline',
                                                                       doc['validate'].get('_id')), message)
                        response.append(headline)
                    else:
                        response.append(message)
            return response
        else:
            logger.warn('validator was not found for {}'.format(doc['act']))
            return []
Example #15
    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        :param endpoint: API endpoint
        :type endpoint: str
        :param payload: request payload
        :type payload: dict
        """

        if payload is None:
            payload = {}

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                response = self.session.get(url, params=payload, timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    logger.warn('Reuters API timeout retrying, retries {}'.format(retries))
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider)
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider)
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider)
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(error, self.provider)

            if response.status_code == 404:
                raise LookupError('Not found %s' % payload)

            break

        try:
            return etree.fromstring(response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)
Example #16
    def delete(self, endpoint_name, lookup):
        """Delete method to delete by using mongo query syntax.

        :param endpoint_name: Name of the endpoint
        :param lookup: User mongo query syntax. example 1. ``{'_id':123}``, 2. ``{'item_id': {'$in': [123, 234]}}``
        :returns: Returns list of ids which were removed.
        """
        docs = list(self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest()).sort("_id", 1))
        removed_ids = self.delete_docs(endpoint_name, docs)
        if len(docs) and not len(removed_ids):
            logger.warn("No documents for %s resource were deleted using lookup %s", endpoint_name, lookup)
        return removed_ids
Example #17
    def search(self, endpoint_name, source):
        """Search for items using search backend

        :param string endpoint_name
        :param dict source
        """
        req = ParsedRequest()
        req.args = {'source': json.dumps(source)}
        search_backend = self._lookup_backend(endpoint_name)
        if search_backend:
            return search_backend.find(endpoint_name, req, {})
        else:
            logger.warn('there is no search backend for %s' % endpoint_name)
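The `source` dict is serialized into the request and interpreted by the elastic layer as the query body, so standard query DSL can be passed straight through; a hypothetical call (resource name and query are illustrative):

    source = {'query': {'term': {'state': 'published'}}, 'size': 25}
    cursor = backend_service.search('published_items', source)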
Example #18
    def search(self, endpoint_name, source):
        """Search for items using search backend

        :param string endpoint_name
        :param dict source
        """
        req = ParsedRequest()
        req.args = {'source': json.dumps(source)}
        search_backend = self._lookup_backend(endpoint_name)
        if search_backend:
            return search_backend.find(endpoint_name, req, {})
        else:
            logger.warn('there is no search backend for %s' % endpoint_name)
Example #19
 def find_one(self, endpoint_name, req, **lookup):
     backend = self._backend(endpoint_name)
     item = backend.find_one(endpoint_name, req=req, **lookup)
     search_backend = self._lookup_backend(endpoint_name, fallback=True)
     if search_backend:
         item_search = search_backend.find_one(endpoint_name, req=req, **lookup)
         if item is None and item_search:
             item = item_search
             logger.warn(item_msg('item is only in elastic', item))
         elif item_search is None and item:
             logger.warn(item_msg('item is only in mongo', item))
             try:
                 logger.info(item_msg('trying to add item to elastic', item))
                 search_backend.insert(endpoint_name, [item])
             except RequestError as e:
                 logger.error(item_msg('failed to add item into elastic error={}'.format(str(e)), item))
     return item
Example #20
 def delete(self, endpoint_name, lookup):
     """
     Delete method to delete by using mongo query syntax
     :param endpoint_name: Name of the endpoint
     :param lookup: User mongo query syntax. example 1. {'_id':123}, 2. {'item_id': {'$in': [123, 234]}}
     :returns:
     Returns the mongo remove command response. {'n': 12, 'ok': 1}
     """
     backend = self._backend(endpoint_name)
     search_backend = self._lookup_backend(endpoint_name)
     docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
     ids = [doc[config.ID_FIELD] for doc in docs]
     backend.remove(endpoint_name, {config.ID_FIELD: {'$in': ids}})
     if search_backend and ids:
         self._remove_documents_from_search_backend(endpoint_name, ids)
     if not ids:
         logger.warn("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))
Example #21
 def find_one(self, endpoint_name, req, **lookup):
     backend = self._backend(endpoint_name)
     item = backend.find_one(endpoint_name, req=req, **lookup)
     search_backend = self._lookup_backend(endpoint_name, fallback=True)
     if search_backend:
         item_search = search_backend.find_one(endpoint_name, req=req, **lookup)
         if item is None and item_search:
             item = item_search
             logger.warn(item_msg('item is only in elastic', item))
         elif item_search is None and item:
             logger.warn(item_msg('item is only in mongo', item))
             try:
                 logger.info(item_msg('trying to add item to elastic', item))
                 search_backend.insert(endpoint_name, [item])
             except RequestError as e:
                 logger.error(item_msg('failed to add item into elastic error={}'.format(str(e)), item))
     return item
Example #22
        def get_parent_id(doc):
            if not doc.get('rewrite_of'):
                return doc[config.ID_FIELD]
            elif doc['rewrite_of'] not in docs:
                # If the parent item was not part of this stats iteration
                # then load it now
                parent = service.find_one(req=None, _id=doc['rewrite_of'])

                if not parent:
                    # Stats entry for the parent item was not found
                    logger.warn('Failed to get parent item {}'.format(
                        doc['rewrite_of']))
                    return None
                else:
                    docs[doc['rewrite_of']] = parent

            return get_parent_id(docs[doc['rewrite_of']])
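`get_parent_id` walks a chain of rewrites back to the original item, loading missing ancestors via the service and caching them in `docs`. A self-contained sketch of the same chain-walking idea over a plain dict (no service lookups, purely illustrative):

    docs = {
        'a': {'_id': 'a'},
        'b': {'_id': 'b', 'rewrite_of': 'a'},
        'c': {'_id': 'c', 'rewrite_of': 'b'},
    }

    def root_id(doc):
        if not doc.get('rewrite_of'):
            return doc['_id']
        return root_id(docs[doc['rewrite_of']])

    assert root_id(docs['c']) == 'a'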
Example #23
    def _validate(self, doc, **kwargs):
        use_headline = kwargs and 'headline' in kwargs
        validators = self._get_validators(doc)
        for validator in validators:
            self._sanitize_fields(doc['validate'], validator)
            v = SchemaValidator()
            v.allow_unknown = True
            try:
                v.validate(doc['validate'],
                           self._get_validator_schema(validator))
            except TypeError as e:
                logger.exception('Invalid validator schema value "%s"' % str(e))
            error_list = v.errors
            response = []
            for e in error_list:
                # Ignore dateline if item is corrected because it can't be changed after the item is published
                if doc.get('act', None) == 'correct' and e == 'dateline':
                    continue
                elif doc.get('act', None) == 'kill' and doc['validate'].get('profile', None) and \
                        e in ('headline', 'abstract', 'body_html'):
                    continue
                elif error_list[e] == 'required field' or type(error_list[e]) is dict or \
                        type(error_list[e]) is list:
                    message = '{} is a required field'.format(e.upper())
                elif 'min length is 1' == error_list[e]:
                    message = '{} is a required field'.format(e.upper())
                elif 'min length is' in error_list[e]:
                    message = '{} is too short'.format(e.upper())
                elif 'max length is' in error_list[e]:
                    message = '{} is too long'.format(e.upper())
                else:
                    message = '{} {}'.format(e.upper(), error_list[e])

                if use_headline:
                    response.append('{}: {}'.format(
                        doc['validate'].get('headline',
                                            doc['validate'].get('_id')),
                        message))
                else:
                    response.append(message)
            return response
        else:
            logger.warn('validator was not found for {}'.format(doc['act']))
            return []
Example #24
def __get_redis(app_ctx):
    """Constructs Redis Client object.

    :return: Redis Client object
    """
    redis_url = app_ctx.config['REDIS_URL']
    try:
        return redis.from_url(redis_url)
    except ValueError as e:
        logger.warn(
            'Failed to connect to redis using a connection string: {}'.format(
                e))

        # Newer Redis clients will not accept 'amqp' as the scheme
        # Attempt to mock a redis scheme instead
        protocol = redis_url.split('//')[0]
        new_url = redis_url.replace(protocol, 'redis:')
        return redis.from_url(new_url)
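The fallback simply swaps the URL scheme so an older `amqp://` style connection string still parses as a Redis URL; a quick sketch of the rewrite (the URL is illustrative):

    redis_url = 'amqp://localhost:6379/1'
    protocol = redis_url.split('//')[0]            # 'amqp:'
    new_url = redis_url.replace(protocol, 'redis:')
    assert new_url == 'redis://localhost:6379/1'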
Example #25
    def _validate(self, doc, **kwargs):
        use_headline = kwargs and "headline" in kwargs
        validators = self._get_validators(doc)
        for validator in validators:
            self._sanitize_fields(doc["validate"], validator)
            v = SchemaValidator()
            v.allow_unknown = True
            try:
                v.validate(doc["validate"], self._get_validator_schema(validator))
            except TypeError as e:
                logger.exception('Invalid validator schema value "%s"' % str(e))
            error_list = v.errors
            response = []
            for e in error_list:
                # Ignore dateline if item is corrected because it can't be changed after the item is published
                if doc.get("act", None) == "correct" and e == "dateline":
                    continue
                elif (
                    doc.get("act", None) == "kill"
                    and doc["validate"].get("profile", None)
                    and e in ("headline", "abstract", "body_html")
                ):
                    continue
                elif error_list[e] == "required field" or type(error_list[e]) is dict or type(error_list[e]) is list:
                    message = "{} is a required field".format(e.upper())
                elif "min length is 1" == error_list[e]:
                    message = "{} is a required field".format(e.upper())
                elif "min length is" in error_list[e]:
                    message = "{} is too short".format(e.upper())
                elif "max length is" in error_list[e]:
                    message = "{} is too long".format(e.upper())
                else:
                    message = "{} {}".format(e.upper(), error_list[e])

                if use_headline:
                    response.append(
                        "{}: {}".format(doc["validate"].get("headline", doc["validate"].get("_id")), message)
                    )
                else:
                    response.append(message)
            return response
        else:
            logger.warn("validator was not found for {}".format(doc["act"]))
            return []
Example #26
    def delete(self, endpoint_name, lookup):
        """
        Delete method to delete by using mongo query syntax
        :param endpoint_name: Name of the endpoint
        :param lookup: User mongo query syntax. example 1. {'_id':123}, 2. {'item_id': {'$in': [123, 234]}}
        :returns:
        Returns the mongo remove command response. {'n': 12, 'ok': 1}
        """
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)
        docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
        ids = [doc[config.ID_FIELD] for doc in docs]
        res = backend.remove(endpoint_name, {config.ID_FIELD: {'$in': ids}})
        if res and res.get('n', 0) > 0 and search_backend is not None:
            self._remove_documents_from_search_backend(endpoint_name, ids)

        if res and res.get('n', 0) == 0:
            logger.warn("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))

        return res
Example #27
    def _change_request(self, endpoint_name, id, updates, original):
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)

        try:
            backend.update(endpoint_name, id, updates, original)
        except eve.io.base.DataLayer.OriginalChangedError:
            if not backend.find_one(endpoint_name, req=None, _id=id):
                # item is in elastic, not in mongo - not good
                logger.warn("Item is missing in mongo resource=%s id=%s".format(endpoint_name, id))
                self._remove_documents_from_search_backend(endpoint_name, [id])
                raise SuperdeskApiError.notFoundError()
            else:
                # item is there, but no change was done - ok
                return updates

        if search_backend:
            doc = backend.find_one(endpoint_name, req=None, _id=id)
            search_backend.update(endpoint_name, id, doc)

        return updates
Example #28
    def _change_request(self, endpoint_name, id, updates, original):
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)

        try:
            backend.update(endpoint_name, id, updates, original)
        except eve.io.base.DataLayer.OriginalChangedError:
            if not backend.find_one(endpoint_name, req=None, _id=id):
                # item is in elastic, not in mongo - not good
                logger.warn(
                    "Item is missing in mongo resource=%s id=%s".format(
                        endpoint_name, id))
                self._remove_documents_from_search_backend(endpoint_name, [id])
                raise SuperdeskApiError.notFoundError()
            else:
                # item is there, but no change was done - ok
                return updates

        if search_backend:
            doc = backend.find_one(endpoint_name, req=None, _id=id)
            search_backend.update(endpoint_name, id, doc)

        return updates
Example #29
    def _update(self, provider):
        updated = utcnow()

        last_updated = provider.get('last_updated')
        ttl_minutes = app.config['INGEST_EXPIRY_MINUTES']
        if not last_updated or last_updated < updated - datetime.timedelta(
                minutes=ttl_minutes):
            last_updated = updated - datetime.timedelta(minutes=ttl_minutes)

        self.provider = provider
        provider_config = provider.get('config')
        if not provider_config:
            provider_config = {}
            provider['config'] = provider_config

        if 'url' not in provider_config:
            provider_config['url'] = 'http://rmb.reuters.com/rmd/rest/xml'

        if 'auth_url' not in provider_config:
            provider_config[
                'auth_url'] = 'https://commerce.reuters.com/rmd/rest/xml/login'

        self.URL = provider_config.get('url')

        for channel in self._get_channels():
            ids = self._get_article_ids(channel, last_updated, updated)
            for id in ids:
                try:
                    items = self.fetch_ingest(id)
                    if items:
                        yield items
                # if there was an exception processing one of the bunch, log it and continue
                except Exception as ex:
                    logger.warn(
                        'Reuters item {} has not been retrieved'.format(id))
                    logger.exception(ex)
Example #30
def get_attachment_public_url(attachment: Dict[str, Any]) -> Optional[str]:
    """Returns the file url for the attachment provided

    :param dict attachment: The attachment to get the file URL for
    :rtype: str
    :return: None if the attachment is not public, otherwise the public URL to the file
    """

    if attachment.get("attachment"):  # retrieve object reference
        attachment = superdesk.get_resource_service("attachments").find_one(
            req=None, _id=attachment["attachment"])

    if attachment.get("internal"):
        return None

    if not attachment.get("media"):
        # All attachments should have a `media` attribute set
        # The provided attachment dict must be invalid
        attachment_id = str(attachment.get("_id"))
        logger.warn(f'Attachment "{attachment_id}" has no media attribute set')

        return None

    return current_app.media.url_for_external(attachment["media"], RESOURCE)
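A hedged usage sketch; the attachment dicts are illustrative and only assume the shape the function checks for (an `attachment` object reference, the `internal` flag and a `media` id):

    # internal attachments never get a public URL
    assert get_attachment_public_url({"internal": True, "media": "media-id"}) is None

    # public attachments resolve through the app's media storage
    url = get_attachment_public_url({"internal": False, "media": "media-id"})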
Example #31
    def _get_article_ids(self, channel, last_updated, updated):
        """
        Get article ids which should be upserted, and save the poll token that is returned.
        """
        ids = set()
        payload = {"channel": channel, "fieldsRef": "id"}

        # check if the channel has a pollToken if not fall back to dateRange
        last_poll_token = self._get_poll_token(channel)
        if last_poll_token is not None:
            logger.info("Reuters requesting channel {} with poll token {}".format(channel, last_poll_token))
            payload["pollToken"] = last_poll_token
        else:
            payload["dateRange"] = "%s-%s" % (self._format_date(last_updated), self._format_date(updated))
            logger.info("Reuters requesting channel {} with dateRange {}".format(channel, payload["dateRange"]))

        tree = self._get_tree("items", payload)
        status_code = tree.find("status").get("code") if tree.tag == "results" else tree.get("code")
        # check the returned status
        if status_code != "10":
            logger.warn("Reuters channel request returned status code {}".format(status_code))
            # status code 30 indicates failure
            if status_code == "30":
                # invalid token
                logger.warn(
                    "Reuters error on channel {} code {} {}".format(
                        channel, tree.find("error").get("code"), tree.find("error").text
                    )
                )
                if tree.find("error").get("code") == "2100":
                    self._save_poll_token(channel, None)
                    logger.warn("Reuters channel invalid token reseting {}".format(status_code))
                return ids

        # extract the returned poll token if there is one
        poll_token = tree.find("pollToken")
        if poll_token is not None:
            # a new token indicates new content
            if poll_token.text != last_poll_token:
                logger.info("Reuters channel {} new token {}".format(channel, poll_token.text))
                self._save_poll_token(channel, poll_token.text)
            else:
                # the token has not changed, so nothing new
                logger.info("Reuters channel {} nothing new".format(channel))
                return ids
        else:
            logger.info("Reuters channel {} retrieved no token".format(channel))
            return ids

        for result in tree.findall("result"):
            id = result.find("id").text
            ids.add(id)
            logger.info("Reuters id : {}".format(id))

        return ids
Example #32
    def _get_article_ids(self, channel, last_updated, updated):
        """
        Get article ids which should be upserted, and save the poll token that is returned.
        """
        ids = set()
        payload = {'channel': channel, 'fieldsRef': 'id'}

        # check if the channel has a pollToken if not fall back to dateRange
        last_poll_token = self._get_poll_token(channel)
        if last_poll_token is not None:
            logger.info("Reuters requesting channel {} with poll token {}".format(channel, last_poll_token))
            payload['pollToken'] = last_poll_token
        else:
            payload['dateRange'] = "%s-%s" % (self._format_date(last_updated), self._format_date(updated))
            logger.info("Reuters requesting channel {} with dateRange {}".format(channel, payload['dateRange']))

        tree = self._get_tree('items', payload)
        status_code = tree.find('status').get('code') if tree.tag == 'results' else tree.get('code')
        # check the returned status
        if status_code != '10':
            logger.warn("Reuters channel request returned status code {}".format(status_code))
            # status code 30 indicates failure
            if status_code == '30':
                # invalid token
                logger.warn("Reuters error on channel {} code {} {}".format(channel, tree.find('error').get('code'),
                                                                            tree.find('error').text))
                if tree.find('error').get('code') == '2100':
                    self._save_poll_token(channel, None)
                    logger.warn("Reuters channel invalid token reseting {}".format(status_code))
                return ids

        # extract the returned poll token if there is one
        poll_token = tree.find('pollToken')
        if poll_token is not None:
            # a new token indicates new content
            if poll_token.text != last_poll_token:
                logger.info("Reuters channel {} new token {}".format(channel, poll_token.text))
                self._save_poll_token(channel, poll_token.text)
            else:
                # the token has not changed, so nothing new
                logger.info("Reuters channel {} nothing new".format(channel))
                return ids
        else:
            logger.info("Reuters channel {} retrieved no token".format(channel))
            return ids

        for result in tree.findall('result'):
            id = result.find('id').text
            ids.add(id)
            logger.info("Reuters id : {}".format(id))

        return ids
Example #33
    def _get_article_ids(self, channel, last_updated, updated):
        """
        Get article ids which should be upserted, and save the poll token that is returned.
        """
        ids = set()
        payload = {'channel': channel, 'fieldsRef': 'id'}

        # check if the channel has a pollToken if not fall back to dateRange
        last_poll_token = self._get_poll_token(channel)
        if last_poll_token is not None:
            logger.info("Reuters requesting channel {} with poll token {}".format(channel, last_poll_token))
            payload['pollToken'] = last_poll_token
        else:
            payload['dateRange'] = "%s-%s" % (self._format_date(last_updated), self._format_date(updated))
            logger.info("Reuters requesting channel {} with dateRange {}".format(channel, payload['dateRange']))

        tree = self._get_tree('items', payload)
        status_code = tree.find('status').get('code') if tree.tag == 'results' else tree.get('code')
        # check the returned status
        if status_code != '10':
            logger.warn("Reuters channel request returned status code {}".format(status_code))
            # status code 30 indicates failure
            if status_code == '30':
                # invalid token
                logger.warn("Reuters error on channel {} code {} {}".format(channel, tree.find('error').get('code'),
                                                                            tree.find('error').text))
                if tree.find('error').get('code') == '2100':
                    self._save_poll_token(channel, None)
                    logger.warn("Reuters channel invalid token reseting {}".format(status_code))
                return ids

        # extract the returned poll token if there is one
        poll_token = tree.find('pollToken')
        if poll_token is not None:
            # a new token indicates new content
            if poll_token.text != last_poll_token:
                logger.info("Reuters channel {} new token {}".format(channel, poll_token.text))
                self._save_poll_token(channel, poll_token.text)
            else:
                # the token has not changed, so nothing new
                logger.info("Reuters channel {} nothing new".format(channel))
                return ids
        else:
            logger.info("Reuters channel {} retrieved no token".format(channel))
            return ids

        for result in tree.findall('result'):
            id = result.find('id').text
            ids.add(id)
            logger.info("Reuters id : {}".format(id))

        return ids
Example #34
    def _validate(self, doc, fields=False, **kwargs):
        item = deepcopy(
            doc['validate']
        )  # make a copy for signal before validation processing
        use_headline = kwargs and 'headline' in kwargs
        validators = self._get_validators(doc)
        for validator in validators:
            validation_schema = self._get_validator_schema(validator)
            self._sanitize_fields(doc['validate'], validator)
            self._set_default_subject_scheme(doc['validate'])
            self._process_media(doc['validate'], validation_schema)
            self._process_sms(doc['validate'], validation_schema)
            self._process_media_metadata(doc['validate'], validation_schema)
            v = SchemaValidator()
            v.allow_unknown = True
            try:
                v.validate(doc['validate'], validation_schema)
            except TypeError as e:
                logger.exception('Invalid validator schema value "%s"' % str(e))
            error_list = v.errors
            response = []
            for e in error_list:
                messages = []
                # Ignore dateline if item is corrected because it can't be changed after the item is published
                if doc.get('act', None) == 'correct' and e == 'dateline':
                    continue
                elif doc.get('act', None) == 'kill' and doc['validate'].get('profile', None) and \
                        e in ('headline', 'abstract', 'body_html'):
                    continue
                elif e == 'extra':
                    for field in error_list[e]:
                        display_name = self._get_vocabulary_display_name(field)
                        if 'required' in error_list[e][field]:
                            messages.append(
                                ERROR_MESSAGES[REQUIRED_ERROR].format(
                                    display_name))
                        else:
                            error_field = self.get_error_field_name(
                                display_name)
                            messages.append('{} {}'.format(
                                error_field, error_list[e][field]))
                elif 'required field' in error_list[e] or type(
                        error_list[e]) is dict or type(error_list[e]) is list:
                    display_name = self._get_vocabulary_display_name(e)
                    error_field = self.get_error_field_name(display_name)
                    messages.append(ERROR_MESSAGES[REQUIRED_ERROR].format(
                        error_field.upper()))
                elif 'min length is 1' == error_list[
                        e] or 'null value not allowed' in error_list[e]:
                    messages.append(ERROR_MESSAGES[REQUIRED_ERROR].format(
                        e.upper()))
                elif 'min length is' in error_list[e]:
                    error_field = self.get_error_field_name(e)
                    messages.append(ERROR_MESSAGES[TOO_SHORT].format(
                        error_field.upper()))
                elif 'max length is' in error_list[e]:
                    error_field = self.get_error_field_name(e)
                    messages.append(ERROR_MESSAGES[TOO_LONG].format(
                        error_field.upper()))
                else:
                    error_field = self.get_error_field_name(e)
                    messages.append('{} {}'.format(
                        error_field.upper(), ERROR_MESSAGES[error_list[e]] if
                        ERROR_MESSAGES.get(error_list[e]) else error_list[e]))

                for message in messages:
                    if use_headline:
                        headline = '{}: {}'.format(
                            doc['validate'].get('headline',
                                                doc['validate'].get('_id')),
                            message)
                        response.append(headline)
                    else:
                        response.append(message)

            # let custom code do additional validation
            item_validate.send(self,
                               item=item,
                               response=response,
                               error_fields=v.errors)

            if fields:
                return response, v.errors
            return response
        else:
            logger.warn('validator was not found for {}'.format(doc['act']))
            if fields:
                return [], {}
            return []