def apply_rule_set(item, provider, rule_set=None):
    """Run a rule set over an item that is about to be ingested.

    Each rule replaces every occurrence of its ``old`` text with its ``new``
    text in the item's ``body_html``. The item is returned untouched when
    there is no rule set to apply or when it has no ``body_html``.

    :param item: Item to be ingested
    :param provider: provider object from whom the item was received
    :param rule_set: rule set to apply; when ``None`` and the provider has a
        ``rule_set`` id, it is looked up through the ``rule_sets`` service
    :return: item
    :raises: the error built by ``ProviderError.ruleError`` for any exception
        raised while looking up or applying the rules
    """
    try:
        needs_lookup = rule_set is None and provider.get('rule_set') is not None
        if needs_lookup:
            rule_sets_service = superdesk.get_resource_service('rule_sets')
            rule_set = rule_sets_service.find_one(_id=provider['rule_set'], req=None)

        if not rule_set or 'body_html' not in item:
            return item

        html = item['body_html']
        for replacement in rule_set['rules']:
            html = html.replace(replacement['old'], replacement['new'])
        item['body_html'] = html

        return item
    except Exception as ex:
        raise ProviderError.ruleError(ex, provider)
    def create(self, docs, **kwargs):
        """Fetch each requested document and post a copy of it to the archive.

        :param docs: iterable of dicts; each must carry a ``desk`` and a
            ``guid``, and may carry a ``stage`` and an item state
        :param kwargs: accepted but not used by this method
        :return: list of the guids generated for the archived copies
        :raises: the bad-request error built by ``SuperdeskApiError`` when a
            doc has no desk; the error built by
            ``ProviderError.externalProviderError`` when ``self.fetch`` raises
            ``FileNotFoundError``
        """
        new_guids = []
        provider = self.get_provider()
        for doc in docs:
            if not doc.get('desk'):
                # if no desk is selected then it is bad request
                raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")
            try:
                archived_doc = self.fetch(doc['guid'])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, provider)

            # Work on a shallow copy so the fetched document keeps its own _id,
            # which is still needed below for INGEST_ID / FAMILY_ID.
            dest_doc = dict(archived_doc)
            new_id = generate_guid(type=GUID_TAG)
            new_guids.append(new_id)
            dest_doc['_id'] = new_id
            generate_unique_id_and_name(dest_doc)

            if provider:
                dest_doc['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc, update=None, desk_id=doc.get('desk'), stage_id=doc.get('stage'))
            # State defaults to FETCHED unless the request supplied one.
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            dest_doc[INGEST_ID] = archived_doc['_id']
            dest_doc[FAMILY_ID] = archived_doc['_id']
            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc.get('_id'))

        return new_guids
Exemple #3
0
    def create(self, docs, **kwargs):
        """Fetch each requested document into the archive.

        :param docs: iterable of dicts; each must carry a ``desk`` and a
            ``guid``, and may carry a ``stage`` and a ``state``
        :param kwargs: accepted but not used by this method
        :return: list of the guids of the fetched items
        :raises: the bad-request error built by ``SuperdeskApiError`` when a
            doc has no desk; the error built by
            ``ProviderError.externalProviderError`` when ``self.fetch`` raises
            ``FileNotFoundError``
        """
        new_guids = []
        provider = self.get_provider()
        for doc in docs:
            if not doc.get('desk'):
                # if no desk is selected then it is bad request
                raise SuperdeskApiError.badRequestError(
                    _("Destination desk cannot be empty."))
            try:
                archived_doc = self.fetch(doc['guid'])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, provider)

            dest_doc = fetch_item(archived_doc,
                                  doc.get('desk'),
                                  doc.get('stage'),
                                  state=doc.get('state'))
            new_guids.append(dest_doc['guid'])

            if provider:
                dest_doc['ingest_provider'] = str(
                    provider[superdesk.config.ID_FIELD])

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc.get('_id'))

        # The loop above already treats the provider as optional, so only
        # record the update time when there is a provider to record it on;
        # calling provider.get() on a missing provider would fail after the
        # items had already been archived.
        if new_guids and provider:
            get_resource_service('search_providers').system_update(
                provider.get(config.ID_FIELD), {'last_item_update': utcnow()},
                provider)

        return new_guids
    def _update(self, provider):
        """Yield the items parsed from the files in the provider's configured path.

        Every regular file in the path is moved after handling: with
        ``success=True`` when it was parsed or was not newer than the
        provider's ``last_updated``, with ``success=False`` when handling it
        raised. ``ParserError`` is swallowed after the move; other exceptions
        are re-raised wrapped.

        :param provider: ingest provider dict; ``config.path`` is the folder read
        :return: generator yielding one single-item list per parsed file
        """
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)

        if not self.path:
            logger.info('No path')
            return []

        for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
            try:
                filepath = os.path.join(self.path, filename)
                if not os.path.isfile(filepath):
                    continue

                modified_at = datetime.fromtimestamp(os.lstat(filepath).st_mtime, tz=utc)
                if not self.is_latest_content(modified_at, provider.get('last_updated')):
                    self.move_file(self.path, filename, provider=provider, success=True)
                    continue

                item = self.parser.parse_file(filepath, self)
                self.move_file(self.path, filename, provider=provider, success=True)
                yield [item]
            except ParserError.ZCZCParserError as ex:
                logger.exception("Ingest Type: Teletype - File: {0} could not be processed".format(filename))
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ParserError.ZCZCParserError(ex, provider)
            except ParserError:
                self.move_file(self.path, filename, provider=provider, success=False)
            except Exception as ex:
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ProviderError.ingestError(ex, provider)
def apply_rule_set(item, provider, rule_set=None):
    """
    Applies rules set on the item to be ingested into the system. If there's no rule set then the item will
    be returned without any change.

    Each rule replaces every occurrence of its ``old`` text with its ``new``
    text in the item's ``body_html``.

    :param item: Item to be ingested
    :param provider: provider object from whom the item was received
    :param rule_set: rule set to apply; when ``None`` and the provider has a
        ``rule_set`` id, it is looked up through the ``rule_sets`` service
    :return: item
    :raises: the error built by ``ProviderError.ruleError`` for any exception
        raised while looking up or applying the rules
    """
    try:
        if rule_set is None and provider.get('rule_set') is not None:
            rule_set = superdesk.get_resource_service('rule_sets').find_one(
                _id=provider['rule_set'], req=None)

        # Apply the rules regardless of where the rule set came from: a rule
        # set passed in by the caller must be honoured just like one looked
        # up from the provider.
        if rule_set and 'body_html' in item:
            body = item['body_html']

            for rule in rule_set['rules']:
                body = body.replace(rule['old'], rule['new'])

            item['body_html'] = body

        return item
    except Exception as ex:
        raise ProviderError.ruleError(ex, provider)
Exemple #6
0
    def _update(self, provider):
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)
        if not self.path:
            return

        for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
            try:
                if os.path.isfile(os.path.join(self.path, filename)):
                    filepath = os.path.join(self.path, filename)
                    stat = os.lstat(filepath)
                    last_updated = datetime.fromtimestamp(stat.st_mtime, tz=utc)
                    if self.is_latest_content(last_updated, provider.get('last_updated')):
                        with open(os.path.join(self.path, filename), 'r') as f:
                            item = self.parser.parse_message(etree.fromstring(f.read()), provider)

                            self.add_timestamps(item)
                            self.move_file(self.path, filename, provider=provider, success=True)
                            yield [item]
                    else:
                        self.move_file(self.path, filename, provider=provider, success=True)
            except etreeParserError as ex:
                logger.exception("Ingest Type: AFP - File: {0} could not be processed".format(filename), ex)
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ParserError.newsmlOneParserError(ex, provider)
            except ParserError as ex:
                self.move_file(self.path, filename, provider=provider, success=False)
            except Exception as ex:
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ProviderError.ingestError(ex, provider)

        push_notification('ingest:update')
 def remove_expired(self, provider):
     """Remove the provider's expired data, then push ``ingest:cleaned``.

     :param provider: provider passed on to ``remove_expired_data``
     :raises: the error built by ``ProviderError.expiredContentError`` for
         any exception raised on the way; the exception is logged first
     """
     try:
         remove_expired_data(provider)
         push_notification('ingest:cleaned')
     except Exception as error:
         logger.exception(error)
         raise ProviderError.expiredContentError(error, provider)
def filter_expired_items(provider, items):
    """Keep the items whose ``versioncreated`` is newer than the retention cutoff.

    The cutoff is now minus the provider's ``days_to_keep`` (``DAYS_TO_KEEP``
    when the provider does not set it). Items without ``versioncreated`` are
    compared using the current time.

    :param provider: ingest provider dict
    :param items: iterable of item dicts
    :return: list of the items that are kept
    :raises: the error built by
        ``ProviderError.providerFilterExpiredContentError`` for any exception
    """
    try:
        retention_days = provider.get('days_to_keep', DAYS_TO_KEEP)
        cutoff = utcnow() - timedelta(days=retention_days)

        kept = []
        for candidate in items:
            if candidate.get('versioncreated', utcnow()) > cutoff:
                kept.append(candidate)
        return kept
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
Exemple #9
0
def filter_expired_items(provider, items):
    """
    Filters out the item from the list of articles to be ingested
    if they are expired and item['type'] not in provider['content_types'].

    An item's expiry is its ``expiry`` value when set, otherwise its
    ``versioncreated`` plus the provider's ``content_expiry`` minutes
    (``INGEST_EXPIRY_MINUTES`` from the app config by default). Items with
    neither value are treated as expired.

    :param provider: Ingest Provider Details.
    :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
    :param items: list of items received from the provider
    :type items: list
    :return: list of items which can be saved into ingest collection
    :rtype: list
    :raises: the error built by
        ``ProviderError.providerFilterExpiredContentError`` for any exception
    """

    def is_not_expired(item):
        # Resolve the fallback lazily: a dict.get() default is evaluated even
        # when the key exists, so computing versioncreated + delta there
        # failed for items that only carry an expiry.
        expiry = item.get('expiry')
        if not expiry:
            version_created = item.get('versioncreated')
            if not version_created:
                return False
            expiry = version_created + delta

        # Aware and naive datetimes cannot be compared with each other, so
        # pick the "now" that matches the expiry value.
        if expiry.tzinfo:
            return expiry > utcnow()
        return expiry > datetime.now()

    try:
        delta = timedelta(minutes=provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']))
        filtered_items = [item for item in items if is_not_expired(item) and
                          item.get(ITEM_TYPE, 'text') in provider.get('content_types', [])]

        if len(items) != len(filtered_items):
            logger.debug('Received {0} articles from provider {1}, but only {2} are eligible to be saved in ingest'
                         .format(len(items), provider['name'], len(filtered_items)))

        return filtered_items
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
Exemple #10
0
    def _update(self, provider):
        """Yield the items parsed from the files in the provider's configured path.

        Every regular file in the path is moved after handling: with
        ``success=True`` when it was parsed or was not newer than the
        provider's ``last_updated``, with ``success=False`` when handling it
        raised. ``ParserError`` is swallowed after the move; other exceptions
        are re-raised wrapped.

        :param provider: ingest provider dict; ``config.path`` is the folder read
        :return: generator yielding one single-item list per parsed file
        """
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)

        if not self.path:
            logger.info('No path')
            return []

        for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
            try:
                filepath = os.path.join(self.path, filename)
                if not os.path.isfile(filepath):
                    continue

                modified_at = datetime.fromtimestamp(os.lstat(filepath).st_mtime, tz=utc)
                if not self.is_latest_content(modified_at, provider.get('last_updated')):
                    self.move_file(self.path, filename, provider=provider, success=True)
                    continue

                item = self.parser.parse_file(filepath, self)
                self.move_file(self.path, filename, provider=provider, success=True)
                yield [item]
            except ParserError.ZCZCParserError as ex:
                logger.exception("Ingest Type: Teletype - File: {0} could not be processed".format(filename))
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ParserError.ZCZCParserError(ex, provider)
            except ParserError:
                self.move_file(self.path, filename, provider=provider, success=False)
            except Exception as ex:
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ProviderError.ingestError(ex, provider)
def process_iptc_codes(item, provider):
    """
    Adds the parent IPTC codes of every 8-character subject qcode when they are missing, for example
    given 15039001 (Formula One) it makes sure 15000000 (sport) and 15039000 (motor racing) are present too.

    The item's ``subject`` list is extended in place; names of added entries come from ``subject_codes``.

    :param item: A story item
    :param provider: provider the item came from, used only when reporting an error
    :return: None
    :raises: the error built by ``ProviderError.iptcError`` for any exception
    """
    try:
        subjects = item['subject']

        def has_qcode(code):
            return any('qcode' in entry and code == entry['qcode'] for entry in subjects)

        # Entries appended below are visited by this same loop as well.
        for subject in subjects:
            if 'qcode' not in subject or len(subject['qcode']) != 8:
                continue

            qcode = subject['qcode']
            # Top-level code first, then the mid-level one.
            for parent_qcode in (qcode[:2] + '000000', qcode[:5] + '000'):
                if not has_qcode(parent_qcode):
                    subjects.append({
                        'qcode': parent_qcode,
                        'name': subject_codes[parent_qcode]
                    })
    except Exception as ex:
        raise ProviderError.iptcError(ex, provider)
Exemple #12
0
    def _update(self, provider):
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)

        if not self.path:
            return []

        for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
            try:
                filepath = os.path.join(self.path, filename)
                if os.path.isfile(filepath):
                    stat = os.lstat(filepath)
                    last_updated = datetime.fromtimestamp(stat.st_mtime, tz=utc)
                    if self.is_latest_content(last_updated, provider.get('last_updated')):
                        with open(filepath, 'r') as f:
                            item = self.parser.parse_message(etree.fromstring(f.read()), provider)
                            self.move_file(self.path, filename, provider=provider, success=True)
                            yield [item]
                    else:
                        self.move_file(self.path, filename, provider=provider, success=True)
            except etreeParserError as ex:
                logger.exception("Ingest Type: AAP - File: {0} could not be processed".format(filename))
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ParserError.nitfParserError(ex, provider)
            except ParserError as ex:
                self.move_file(self.path, filename, provider=provider, success=False)
            except Exception as ex:
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ProviderError.ingestError(ex, provider)

        push_notification('ingest:update')
def process_iptc_codes(item, provider):
    """
    Adds the parent IPTC codes of every 8-character subject qcode when they are missing, for example
    given 15039001 (Formula One) it makes sure 15000000 (sport) and 15039000 (motor racing) are present too.

    The item's ``subject`` list is extended in place; names of added entries come from ``subject_codes``.

    :param item: A story item
    :param provider: provider the item came from, used only when reporting an error
    :return: None
    :raises: the error built by ``ProviderError.iptcError`` for any exception
    """
    try:
        subjects = item['subject']

        def has_qcode(code):
            return any('qcode' in entry and code == entry['qcode'] for entry in subjects)

        # Entries appended below are visited by this same loop as well.
        for subject in subjects:
            if 'qcode' not in subject or len(subject['qcode']) != 8:
                continue

            qcode = subject['qcode']
            # Top-level code first, then the mid-level one.
            for parent_qcode in (qcode[:2] + '000000', qcode[:5] + '000'):
                if not has_qcode(parent_qcode):
                    subjects.append({'qcode': parent_qcode, 'name': subject_codes[parent_qcode]})
    except Exception as ex:
        raise ProviderError.iptcError(ex, provider)
Exemple #14
0
    def create(self, docs, **kwargs):
        """Fetch each requested document from the backend and post a copy to the archive.

        The ingest provider whose source is ``aapmm`` is looked up first; when
        its config holds a username, the backend credentials are set from it.

        :param docs: iterable of dicts; each must carry a ``desk`` and a
            ``guid``, and may carry a ``stage`` and an item state
        :param kwargs: accepted but not used by this method
        :return: list of the guids generated for the archived copies
        :raises: the bad-request error built by ``SuperdeskApiError`` when a
            doc has no desk; the error built by
            ``ProviderError.externalProviderError`` when the backend lookup
            raises ``FileNotFoundError``
        """
        new_guids = []
        provider = get_resource_service("ingest_providers").find_one(source="aapmm", req=None)
        if provider and "config" in provider and "username" in provider["config"]:
            self.backend.set_credentials(provider["config"]["username"], provider["config"]["password"])
        for doc in docs:
            if not doc.get("desk"):
                # if no desk is selected then it is bad request
                raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")
            try:
                archived_doc = self.backend.find_one_raw(doc["guid"], doc["guid"])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, provider)

            # Work on a shallow copy so the fetched document keeps its own _id,
            # which is still needed below for INGEST_ID / FAMILY_ID.
            dest_doc = dict(archived_doc)
            new_id = generate_guid(type=GUID_TAG)
            new_guids.append(new_id)
            dest_doc["_id"] = new_id
            generate_unique_id_and_name(dest_doc)

            if provider:
                dest_doc["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc, update=None, desk_id=doc.get("desk"), stage_id=doc.get("stage"))
            # State defaults to FETCHED unless the request supplied one.
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            dest_doc[INGEST_ID] = archived_doc["_id"]
            dest_doc[FAMILY_ID] = archived_doc["_id"]
            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc.get("_id"))

        return new_guids
 def remove_expired(self, provider):
     """Remove the provider's expired data, then push ``ingest:cleaned``.

     :param provider: provider passed on to ``remove_expired_data``
     :raises: the error built by ``ProviderError.expiredContentError`` for
         any exception raised on the way; the exception is logged first
     """
     try:
         remove_expired_data(provider)
         push_notification('ingest:cleaned')
     except Exception as error:
         logger.exception(error)
         raise ProviderError.expiredContentError(error, provider)
def filter_expired_items(provider, items):
    """Filter out expired items from the list of articles to be ingested.

    Filters out both expired items and items whose
    `item['type'] not in provider['content_types']`.

    An item's expiry is its ``expiry`` value when set, otherwise its
    ``versioncreated`` plus the provider's ``content_expiry`` minutes
    (``INGEST_EXPIRY_MINUTES`` from the app config by default). Items with
    neither value are treated as expired.

    :param provider: Ingest Provider Details.
    :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
    :param items: list of items received from the provider
    :type items: list
    :return: list of items which can be saved into ingest collection
    :rtype: list
    :raises: the error built by
        ``ProviderError.providerFilterExpiredContentError`` for any exception
    """
    def is_not_expired(item):
        # Resolve the fallback lazily: a dict.get() default is evaluated even
        # when the key exists, so computing versioncreated + delta there
        # failed for items that only carry an expiry.
        expiry = item.get('expiry')
        if not expiry:
            version_created = item.get('versioncreated')
            if not version_created:
                return False
            expiry = version_created + delta

        # Aware and naive datetimes cannot be compared with each other, so
        # pick the "now" that matches the expiry value.
        if expiry.tzinfo:
            return expiry > utcnow()
        return expiry > datetime.now()

    try:
        delta = timedelta(minutes=provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']))
        filtered_items = [item for item in items if is_not_expired(item) and
                          item.get(ITEM_TYPE, 'text') in provider.get('content_types', [])]

        if len(items) != len(filtered_items):
            logger.debug('Received {0} articles from provider {1}, but only {2} are eligible to be saved in ingest'
                         .format(len(items), provider['name'], len(filtered_items)))

        return filtered_items
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
class TestProviderService(FeedingService):
    """Feeding service named ``test`` whose ``_update`` never produces items."""

    # Name of this feeding service.
    NAME = 'test'

    # Error descriptions declared by this service: only the description of
    # the error built by ProviderError.anpaError.
    ERRORS = [ProviderError.anpaError(None, None).get_error_description()]

    def _update(self, provider, update):
        """Return an empty list; both arguments are ignored."""
        return []
 def test_ingest_provider_closed_when_critical_error_raised(self):
     """Closing a provider with an ANPA error must flag it as ``is_closed``."""
     name = 'AAP'

     provider_before = self._get_provider(name)
     self.assertFalse(provider_before.get('is_closed'))

     service = self._get_provider_service(provider_before)
     service.provider = provider_before
     service.close_provider(provider_before, ProviderError.anpaError())

     # Read the provider again to observe the stored flag.
     provider_after = self._get_provider(name)
     self.assertTrue(provider_after.get('is_closed'))
 def test_ingest_provider_closed_when_critical_error_raised(self):
     """Closing a provider with an ANPA error must flag it as ``is_closed``."""
     name = "AAP"

     provider_before = self._get_provider(name)
     self.assertFalse(provider_before.get("is_closed"))

     service = self._get_provider_service(provider_before)
     service.provider = provider_before
     service.close_provider(provider_before, ProviderError.anpaError())

     # Read the provider again to observe the stored flag.
     provider_after = self._get_provider(name)
     self.assertTrue(provider_after.get("is_closed"))
def filter_expired_items(provider, items):
    """Keep the items whose ``versioncreated`` is newer than the retention cutoff.

    The cutoff is now minus the provider's ``days_to_keep`` (``DAYS_TO_KEEP``
    when the provider does not set it). Items without ``versioncreated`` are
    compared using the current time.

    :param provider: ingest provider dict
    :param items: iterable of item dicts
    :return: list of the items that are kept
    :raises: the error built by
        ``ProviderError.providerFilterExpiredContentError`` for any exception
    """
    try:
        retention_days = provider.get('days_to_keep', DAYS_TO_KEEP)
        cutoff = utcnow() - timedelta(days=retention_days)

        kept = []
        for candidate in items:
            if candidate.get('versioncreated', utcnow()) > cutoff:
                kept.append(candidate)
        return kept
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
 def run(self, provider_type=None):
     """Remove expired data for every ingest provider of the given type.

     ``ingest:cleaned`` is pushed after each handled provider, whether or
     not the removal succeeded. The first failure is logged and re-raised
     wrapped, which stops the run.

     :param provider_type: only providers of this type are handled; all
         providers when falsy
     """
     providers = superdesk.get_resource_service('ingest_providers').get(req=None, lookup={})
     for provider in providers:
         if provider_type and provider_type != provider.get('type'):
             continue

         try:
             remove_expired_data(provider)
         except Exception as error:
             logger.exception(error)
             raise ProviderError.expiredContentError(error, provider)
         finally:
             push_notification('ingest:cleaned')
Exemple #22
0
    def run(self, provider=None):
        """Validate a JSON provider definition and post it to ``ingest_providers``.

        ``content_expiry`` defaults to the app's ``INGEST_EXPIRY_MINUTES``.
        Nothing happens when ``provider`` is falsy.

        :param provider: JSON string describing the provider
        :return: the provider dict that was posted, or ``None`` when no
            provider was given
        :raises: the error built by ``ProviderError.providerAddError`` for
            any exception, including failed validation
        """
        if not provider:
            return

        try:
            # Bound before parsing so the handler below can always pass it on.
            data = {}
            data = superdesk.json.loads(provider)
            data.setdefault('content_expiry', superdesk.app.config['INGEST_EXPIRY_MINUTES'])

            schema = superdesk.app.config['DOMAIN']['ingest_providers']['schema']
            validator = superdesk.app.validator(schema, 'ingest_providers')

            if not validator.validate(data):
                invalid = Exception('Failed to add Provider as the data provided is invalid. Errors: {}'
                                    .format(str(validator.errors)))
                raise ProviderError.providerAddError(exception=invalid, provider=data)

            get_resource_service('ingest_providers').post([data])
            return data
        except Exception as ex:
            raise ProviderError.providerAddError(ex, data)
 def test_ingest_provider_closed_when_critical_error_raised(self):
     """Closing a provider with an ANPA error must flag it as ``is_closed``."""
     name = 'AAP'
     with self.app.app_context():
         provider_before = self._get_provider(name)
         self.assertFalse(provider_before.get('is_closed'))

         service = self._get_provider_service(provider_before)
         service.provider = provider_before
         service.close_provider(provider_before, ProviderError.anpaError())

         # Read the provider again to observe the stored flag.
         provider_after = self._get_provider(name)
         self.assertTrue(provider_after.get('is_closed'))
Exemple #24
0
def process_anpa_category(item, provider):
    """Attach the vocabulary name to the item's ``anpa-category``.

    The ``categories`` vocabulary is searched for the first active entry
    whose ``value`` equals the item's category qcode, ignoring case. On a
    match the item's ``anpa-category`` is replaced by a dict holding the
    item's qcode and the entry's name.

    :param item: item dict with an ``anpa-category`` entry
    :param provider: provider the item came from, used only when reporting an error
    :raises: the error built by ``ProviderError.anpaError`` for any exception
    """
    try:
        vocabulary = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='categories')
        if not vocabulary:
            return

        for candidate in vocabulary['items']:
            if candidate['is_active'] is not True:
                continue
            if item['anpa-category']['qcode'].lower() != candidate['value'].lower():
                continue

            item['anpa-category'] = {'qcode': item['anpa-category']['qcode'], 'name': candidate['name']}
            break
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
Exemple #25
0
    def create(self, docs, **kwargs):
        """Fetch each requested document from the backend and post a copy to the archive.

        The search provider named ``PROVIDER_NAME`` must exist and must not be
        closed. When it has a ``config``, the backend credentials are set
        from it. After every archived document the provider's
        ``last_item_update`` is set to the current time.

        :param docs: iterable of dicts; each must carry a ``desk`` and a
            ``guid``, and may carry a ``stage`` and an item state
        :param kwargs: accepted but not used by this method
        :return: list of the guids generated for the archived copies
        :raises: the bad-request error built by ``SuperdeskApiError`` when
            the search provider is missing or closed, or when a doc has no
            desk; the error built by ``ProviderError.externalProviderError``
            when the backend lookup raises ``FileNotFoundError``
        """
        search_provider = get_resource_service('search_providers').find_one(
            search_provider=PROVIDER_NAME, req=None)

        if not search_provider or search_provider.get('is_closed', False):
            raise SuperdeskApiError.badRequestError(
                'No search provider found or the search provider is closed.')

        if 'config' in search_provider:
            self.backend.set_credentials(search_provider['config'])

        new_guids = []
        for doc in docs:
            if not doc.get(
                    'desk'):  # if no desk is selected then it is bad request
                raise SuperdeskApiError.badRequestError(
                    "Destination desk cannot be empty.")

            try:
                archived_doc = self.backend.find_one_raw(
                    doc['guid'], doc['guid'])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, search_provider)

            # Work on a shallow copy so the fetched document keeps its own id,
            # which is still needed below for INGEST_ID / FAMILY_ID.
            dest_doc = dict(archived_doc)
            new_id = generate_guid(type=GUID_TAG)
            new_guids.append(new_id)
            dest_doc[config.ID_FIELD] = new_id
            generate_unique_id_and_name(dest_doc)

            # search_provider is always truthy here because of the check at
            # the top of the method.
            if search_provider:
                dest_doc['ingest_provider'] = str(
                    search_provider[config.ID_FIELD])

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc,
                    update=None,
                    desk_id=doc.get('desk'),
                    stage_id=doc.get('stage'))
            # State defaults to FETCHED unless the request supplied one.
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            dest_doc[INGEST_ID] = archived_doc[config.ID_FIELD]
            dest_doc[FAMILY_ID] = archived_doc[config.ID_FIELD]
            dest_doc[ITEM_OPERATION] = ITEM_FETCH
            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc[config.ID_FIELD])

            # Runs once per archived document, inside the loop.
            get_resource_service('search_providers').system_update(
                search_provider[config.ID_FIELD],
                {'last_item_update': utcnow()}, search_provider)

        return new_guids
 def run(self, provider_type=None):
     """Remove expired data for every ingest provider of the given type.

     ``ingest:cleaned`` is pushed after each handled provider, whether or
     not the removal succeeded. The first failure is logged and re-raised
     wrapped, which stops the run.

     :param provider_type: only providers of this type are handled; all
         providers when falsy
     """
     providers = superdesk.get_resource_service('ingest_providers').get(
         req=None, lookup={})
     for provider in providers:
         if provider_type and provider_type != provider.get('type'):
             continue

         try:
             remove_expired_data(provider)
         except Exception as error:
             logger.exception(error)
             raise ProviderError.expiredContentError(error, provider)
         finally:
             push_notification('ingest:cleaned')
def filter_expired_items(provider, items):
    """Return the items that have not expired yet.

    An item's expiry is its ``expiry`` value when set, otherwise its
    ``versioncreated`` plus the provider's ``content_expiry`` minutes
    (``INGEST_EXPIRY_MINUTES`` by default). Items with neither value, and
    items whose expiry has no ``tzinfo``, are treated as expired.

    :param provider: ingest provider dict
    :param items: iterable of item dicts
    :return: list of the items that are kept
    :raises: the error built by
        ``ProviderError.providerFilterExpiredContentError`` for any exception
    """
    def is_not_expired(item):
        # Resolve the fallback lazily: a dict.get() default is evaluated even
        # when the key exists, so computing versioncreated + delta there
        # failed for items that only carry an expiry.
        expiry = item.get("expiry")
        if not expiry:
            version_created = item.get("versioncreated")
            if not version_created:
                return False
            expiry = version_created + delta

        if expiry.tzinfo:
            return expiry > utcnow()
        return False

    try:
        delta = timedelta(minutes=provider.get("content_expiry", INGEST_EXPIRY_MINUTES))
        return [item for item in items if is_not_expired(item)]
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
Exemple #28
0
 def run(self, provider=None):
     """Save a JSON provider definition into the ``ingest_providers`` collection.

     ``name`` and ``source`` default to the provider's ``type`` and
     ``content_expiry`` defaults to ``INGEST_EXPIRY_MINUTES``. Nothing
     happens when ``provider`` is falsy.

     :param provider: JSON string describing the provider
     :return: the provider dict that was saved, or ``None`` when no
         provider was given
     :raises: the error built by ``ProviderError.providerAddError`` for
         any exception
     """
     if not provider:
         return

     try:
         # Bound before parsing so the handler below can always pass it on.
         data = {}
         data = superdesk.json.loads(provider)
         for key in ('name', 'source'):
             data.setdefault(key, data['type'])
         data.setdefault('content_expiry', INGEST_EXPIRY_MINUTES)
         superdesk.get_db()['ingest_providers'].save(data)
         return data
     except Exception as ex:
         raise ProviderError.providerAddError(ex, data)
Exemple #29
0
 def run(self, provider=None):
     """Save a JSON provider definition into the ``ingest_providers`` collection.

     ``name`` and ``source`` default to the provider's ``type`` and
     ``days_to_keep`` defaults to ``DAYS_TO_KEEP``. Nothing happens when
     ``provider`` is falsy.

     :param provider: JSON string describing the provider
     :return: the provider dict that was saved, or ``None`` when no
         provider was given
     :raises: the error built by ``ProviderError.providerAddError`` for
         any exception
     """
     if not provider:
         return

     try:
         # Bound before parsing so the handler below can always pass it on.
         data = {}
         data = superdesk.json.loads(provider)
         for key in ('name', 'source'):
             data.setdefault(key, data['type'])
         data.setdefault('days_to_keep', DAYS_TO_KEEP)
         superdesk.get_db()['ingest_providers'].save(data)
         return data
     except Exception as ex:
         raise ProviderError.providerAddError(ex, data)
def filter_expired_items(provider, items):
    """Return the items that have not expired yet.

    An item's expiry is its ``expiry`` value when set, otherwise its
    ``versioncreated`` plus the provider's ``content_expiry`` minutes
    (``INGEST_EXPIRY_MINUTES`` from the app config by default). Items with
    neither value, and items whose expiry has no ``tzinfo``, are treated as
    expired.

    :param provider: ingest provider dict
    :param items: iterable of item dicts
    :return: list of the items that are kept
    :raises: the error built by
        ``ProviderError.providerFilterExpiredContentError`` for any exception
    """
    def is_not_expired(item):
        # Resolve the fallback lazily: a dict.get() default is evaluated even
        # when the key exists, so computing versioncreated + delta there
        # failed for items that only carry an expiry.
        expiry = item.get('expiry')
        if not expiry:
            version_created = item.get('versioncreated')
            if not version_created:
                return False
            expiry = version_created + delta

        if expiry.tzinfo:
            return expiry > utcnow()
        return False

    try:
        delta = timedelta(minutes=provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']))
        return [item for item in items if is_not_expired(item)]
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
Exemple #31
0
 def run(self, provider=None):
     """Save a JSON provider definition into the ``ingest_providers`` collection.

     ``name`` and ``source`` default to the provider's ``type`` and
     ``content_expiry`` defaults to ``INGEST_EXPIRY_MINUTES``. Nothing
     happens when ``provider`` is falsy.

     :param provider: JSON string describing the provider
     :return: the provider dict that was saved, or ``None`` when no
         provider was given
     :raises: the error built by ``ProviderError.providerAddError`` for
         any exception
     """
     if not provider:
         return

     try:
         # Bound before parsing so the handler below can always pass it on.
         data = {}
         data = superdesk.json.loads(provider)
         for key in ('name', 'source'):
             data.setdefault(key, data['type'])
         data.setdefault('content_expiry', INGEST_EXPIRY_MINUTES)
         superdesk.get_db()['ingest_providers'].save(data)
         return data
     except Exception as ex:
         raise ProviderError.providerAddError(ex, data)
def process_anpa_category(item, provider):
    """Attach the vocabulary name to the item's ``anpa-category``.

    The ``categories`` vocabulary is searched for the first active entry
    whose ``value`` equals the item's category qcode, ignoring case. On a
    match the item's ``anpa-category`` is replaced by a dict holding the
    item's qcode and the entry's name.

    :param item: item dict with an ``anpa-category`` entry
    :param provider: provider the item came from, used only when reporting an error
    :raises: the error built by ``ProviderError.anpaError`` for any exception
    """
    try:
        vocabulary = superdesk.get_resource_service("vocabularies").find_one(req=None, _id="categories")
        if not vocabulary:
            return

        for candidate in vocabulary["items"]:
            if candidate["is_active"] is not True:
                continue
            if item["anpa-category"]["qcode"].lower() != candidate["value"].lower():
                continue

            item["anpa-category"] = {"qcode": item["anpa-category"]["qcode"], "name": candidate["name"]}
            break
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
 def run(self, provider=None):
     """Save a JSON provider definition into the ``ingest_providers`` collection.

     ``name`` and ``source`` default to the provider's ``type`` and
     ``days_to_keep`` defaults to ``DAYS_TO_KEEP``. Nothing happens when
     ``provider`` is falsy.

     :param provider: JSON string describing the provider
     :return: the provider dict that was saved, or ``None`` when no
         provider was given
     :raises: the error built by ``ProviderError.providerAddError`` for
         any exception
     """
     if not provider:
         return

     try:
         # Bound before parsing so the handler below can always pass it on.
         data = {}
         data = superdesk.json.loads(provider)
         for key in ('name', 'source'):
             data.setdefault(key, data['type'])
         data.setdefault('days_to_keep', DAYS_TO_KEEP)
         superdesk.get_db()['ingest_providers'].save(data)
         return data
     except Exception as ex:
         raise ProviderError.providerAddError(ex, data)
def filter_expired_items(provider, items):
    """Return the items that have not expired yet.

    An item's expiry is its ``expiry`` value when set, otherwise its
    ``versioncreated`` plus the provider's ``content_expiry`` minutes
    (``INGEST_EXPIRY_MINUTES`` from the app config by default). Items with
    neither value, and items whose expiry has no ``tzinfo``, are treated as
    expired.

    :param provider: ingest provider dict
    :param items: iterable of item dicts
    :return: list of the items that are kept
    :raises: the error built by
        ``ProviderError.providerFilterExpiredContentError`` for any exception
    """
    def is_not_expired(item):
        # Resolve the fallback lazily: a dict.get() default is evaluated even
        # when the key exists, so computing versioncreated + delta there
        # failed for items that only carry an expiry.
        expiry = item.get('expiry')
        if not expiry:
            version_created = item.get('versioncreated')
            if not version_created:
                return False
            expiry = version_created + delta

        if expiry.tzinfo:
            return expiry > utcnow()
        return False

    try:
        delta = timedelta(minutes=provider.get(
            'content_expiry', app.config['INGEST_EXPIRY_MINUTES']))
        return [item for item in items if is_not_expired(item)]
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
    def _request(self, url, data):
        """Perform POST request to given url.

        The payload is sent JSON-encoded, together with the predefined
        headers and the user/password pair as auth.

        :param url: address the request is sent to
        :param data: payload, serialised with ``json.dumps``
        :return: the response object, for any 2xx status
        :raises: the error built by ``ProviderError.externalProviderError``
            when the status code is outside the 2xx range
        """
        response = requests.post(
            url,
            data=json.dumps(data),
            headers=self._headers,
            auth=(self._user, self._password),
        )

        if not 200 <= response.status_code < 300:
            logger.error('error fetching url=%s status=%s content=%s' % (url, response.status_code,
                                                                         response.content or ''))
            raise ProviderError.externalProviderError("Scanpix request can't be performed")
        return response
def process_anpa_category(item, provider):
    """Align each of the item's ANPA categories with the 'categories' vocabulary.

    For every entry in ``item['anpa_category']`` the first active vocabulary item whose
    qcode matches case-insensitively supplies the entry's name and qcode. Entries
    without a match are left as they are.

    :param item: item being ingested; its ``anpa_category`` entries are updated in place
    :param provider: provider the item came from, passed on to the raised error
    :raises ProviderError: wrapping any error raised during the lookup
    """
    try:
        vocabulary = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='categories')
        if not vocabulary:
            return

        for category in item['anpa_category']:
            match = next(
                (known for known in vocabulary['items']
                 if known['is_active'] is True and category['qcode'].lower() == known['qcode'].lower()),
                None)
            if match is None:
                continue
            category['name'] = match['name']
            # make the case of the qcode match what we hold in our dictionary
            category['qcode'] = match['qcode']
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
    def run(self, provider):
        """Validate a JSON provider definition and create the ingest provider.

        :param provider: JSON string describing the provider
        :return: the provider dict that was posted
        :raises ProviderError: if the JSON cannot be parsed, the data fails schema
            validation, or posting the provider fails
        """
        # Bound up front so the error handlers can report it even if loads() fails.
        data = {}
        try:
            data = superdesk.json.loads(provider)
            data.setdefault("content_expiry",
                            superdesk.app.config["INGEST_EXPIRY_MINUTES"])

            validator = superdesk.app.validator(
                superdesk.app.config["DOMAIN"]["ingest_providers"]["schema"],
                "ingest_providers")

            if not validator.validate(data):
                ex = Exception(
                    "Failed to add Provider as the data provided is invalid. Errors: {}"
                    .format(str(validator.errors)))
                raise ProviderError.providerAddError(exception=ex,
                                                     provider=data)

            get_resource_service("ingest_providers").post([data])
            return data
        except ProviderError:
            # Already a ProviderError: re-raise it as is instead of wrapping it in a
            # second providerAddError.
            raise
        except Exception as ex:
            raise ProviderError.providerAddError(ex, data)
def process_anpa_category(item, provider):
    """Fill in name and qcode of the item's ANPA categories from the vocabulary.

    Each entry of ``item['anpa_category']`` is compared case-insensitively, by qcode,
    with the active items of the 'categories' vocabulary; the first match provides the
    entry's name and qcode. Unmatched entries are not modified.

    :param item: item being ingested; its ``anpa_category`` entries are updated in place
    :param provider: provider the item came from, passed on to the raised error
    :raises ProviderError: wrapping any error raised during the lookup
    """
    try:
        vocabulary = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='categories')
        if not vocabulary:
            return

        for category in item['anpa_category']:
            for known in vocabulary['items']:
                if known['is_active'] is not True:
                    continue
                if category['qcode'].lower() != known['qcode'].lower():
                    continue
                category['name'] = known['name']
                # make the case of the qcode match what we hold in our dictionary
                category['qcode'] = known['qcode']
                break
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
    def remove_expired(self, provider):
        """Remove the provider's expired data while holding the 'ingest:gc' lock.

        Does nothing when the lock cannot be obtained. The lock is released again
        whether or not the removal succeeds.

        :param provider: provider whose expired data is removed
        :raises ProviderError: wrapping any error raised during removal (after logging it)
        """
        gc_lock = 'ingest:gc'
        acquired = lock(gc_lock, expire=300)

        if acquired:
            try:
                remove_expired_data(provider)
                push_notification('ingest:cleaned')
            except Exception as err:
                logger.exception(err)
                raise ProviderError.expiredContentError(err, provider)
            finally:
                unlock(gc_lock)
    def remove_expired(self, provider):
        """Remove the provider's expired data while holding the 'ingest:gc' lock.

        Does nothing when the lock cannot be obtained. The lock is released again
        whether or not the removal succeeds.

        :param provider: provider whose expired data is removed
        :raises ProviderError: wrapping any error raised during removal (after logging it)
        """
        gc_lock = 'ingest:gc'
        acquired = lock(gc_lock, expire=300)

        if acquired:
            try:
                remove_expired_data(provider)
                push_notification('ingest:cleaned')
            except Exception as err:
                logger.exception(err)
                raise ProviderError.expiredContentError(err, provider)
            finally:
                unlock(gc_lock)
def process_anpa_category(item, provider):
    """Attach the vocabulary name to the item's single ANPA category.

    The qcode in ``item['anpa-category']`` is compared case-insensitively with the
    ``value`` of each active item of the 'categories' vocabulary. On the first match
    ``item['anpa-category']`` is replaced by a dict holding the item's own qcode and
    the vocabulary name; without a match the item is left untouched.

    :param item: item being ingested
    :param provider: provider the item came from, passed on to the raised error
    :raises ProviderError: wrapping any error raised during the lookup
    """
    try:
        vocabulary = superdesk.get_resource_service(
            'vocabularies').find_one(req=None, _id='categories')
        if not vocabulary:
            return

        for known in vocabulary['items']:
            if known['is_active'] is not True:
                continue
            if item['anpa-category']['qcode'].lower() != known['value'].lower():
                continue
            item['anpa-category'] = {
                'qcode': item['anpa-category']['qcode'],
                'name': known['name'],
            }
            break
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
 def test_raise_ruleError(self):
     """ruleError yields code 2003, keeps the original exception and logs one error."""
     with assert_raises(ProviderError) as error_context:
         try:
             # Raise the cause for real so the ProviderError is built inside an except block.
             ex = Exception("Testing ruleError")
             raise ex
         except Exception:
             raise ProviderError.ruleError(ex, self.provider)

     exception = error_context.exception
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(exception.code, 2003)
     self.assertEqual(exception.message, "Rule could not be applied")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing ruleError")

     logged_errors = self.mock_logger_handler.messages['error']
     self.assertEqual(len(logged_errors), 1)
     self.assertEqual(logged_errors[0],
                      "ProviderError Error 2003 - Rule could not be applied: "
                      "Testing ruleError on channel TestProvider")
 def test_raise_anpaError(self):
     """anpaError yields code 2005, keeps the original exception and logs one error."""
     with assert_raises(ProviderError) as error_context:
         try:
             # Raise the cause for real so the ProviderError is built inside an except block.
             ex = Exception("Testing anpaError")
             raise ex
         except Exception:
             raise ProviderError.anpaError(ex, self.provider)

     exception = error_context.exception
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(exception.code, 2005)
     self.assertEqual(exception.message, "Anpa category error")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing anpaError")

     logged_errors = self.mock_logger_handler.messages['error']
     self.assertEqual(len(logged_errors), 1)
     self.assertEqual(logged_errors[0],
                      "ProviderError Error 2005 - Anpa category error: "
                      "Testing anpaError on channel TestProvider")
 def test_raise_ruleError(self):
     """ruleError yields code 2003, keeps the original exception and logs one error."""
     with assert_raises(ProviderError) as error_context:
         try:
             # Raise the cause for real so the ProviderError is built inside an except block.
             ex = Exception("Testing ruleError")
             raise ex
         except Exception:
             raise ProviderError.ruleError(ex, self.provider)

     exception = error_context.exception
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(exception.code, 2003)
     self.assertEqual(exception.message, "Rule could not be applied")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing ruleError")

     logged_errors = self.mock_logger_handler.messages['error']
     self.assertEqual(len(logged_errors), 1)
     self.assertEqual(logged_errors[0],
                      "ProviderError Error 2003 - Rule could not be applied: "
                      "Testing ruleError on channel TestProvider")
 def test_raise_anpaError(self):
     """anpaError yields code 2005, keeps the original exception and logs one error."""
     with assert_raises(ProviderError) as error_context:
         try:
             # Raise the cause for real so the ProviderError is built inside an except block.
             ex = Exception("Testing anpaError")
             raise ex
         except Exception:
             raise ProviderError.anpaError(ex, self.provider)

     exception = error_context.exception
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(exception.code, 2005)
     self.assertEqual(exception.message, "Anpa category error")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing anpaError")

     logged_errors = self.mock_logger_handler.messages['error']
     self.assertEqual(len(logged_errors), 1)
     self.assertEqual(logged_errors[0],
                      "ProviderError Error 2005 - Anpa category error: "
                      "Testing anpaError on channel TestProvider")
 def test_raise_providerFilterExpiredContentError(self):
     """providerFilterExpiredContentError yields code 2006 and logs one error."""
     with assert_raises(ProviderError) as error_context:
         try:
             # Raise the cause for real so the ProviderError is built inside an except block.
             ex = Exception("Testing providerFilterExpiredContentError")
             raise ex
         except Exception:
             raise ProviderError.providerFilterExpiredContentError(ex, self.provider)

     exception = error_context.exception
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(exception.code, 2006)
     self.assertEqual(exception.message, "Expired content could not be filtered")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing providerFilterExpiredContentError")

     logged_errors = self.mock_logger_handler.messages['error']
     self.assertEqual(len(logged_errors), 1)
     self.assertEqual(logged_errors[0],
                      "ProviderError Error 2006 - Expired content could not be filtered: "
                      "Testing providerFilterExpiredContentError on channel TestProvider")
def process_anpa_category(item, provider):
    """Normalise the item's ANPA categories against the 'categories' vocabulary.

    Each entry of ``item['anpa_category']`` is matched case-insensitively, by qcode,
    against the active vocabulary items. Matched entries receive the vocabulary's name
    and qcode; entries unknown to the vocabulary are removed from the item.

    :param item: item being ingested; ``item['anpa_category']`` is updated in place
    :param provider: provider the item came from, passed on to the raised error
    :raises ProviderError: wrapping any error raised during the lookup
    """
    try:
        anpa_categories = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='categories')
        if anpa_categories:
            known_categories = []
            for item_category in item['anpa_category']:
                mapped_category = next(
                    (c for c in anpa_categories['items']
                     if c['is_active'] is True and item_category['qcode'].lower() == c['qcode'].lower()),
                    None)
                # if the category is not known to the system leave it out of the item
                if mapped_category is None:
                    continue
                item_category['name'] = mapped_category['name']
                # make the case of the qcode match what we hold in our dictionary
                item_category['qcode'] = mapped_category['qcode']
                known_categories.append(item_category)
            # Swap the contents in one step after the loop: removing entries from the list
            # while iterating over it skipped the entry following each removed one.
            item['anpa_category'][:] = known_categories
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
 def test_raise_providerFilterExpiredContentError(self):
     """providerFilterExpiredContentError yields code 2006 and logs one error."""
     with assert_raises(ProviderError) as error_context:
         try:
             # Raise the cause for real so the ProviderError is built inside an except block.
             ex = Exception("Testing providerFilterExpiredContentError")
             raise ex
         except Exception:
             raise ProviderError.providerFilterExpiredContentError(ex, self.provider)

     exception = error_context.exception
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(exception.code, 2006)
     self.assertEqual(exception.message, "Expired content could not be filtered")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing providerFilterExpiredContentError")

     logged_errors = self.mock_logger_handler.messages['error']
     self.assertEqual(len(logged_errors), 1)
     self.assertEqual(logged_errors[0],
                      "ProviderError Error 2006 - Expired content could not be filtered: "
                      "Testing providerFilterExpiredContentError on channel TestProvider")
Exemple #49
0
def process_anpa_category(item, provider):
    """Normalise the item's ANPA categories against the 'categories' vocabulary.

    Each entry of ``item['anpa_category']`` is matched case-insensitively, by qcode,
    against the active vocabulary items. Matched entries receive the vocabulary's name
    and qcode; entries unknown to the vocabulary are removed from the item.

    :param item: item being ingested; ``item['anpa_category']`` is updated in place
    :param provider: provider the item came from, passed on to the raised error
    :raises ProviderError: wrapping any error raised during the lookup
    """
    try:
        anpa_categories = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='categories')
        if anpa_categories:
            known_categories = []
            for item_category in item['anpa_category']:
                mapped_category = next(
                    (c for c in anpa_categories['items']
                     if c['is_active'] is True and item_category['qcode'].lower() == c['qcode'].lower()),
                    None)
                # if the category is not known to the system leave it out of the item
                if mapped_category is None:
                    continue
                item_category['name'] = mapped_category['name']
                # make the case of the qcode match what we hold in our dictionary
                item_category['qcode'] = mapped_category['qcode']
                known_categories.append(item_category)
            # Swap the contents in one step after the loop: removing entries from the list
            # while iterating over it skipped the entry following each removed one.
            item['anpa_category'][:] = known_categories
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
def process_iptc_codes(item, provider):
    """Ensure the higher level IPTC codes of every subject are present on the item.

    For example if given 15039001 (Formula One) make sure that 15039000 (motor racing)
    and 15000000 (sport) are there as well. Missing parent codes are appended to
    ``item["subject"]`` in place; nothing is returned.

    :param item: A story item with a ``subject`` list
    :param provider: provider the item came from, passed on to the raised error
    :raises ProviderError: wrapping any unexpected error raised while expanding
    """
    try:
        subjects = item["subject"]

        def has_qcode(code):
            return any("qcode" in entry and code == entry["qcode"] for entry in subjects)

        for subject in subjects:
            is_iptc_code = "qcode" in subject and len(subject["qcode"]) == 8 and subject["qcode"].isdigit()
            if not is_iptc_code:
                continue

            # First the top level code (2 leading digits), then the mid level one (5 digits).
            for prefix_length in (2, 5):
                parent_qcode = subject["qcode"][:prefix_length] + "0" * (8 - prefix_length)
                if has_qcode(parent_qcode):
                    continue
                try:
                    subjects.append({"qcode": parent_qcode, "name": subject_codes[parent_qcode]})
                except KeyError:
                    logger.warning("missing qcode in subject_codes: {qcode}".format(qcode=parent_qcode))
                    # an unknown parent code ends the handling of this subject
                    break
    except Exception as ex:
        raise ProviderError.iptcError(ex, provider)
Exemple #51
0
 def test_raise_providerAddError(self):
     """providerAddError yields code 2001, keeps the original exception and logs one error."""
     with assert_raises(ProviderError) as error_context:
         try:
             # Raise the cause for real so the ProviderError is built inside an except block.
             ex = Exception("Testing providerAddError")
             raise ex
         except Exception:
             raise ProviderError.providerAddError(ex, self.provider)

     exception = error_context.exception
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(exception.code, 2001)
     self.assertEqual(exception.message, "Provider could not be saved")
     self.assertIsNotNone(exception.system_exception)
     # assertEquals is a deprecated alias that newer Python versions no longer provide.
     self.assertEqual(exception.system_exception.args[0], "Testing providerAddError")

     logged_errors = self.mock_logger_handler.messages["error"]
     self.assertEqual(len(logged_errors), 1)
     self.assertEqual(
         logged_errors[0],
         "ProviderError Error 2001 - Provider could not be saved: "
         "Testing providerAddError on channel TestProvider",
     )
Exemple #52
0
 def test_raise_ingestError(self):
     """ingestError yields code 2004, names the provider and logs one error."""
     with assert_raises(ProviderError) as error_context:
         try:
             # Raise the cause for real so the ProviderError is built inside an except block.
             ex = Exception("Testing ingestError")
             raise ex
         except Exception:
             raise ProviderError.ingestError(ex, self.provider)

     exception = error_context.exception
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(exception.code, 2004)
     self.assertEqual(exception.message, "Ingest error")
     self.assertEqual(exception.provider_name, "TestProvider")
     self.assertIsNotNone(exception.system_exception)
     # assertEquals is a deprecated alias that newer Python versions no longer provide.
     self.assertEqual(exception.system_exception.args[0], "Testing ingestError")

     logged_errors = self.mock_logger_handler.messages["error"]
     self.assertEqual(len(logged_errors), 1)
     self.assertEqual(
         logged_errors[0],
         "ProviderError Error 2004 - Ingest error: "
         "Testing ingestError on channel TestProvider",
     )
Exemple #53
0
    def create(self, docs, **kwargs):
        """Copy each requested item from the search provider backend into the archive.

        :param docs: request documents; each must carry ``desk`` and ``guid`` and may
            carry ``stage`` and an item state
        :param kwargs: not used by this method
        :return: list of the guids generated for the archived copies
        :raises SuperdeskApiError: if no search provider is found, it is closed, or a
            doc has no desk
        :raises ProviderError: if the backend raises FileNotFoundError for an item
        """
        search_provider = get_resource_service('search_providers').find_one(search_provider=PROVIDER_NAME, req=None)

        if not search_provider or search_provider.get('is_closed', False):
            raise SuperdeskApiError.badRequestError('No search provider found or the search provider is closed.')

        if 'config' in search_provider:
            self.backend.set_credentials(search_provider['config'])

        new_guids = []
        for doc in docs:
            if not doc.get('desk'):  # if no desk is selected then it is bad request
                raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")

            try:
                archived_doc = self.backend.find_one_raw(doc['guid'], doc['guid'])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, search_provider)

            # Work on a shallow copy that gets its own freshly generated id.
            dest_doc = dict(archived_doc)
            new_id = generate_guid(type=GUID_TAG)
            new_guids.append(new_id)
            dest_doc[config.ID_FIELD] = new_id
            generate_unique_id_and_name(dest_doc)

            # NOTE(review): search_provider is always truthy here because of the check
            # at the top of the method.
            if search_provider:
                dest_doc['ingest_provider'] = str(search_provider[config.ID_FIELD])

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc, update=None, desk_id=doc.get('desk'), stage_id=doc.get('stage'))
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            # Both ids point back at the document the copy was made from.
            dest_doc[INGEST_ID] = archived_doc[config.ID_FIELD]
            dest_doc[FAMILY_ID] = archived_doc[config.ID_FIELD]
            dest_doc[ITEM_OPERATION] = ITEM_FETCH
            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc[config.ID_FIELD])

            # NOTE(review): last_item_update is written once per fetched doc, inside the loop.
            get_resource_service('search_providers').system_update(search_provider[config.ID_FIELD],
                                                                   {'last_item_update': utcnow()}, search_provider)

        return new_guids
    def create(self, docs, **kwargs):
        """Copy each requested item from the provider into the archive.

        :param docs: request documents; each must carry ``desk`` and ``guid`` and may
            carry ``stage`` and an item state
        :param kwargs: not used by this method
        :return: list of the guids generated for the archived copies
        :raises SuperdeskApiError: if a doc has no desk
        :raises ProviderError: if fetching an item raises FileNotFoundError
        """
        new_guids = []
        provider = self.get_provider()
        for doc in docs:
            if not doc.get('desk'):
                # if no desk is selected then it is bad request
                raise SuperdeskApiError.badRequestError(
                    _("Destination desk cannot be empty."))
            try:
                archived_doc = self.fetch(doc['guid'])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, provider)

            # Work on a shallow copy that gets its own freshly generated id.
            dest_doc = dict(archived_doc)
            new_id = generate_guid(type=GUID_TAG)
            new_guids.append(new_id)
            dest_doc['_id'] = new_id
            generate_unique_id_and_name(dest_doc)

            if provider:
                dest_doc['ingest_provider'] = str(
                    provider[superdesk.config.ID_FIELD])

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc,
                    update=None,
                    desk_id=doc.get('desk'),
                    stage_id=doc.get('stage'))
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            # Both ids point back at the document the copy was made from.
            dest_doc[INGEST_ID] = archived_doc['_id']
            dest_doc[FAMILY_ID] = archived_doc['_id']
            dest_doc[ITEM_OPERATION] = ITEM_FETCH
            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc.get('_id'))

        # The loop tolerates a missing provider, so the final update must as well:
        # calling provider.get() on None would fail after the items were already archived.
        if new_guids and provider:
            get_resource_service('search_providers').system_update(
                provider.get(config.ID_FIELD), {'last_item_update': utcnow()},
                provider)

        return new_guids
Exemple #55
0
    def create(self, docs, **kwargs):
        """Copy each requested item from the 'aapmm' backend into the archive.

        :param docs: request documents; each must carry ``desk`` and ``guid`` and may
            carry ``stage`` and an item state
        :param kwargs: not used by this method
        :return: list of the guids generated for the archived copies
        :raises SuperdeskApiError: if a doc has no desk
        :raises ProviderError: if the backend raises FileNotFoundError for an item
        """
        new_guids = []
        provider = get_resource_service('ingest_providers').find_one(
            source='aapmm', req=None)
        # Credentials are only handed to the backend when the provider config has a username.
        if provider and 'config' in provider and 'username' in provider[
                'config']:
            self.backend.set_credentials(provider['config']['username'],
                                         provider['config']['password'])
        for doc in docs:
            if not doc.get('desk'):
                # if no desk is selected then it is bad request
                raise SuperdeskApiError.badRequestError(
                    "Destination desk cannot be empty.")
            try:
                archived_doc = self.backend.find_one_raw(
                    doc['guid'], doc['guid'])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, provider)

            # Work on a shallow copy that gets its own freshly generated id.
            dest_doc = dict(archived_doc)
            new_id = generate_guid(type=GUID_TAG)
            new_guids.append(new_id)
            dest_doc['_id'] = new_id
            generate_unique_id_and_name(dest_doc)

            if provider:
                dest_doc['ingest_provider'] = str(
                    provider[superdesk.config.ID_FIELD])

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc,
                    update=None,
                    desk_id=doc.get('desk'),
                    stage_id=doc.get('stage'))
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            # Both ids point back at the document the copy was made from.
            dest_doc[INGEST_ID] = archived_doc['_id']
            dest_doc[FAMILY_ID] = archived_doc['_id']
            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc.get('_id'))

        return new_guids
Exemple #56
0
def filter_expired_items(provider, items):
    """Filter out items that must not be saved into the ingest collection.

    An item is kept when ``is_not_expired`` accepts it for the provider's expiry window
    and its type (``'text'`` when unset) is listed in ``provider['content_types']``.
    An unparsable or negative ``content_expiry`` is deleted from the provider and the
    configured ``INGEST_EXPIRY_MINUTES`` is used instead; the same default applies when
    the value is missing or zero.

    :param provider: Ingest Provider Details.
    :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
    :param items: list of items received from the provider
    :type items: list
    :return: list of items which can be saved into ingest collection
    :rtype: list
    :raises ProviderError: wrapping any error raised while filtering
    """
    try:
        expiry_minutes = None
        try:
            parsed_expiry = int(provider['content_expiry'])
        except ValueError:
            logger.warning(
                'invalid content_expiry: content_expiry={value}'.format(
                    value=provider['content_expiry']))
            del provider['content_expiry']
        except (TypeError, KeyError):
            # value missing or not convertible: fall back to the configured default
            pass
        else:
            if parsed_expiry < 0:
                del provider['content_expiry']
            else:
                expiry_minutes = parsed_expiry

        if not expiry_minutes:
            expiry_minutes = app.config['INGEST_EXPIRY_MINUTES']
        delta = timedelta(minutes=expiry_minutes)

        eligible_items = []
        for candidate in items:
            if not is_not_expired(candidate, delta):
                continue
            if candidate.get(ITEM_TYPE, 'text') in provider.get('content_types', []):
                eligible_items.append(candidate)

        if len(items) != len(eligible_items):
            logger.warning(
                'Received {0} articles from provider {1}, but only {2} are eligible to be saved in ingest'
                .format(len(items), provider['name'], len(eligible_items)))

        return eligible_items
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
def ingest_item(item, provider, rule_set=None):
    """Prepare an item received from a provider and store it in the ingest collection.

    The item gets default ``_id``, ``source`` and state values, its ANPA category and
    rule set are processed, renditions are updated from the base image, and the item is
    then written with put() when an ingest item with the same guid exists, or post()
    otherwise.

    :param item: item dict received from the provider; modified in place
    :param provider: provider dict the item came from
    :param rule_set: optional rule set passed on to ``apply_rule_set``
    :raises ProviderError: raised by a processing step, or wrapping any other error
    """
    try:
        item.setdefault('_id', item['guid'])
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider['_id'])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)

        if 'anpa-category' in item:
            process_anpa_category(item, provider)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        rend = item.get('renditions', {})
        if rend:
            # Prefer the 'baseImage' rendition, otherwise take the first one available.
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(baseImageRend['href'])
                update_renditions(item, href)

        old_item = ingest_service.find_one(_id=item['guid'], req=None)

        if old_item:
            ingest_service.put(item['guid'], item)
        else:
            try:
                ingest_service.post([item])
            except HTTPException as e:
                # The message needs a placeholder for the exception: passing an argument
                # without one makes the log record fail to format.
                logger.error("Exception while persisting item in ingest collection: %s", e)
                ingest_service.put(item['guid'], item)
    except ProviderError:
        # Already the right error type; let it through unwrapped.
        raise
    except Exception as ex:
        raise ProviderError.ingestError(ex, provider)
 def mock_update(provider):
     """Stand-in update callable that always raises an anpa ProviderError."""
     anpa_error = ProviderError.anpaError()
     raise anpa_error
from superdesk.errors import SuperdeskApiError, ProviderError
from superdesk.io import register_feeding_service, registered_feeding_services
from .tests import setup_providers, teardown_providers
from superdesk.io.feeding_services import FeedingService
from superdesk.io.commands.remove_expired_content import get_expired_items, RemoveExpiredContent
from superdesk.celery_task_utils import mark_task_as_not_running, is_task_running
from test_factory import SuperdeskTestCase


class TestProviderService(FeedingService):
    """Minimal feeding service used by the tests in this module."""

    def _update(self, provider):
        # Never produces any items, whatever the provider.
        return []


# Register the stub service under the 'test' type. NOTE(review): the third argument is
# presumably the list of errors the service may raise - confirm against register_feeding_service.
register_feeding_service('test', TestProviderService(), [ProviderError.anpaError(None, None).get_error_description()])


class CeleryTaskRaceTest(SuperdeskTestCase):
    """Checks the running-task bookkeeping used to avoid concurrent ingest updates."""

    def test_the_second_update_fails_if_already_running(self):
        provider = {'_id': 'abc', 'name': 'test provider', 'update_schedule': {'minutes': 1}}
        # Nothing has been marked as running yet, so a falsy result is expected here.
        removed = mark_task_as_not_running(provider['name'], provider['_id'])
        self.assertFalse(removed)

        # First check: expected to report "not running"; per the assertion message it
        # also marks the task as running.
        failed_to_mark_as_running = is_task_running(provider['name'], provider['_id'], {'minutes': 1})
        self.assertFalse(failed_to_mark_as_running, 'Failed to mark ingest update as running')

        # Second check with the same arguments: the task must now be reported as running.
        failed_to_mark_as_running = is_task_running(provider['name'], provider['_id'], {'minutes': 1})
        self.assertTrue(failed_to_mark_as_running, 'Ingest update marked as running, possible race condition')