Exemple #1
0
def step_impl_fetch_from_provider_ingest(context, provider_name, guid):
    """Parse one file of the named ingest provider and ingest the parsed items.

    The file is ``guid`` joined onto the ``path`` entry of the provider's
    ``config``. Every parsed item gets a fresh ``versioncreated`` and an
    ``expiry`` 20 minutes later before being ingested. After a successful
    ingest the provider's ``LAST_ITEM_UPDATE`` is refreshed and a placeholder
    named ``<provider_name>.<item guid>`` is stored for each item's ``_id``.

    :param context: step context; provides ``app`` and ``ingest_items``.
    :param provider_name: name used to look the provider up.
    :param guid: file name, relative to the provider's configured path.
    """
    with context.app.test_request_context(context.app.config['URL_PREFIX']):
        providers = get_resource_service('ingest_providers')
        provider = providers.find_one(name=provider_name, req=None)

        feeding_service = get_feeding_service(provider['feeding_service'])
        base_dir = provider.get('config', {}).get('path', '')
        source_path = os.path.join(base_dir, guid)
        parser = feeding_service.get_feed_parser(provider)

        if isinstance(parser, XMLFeedParser):
            # XML parsers are handed a parsed element, not a path.
            with open(source_path, 'rb') as source:
                xml_root = etree.etree.fromstring(source.read())
                parsed = parser.parse(xml_root, provider)
        else:
            parsed = parser.parse(source_path, provider)

        # Parsers may return a single item or a list of items.
        items = parsed if isinstance(parsed, list) else [parsed]

        for entry in items:
            entry['versioncreated'] = utcnow()
            entry['expiry'] = utcnow() + timedelta(minutes=20)

        failed = context.ingest_items(items, provider, feeding_service)
        assert len(failed) == 0, failed

        # Fetch the provider again before writing the last-item timestamp.
        provider = providers.find_one(name=provider_name, req=None)
        providers.system_update(provider['_id'], {LAST_ITEM_UPDATE: utcnow()}, provider)

        for entry in items:
            placeholder = '{}.{}'.format(provider_name, entry['guid'])
            set_placeholder(context, placeholder, entry['_id'])
Exemple #2
0
def update_provider(provider, rule_set=None, routing_scheme=None, sync=False):
    """Pull items from an ingest provider, ingest them and store the provider's update info.

    A lock built from the provider's name and id is taken first. When it cannot
    be acquired the function returns without doing anything (an error is logged
    in sync mode). Once acquired, the lock is always released on the way out.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    :param sync: Running in sync mode from cli.
    :raises IngestFileError: wraps any exception raised while the update runs.
    """
    id_field = superdesk.config.ID_FIELD
    lock_key = get_lock_id('ingest', provider['name'], provider[id_field])

    acquired = lock(lock_key, expire=UPDATE_TTL + 10)
    if not acquired:
        if sync:
            logger.error('update is already running for %s', provider['name'])
        return

    try:
        feeding_service = get_feeding_service(provider['feeding_service'])
        changes = {LAST_UPDATED: utcnow()}

        if sync:
            # import everything again
            provider[LAST_UPDATED] = utcnow() - timedelta(days=9999)

        batches = feeding_service.update(provider, changes)
        if isinstance(batches, list):
            # A list has no ``send``; wrap it so both return shapes are driven the same way.
            batches = (batch for batch in batches)

        # Each batch is ingested and the items that failed are sent back to the
        # feeding service when the next batch is requested.
        rejected = None
        try:
            while True:
                batch = batches.send(rejected)
                rejected = ingest_items(batch, provider, feeding_service, rule_set, routing_scheme)
                update_last_item_updated(changes, batch)
        except StopIteration:
            pass

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        providers = superdesk.get_resource_service('ingest_providers')
        provider = providers.find_one(req=None, _id=provider[id_field])
        providers.system_update(provider[id_field], changes, provider)

        had_item_update = LAST_ITEM_UPDATE in changes

        if not had_item_update and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            last_seen = provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c")
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=provider.get('name'),
                last=last_seen,
            )

        logger.info('Provider {0} updated'.format(provider[id_field]))

        if had_item_update:
            # Only push a notification if there has been an update
            push_notification('ingest:update', provider_id=str(provider[id_field]))
    except Exception as e:
        logger.error("Failed to ingest file: {error}".format(error=e))
        raise IngestFileError(3000, e, provider)
    finally:
        unlock(lock_key)
    def _test_config(self, updates, original=None):
        """Run the feeding service's config test on the provider with updates applied.

        ``updates`` is merged over a copy of ``original`` (or over an empty
        dict when ``original`` is falsy). If looking up the feeding service
        raises ``KeyError`` the check is skipped and nothing is reported.
        """
        if original:
            merged = original.copy()
        else:
            merged = {}
        merged.update(updates)

        try:
            feeding_service = get_feeding_service(merged['feeding_service'])
        except KeyError:
            return
        else:
            feeding_service.config_test(merged)
def remove_expired_data(provider):
    """Remove expired items, and the media files they reference, for a provider.

    The collection to purge is the feeding service's ``service`` attribute when
    it has one, otherwise ``'ingest'``; ``'ingest'`` is also used when looking
    up the feeding service raises ``KeyError``.

    Media files are collected from the ``media`` entry of each rendition of
    every expired item that is not flagged ``archived``.

    :param provider: Ingest Provider data (dict-like).
    """
    provider_label = provider.get('_id', 'Detached items')
    logger.info('Removing expired content for provider: %s', provider_label)

    try:
        feeding_service = get_feeding_service(provider['feeding_service'])
        ingest_collection = getattr(feeding_service, 'service', 'ingest')
    except KeyError:
        ingest_collection = 'ingest'

    ingest_service = superdesk.get_resource_service(ingest_collection)

    # Collect ids and media references in a single pass, so the result set is
    # consumed exactly once and both lists describe the same documents
    # (no rewind / second iteration of the cursor is needed).
    ids = []
    file_ids = []
    for item in get_expired_items(provider, ingest_collection):
        ids.append(item['_id'])
        if item.get('archived'):
            # Files of archived items are kept.
            continue
        for rendition in item.get('renditions', {}).values():
            media_id = rendition.get('media')
            if media_id:
                file_ids.append(media_id)

    if ids:
        logger.info('Removing items %s', ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s', file_id)
        superdesk.app.media.delete(file_id)

    logger.info('Removed expired content for provider: %s count: %s', provider_label, len(ids))

    remove_expired_from_elastic(ingest_collection)
Exemple #5
0
def remove_expired_data(provider):
    """Remove expired items, and the media files they reference, for a provider.

    The collection to purge is the feeding service's ``service`` attribute when
    it has one, otherwise ``"ingest"``; ``"ingest"`` is also used when looking
    up the feeding service raises ``KeyError``.

    Media files are collected from the ``media`` entry of each rendition of
    every expired item that is not flagged ``archived``.

    :param provider: Ingest Provider data (dict-like).
    """
    provider_label = provider.get("_id", "Detached items")
    logger.info("Removing expired content for provider: %s", provider_label)

    try:
        feeding_service = get_feeding_service(provider["feeding_service"])
        ingest_collection = getattr(feeding_service, "service", "ingest")
    except KeyError:
        ingest_collection = "ingest"

    ingest_service = superdesk.get_resource_service(ingest_collection)

    # Collect ids and media references in a single pass, so the result set is
    # consumed exactly once and both lists describe the same documents
    # (no rewind / second iteration of the cursor is needed).
    ids = []
    file_ids = []
    for item in get_expired_items(provider, ingest_collection):
        ids.append(item["_id"])
        if item.get("archived"):
            # Files of archived items are kept.
            continue
        for rendition in item.get("renditions", {}).values():
            media_id = rendition.get("media")
            if media_id:
                file_ids.append(media_id)

    if ids:
        logger.info("Removing items %s", ids)
        ingest_service.delete({"_id": {"$in": ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info("Deleting file: %s", file_id)
        superdesk.app.media.delete(file_id)

    logger.info("Removed expired content for provider: %s count: %s", provider_label, len(ids))

    remove_expired_from_elastic(ingest_collection)
 def _get_provider_service(self, provider):
     """Return the feeding service registered under the provider's ``feeding_service`` key."""
     service_name = provider["feeding_service"]
     return get_feeding_service(service_name)
    def test_ingest_update_same_event(self):
        """Ingest the same NTB event document twice and inspect the stored event.

        The XML fixture is parsed and ingested once as-is, then parsed again,
        modified in memory (name and start/end dates) and ingested a second
        time. After each ingest the event with ``_id`` ``NTB-123456`` is read
        back from the ``events`` resource.
        """
        # NOTE(review): every check below uses ``assertTrue(value, expected)``.
        # unittest treats the second argument of ``assertTrue`` as the failure
        # message, so these calls only verify that ``value`` is truthy; nothing
        # is compared against the literal. Switching to ``assertEqual`` needs
        # the expected values to be confirmed first (see the notes further down).
        xml = ET.fromstring(
            """<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>
            <document>
            <guid>NTB-123456</guid>
            <time>2016-08-10T15:02:02</time>
            <publiseres>True</publiseres>
            <ntbId>NBRP160810_144545_ja_00</ntbId>
            <service>newscalendar</service>
            <title>Original Content</title>
            <location>Fr. Nansens plass 17, Tromsø, Troms</location>
            <timeStart>2016-09-05T09:00:00</timeStart>
            <timeEnd>2016-09-05T16:00:00</timeEnd>
            <alldayevent>False</alldayevent>
            <priority>5</priority>
            <regions>
            <region>Norge</region>
            </regions>
            <districts>
            <district parent="Norge">Troms</district>
            </districts>
            <category>Innenriks</category>
            <subcategory>Redplan element</subcategory>
            <subjects>
            <subject>Kriminalitet og rettsvesen</subject>
            <subject parent="Kriminalitet">Drap;Rettssaker</subject>
            </subjects>
            <emailwriter>[email protected]</emailwriter>
            <messagetype>Redplan redaksjon</messagetype>
            <geo>
            <latitude>69.65482639999999</latitude>
            <longitude>18.96509590000005</longitude>
            </geo>
            <content>Original Content</content>
            <mediaList>
            <media id="" mediaType="" mimeType="ukjent">
            <caption></caption>
            </media>
            </mediaList>
            </document>""")

        with self.app.test_request_context(self.app.config['URL_PREFIX']):
            # ingest event
            events = self.get_parsed_documents(
                registered_feed_parsers.get('ntb_event_xml'), xml)
            provider = get_resource_service('ingest_providers').find_one(
                req=None, _id=self.providers.get('ntbevent'))
            self.ingest_items(events, provider,
                              get_feeding_service('event_file'))
            ingested_event = get_resource_service('events').find_one(
                req=None, _id='NTB-123456')
            # Truthiness checks only (see NOTE at the top of the test).
            self.assertTrue(ingested_event['_id'], 'NTB-123456')
            self.assertTrue(ingested_event['name'], 'Original Content')
            self.assertTrue(ingested_event['dates']['start'],
                            '2016-09-05T09:00:00')
            self.assertTrue(ingested_event['dates']['end'],
                            '2016-09-05T16:00:00')
            self.assertTrue(
                ingested_event['_planning_schedule'][0]['scheduled'],
                '2016-09-05T09:00:00')

            # ingest updated event
            # The same XML is parsed again and the parsed result is edited in
            # memory before the second ingest.
            events = self.get_parsed_documents(
                registered_feed_parsers.get('ntb_event_xml'), xml)
            events[0]['dates']['start'] = '2016-09-06T10:00:00'
            events[0]['dates']['end'] = '2016-09-06T14:00:00'
            events[0]['name'] = 'Updated Content'
            self.ingest_items(events, provider,
                              get_feeding_service('event_file'))
            ingested_event = get_resource_service('events').find_one(
                req=None, _id='NTB-123456')
            # NOTE(review): the literals below for start/end are the original
            # 2016-09-05 values, not the 2016-09-06 values set above, and the
            # schedule literal (2016-09-16T16:00:00) matches neither. Because
            # assertTrue ignores them this currently passes regardless;
            # TODO confirm the intended expectations before asserting equality.
            self.assertTrue(ingested_event['_id'], 'NTB-123456')
            self.assertTrue(ingested_event['name'], 'Updated Content')
            self.assertTrue(ingested_event['dates']['start'],
                            '2016-09-05T09:00:00')
            self.assertTrue(ingested_event['dates']['end'],
                            '2016-09-05T16:00:00')
            self.assertTrue(
                ingested_event['_planning_schedule'][0]['scheduled'],
                '2016-09-16T16:00:00')