def remove_expired_data(provider):
    """Delete a provider's expired ingest items together with their media files.

    The cut-off date is "now" minus the provider's ``content_expiry`` minutes
    (falling back to the ``INGEST_EXPIRY_MINUTES`` app setting). Everything
    ``get_expired_items`` returns for that cut-off is deleted from the
    ``ingest`` resource; media referenced by renditions of items that are not
    flagged ``archived`` is deleted from the media storage afterwards.

    :param provider: ingest provider document; a missing ``_id`` is reported
        as "Detached items" in the output.
    """
    print("Removing expired content for provider: %s" % provider.get("_id", "Detached items"))
    default_minutes = superdesk.app.config["INGEST_EXPIRY_MINUTES"]
    keep_minutes = provider.get("content_expiry", default_minutes)
    cutoff = utcnow() - timedelta(minutes=keep_minutes)
    ingest_service = superdesk.get_resource_service("ingest")

    expired = get_expired_items(provider, cutoff)

    # First pass: ids of every expired item.
    ids = []
    for doc in expired:
        ids.append(doc["_id"])

    # Second pass needs the result set rewound to its start.
    expired.rewind()
    file_ids = []
    for doc in expired:
        for rendition in doc.get("renditions", {}).values():
            if doc.get("archived"):
                # Archived items keep their media.
                continue
            media_id = rendition.get("media")
            if media_id:
                file_ids.append(media_id)

    if ids:
        print("Removing items %s" % ids)
        ingest_service.delete({"_id": {"$in": ids}})

    for file_id in file_ids:
        print("Deleting file: ", file_id)
        superdesk.app.media.delete(file_id)

    removed_count = len(ids)
    stats.incr("ingest.expired_items", removed_count)
    print(
        "Removed expired content for provider: {0} count: {1}".format(
            provider.get("_id", "Detached items"), removed_count
        )
    )
def remove_expired_data(provider):
    """Remove expired ingest items and their media files for a provider.

    The cut-off date is "now" minus the provider's ``content_expiry`` minutes
    (falling back to ``INGEST_EXPIRY_MINUTES``). Items returned by
    ``get_expired_items`` for that cut-off are deleted from the ``ingest``
    resource, and media referenced by renditions of non-archived items is
    deleted from the media storage.

    :param provider: ingest provider document; a missing ``_id`` is reported
        as "Detached items" in the output.
    """
    provider_label = provider.get('_id', 'Detached items')
    print('Removing expired content for provider: %s' % provider_label)
    minutes_to_keep_content = provider.get('content_expiry', INGEST_EXPIRY_MINUTES)
    expiration_date = utcnow() - timedelta(minutes=minutes_to_keep_content)
    ingest_service = superdesk.get_resource_service('ingest')

    # Collect ids and media references in a single pass. The result of
    # get_expired_items may be a one-shot iterable (e.g. a database cursor):
    # iterating it a second time without rewinding yields nothing, which
    # would leave every media file behind.
    ids = []
    file_ids = []
    for item in get_expired_items(provider, expiration_date):
        ids.append(item['_id'])
        if item.get('archived'):
            # Archived items keep their media.
            continue
        for rend in (item.get('renditions') or {}).values():
            media_id = rend.get('media')
            if media_id:
                file_ids.append(media_id)

    if ids:
        print('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})

    for file_id in file_ids:
        print('Deleting file: ', file_id)
        superdesk.app.media.delete(file_id)

    stats.incr('ingest.expired_items', len(ids))
    print('Removed expired content for provider: {0} count: {1}'
          .format(provider_label, len(ids)))
Example #3
0
def remove_expired_data(provider):
    """Purge the expired ingest items of ``provider`` and their media files.

    Items returned by ``get_expired_items`` are deleted from the ``ingest``
    resource and an expiry notification is pushed for their ids. Media
    referenced by renditions of items not flagged ``archived`` is deleted
    from the media storage, and finally ``remove_expired_from_elastic`` is
    invoked.

    :param provider: ingest provider document; a missing ``_id`` is logged
        as "Detached items".
    """
    logger.info('Removing expired content for provider: %s' %
                provider.get('_id', 'Detached items'))
    ingest_service = superdesk.get_resource_service('ingest')

    expired = get_expired_items(provider)

    ids = [doc['_id'] for doc in expired]

    # The result set was consumed above; rewind before walking it again.
    expired.rewind()
    file_ids = []
    for doc in expired:
        for rendition in doc.get('renditions', {}).values():
            if doc.get('archived'):
                continue
            media_id = rendition.get('media')
            if media_id:
                file_ids.append(media_id)

    if ids:
        logger.info('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s' % file_id)
        superdesk.app.media.delete(file_id)

    removed_count = len(ids)
    stats.incr('ingest.expired_items', removed_count)
    logger.info('Removed expired content for provider: {0} count: {1}'.format(
        provider.get('_id', 'Detached items'), removed_count))

    remove_expired_from_elastic()
def remove_expired_data(provider):
    """Drop expired ingest content (items and media files) for ``provider``.

    The cut-off is the current UTC time minus ``content_expiry`` minutes,
    where ``content_expiry`` comes from the provider or, if absent, from the
    ``INGEST_EXPIRY_MINUTES`` app setting. Media of items flagged
    ``archived`` is left in place.

    :param provider: ingest provider document; a missing ``_id`` is printed
        as "Detached items".
    """
    print('Removing expired content for provider: %s' %
          provider.get('_id', 'Detached items'))
    fallback_minutes = superdesk.app.config['INGEST_EXPIRY_MINUTES']
    retention_minutes = provider.get('content_expiry', fallback_minutes)
    expiration_date = utcnow() - timedelta(minutes=retention_minutes)
    ingest_service = superdesk.get_resource_service('ingest')

    cursor = get_expired_items(provider, expiration_date)

    ids = []
    for entry in cursor:
        ids.append(entry['_id'])

    # Start over from the first result for the media scan.
    cursor.rewind()
    file_ids = []
    for entry in cursor:
        for rend in entry.get('renditions', {}).values():
            if not entry.get('archived') and rend.get('media'):
                file_ids.append(rend.get('media'))

    if ids:
        print('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})

    for file_id in file_ids:
        print('Deleting file: ', file_id)
        superdesk.app.media.delete(file_id)

    total_removed = len(ids)
    stats.incr('ingest.expired_items', total_removed)
    print('Removed expired content for provider: {0} count: {1}'.format(
        provider.get('_id', 'Detached items'), total_removed))
def remove_expired_data(provider):
    """Delete expired ingest items and related media for the given provider.

    Steps, in order: collect the expired items via ``get_expired_items``,
    delete them from the ``ingest`` resource (pushing an expiry notification
    for their ids), delete the media of non-archived items, record the
    count in ``stats`` and call ``remove_expired_from_elastic``.

    :param provider: ingest provider document; a missing ``_id`` is logged
        as "Detached items".
    """
    logger.info('Removing expired content for provider: %s' % provider.get('_id', 'Detached items'))
    ingest_service = superdesk.get_resource_service('ingest')

    results = get_expired_items(provider)

    ids = []
    for record in results:
        ids.append(record['_id'])

    # Second walk over the same result set requires a rewind.
    results.rewind()
    file_ids = []
    for record in results:
        for rend in record.get('renditions', {}).values():
            if not record.get('archived') and rend.get('media'):
                file_ids.append(rend.get('media'))

    if ids:
        logger.info('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s' % file_id)
        superdesk.app.media.delete(file_id)

    expired_total = len(ids)
    stats.incr('ingest.expired_items', expired_total)
    logger.info('Removed expired content for provider: {0} count: {1}'
                .format(provider.get('_id', 'Detached items'), expired_total))

    remove_expired_from_elastic()
Example #6
0
def update_provider(provider, rule_set=None, routing_scheme=None):
    """Run one ingest cycle for ``provider`` and record it on the provider.

    Returns immediately when ``ingest_for_provider_is_already_running``
    reports a run in progress. Otherwise every batch yielded by the
    provider-type handler is ingested, ``LAST_UPDATED`` (and
    ``LAST_ITEM_UPDATE`` when a non-empty batch arrived) is stored on the
    provider, and administrators are notified when nothing arrived and
    ``get_is_idle`` considers the provider idle.

    :param provider: ingest provider document
    :param rule_set: passed through to ``ingest_items``
    :param routing_scheme: passed through to ``ingest_items``
    """
    if ingest_for_provider_is_already_running(provider):
        return

    try:
        update = {LAST_UPDATED: utcnow()}

        fetcher = providers[provider.get('type')]
        for batch in fetcher.update(provider):
            ingest_items(batch, provider, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(batch))
            if batch:
                update[LAST_ITEM_UPDATE] = utcnow()

        ingest_service = superdesk.get_resource_service('ingest_providers')
        provider_id = provider[superdesk.config.ID_FIELD]
        ingest_service.system_update(provider_id, update, provider)

        nothing_received = LAST_ITEM_UPDATE not in update
        if nothing_received and get_is_idle(provider):
            admins = ingest_service._get_administrators()
            provider_name = provider.get('name')
            # Stored timestamp is treated as UTC and rendered in local time.
            last_seen = provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None)
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=provider_name,
                last=last_seen.strftime("%c"))

        logger.info('Provider {0} updated'.format(provider_id))
        push_notification('ingest:update', provider_id=str(provider_id))
    finally:
        # Always clear the running mark, even when ingestion fails.
        mark_provider_as_not_running(provider)
Example #7
0
def update_provider(provider, rule_set=None, routing_scheme=None):
    """Ingest new items from a provider and store the run timestamps on it.

    The run is skipped when the per-provider lock cannot be acquired. The
    lock is always released on exit.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    """
    id_field = superdesk.config.ID_FIELD
    lock_name = get_lock_id('ingest', provider['name'], provider[id_field])

    if not lock(lock_name, expire=1810):
        return

    try:
        registered = registered_feeding_services[provider['feeding_service']]
        # Work on a fresh instance of the registered service's class.
        feeding_service = registered.__class__()

        update = {LAST_UPDATED: utcnow()}

        for batch in feeding_service.update(provider, update):
            ingest_items(batch, provider, feeding_service, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(batch))
            if batch:
                update[LAST_ITEM_UPDATE] = utcnow()

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service('ingest_providers')
        provider = ingest_provider_service.find_one(req=None, _id=provider[id_field])
        provider_id = provider[id_field]
        ingest_provider_service.system_update(provider_id, update, provider)

        has_new_items = LAST_ITEM_UPDATE in update

        if not has_new_items and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            provider_name = provider.get('name')
            last_seen = provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None)
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=provider_name,
                last=last_seen.strftime("%c"))

        logger.info('Provider {0} updated'.format(provider_id))

        # Only push a notification if there has been an update
        if LAST_ITEM_UPDATE in update:
            push_notification('ingest:update', provider_id=str(provider_id))
    finally:
        unlock(lock_name)
Example #8
0
def update_provider(self, provider, rule_set=None, routing_scheme=None):
    """
    Fetch items from the ingest provider, ingest them into Superdesk and
    store the run timestamps on the provider.

    The run is skipped when the per-provider lock cannot be acquired for this
    host. The lock is always released on exit.

    :param self: object handed to ``get_host_id`` to derive the lock owner
    :param provider: Ingest Provider Details
    :type provider: dict :py:class:`superdesk.io.ingest_provider_model.IngestProviderResource`
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :type rule_set: dict :py:class:`apps.rules.rule_sets.RuleSetsResource`
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    :type routing_scheme: dict :py:class:`apps.rules.routing_rules.RoutingRuleSchemeResource`
    """

    id_field = superdesk.config.ID_FIELD
    lock_name = get_lock_id('ingest', provider['name'], provider[id_field])
    host_name = get_host_id(self)

    if not lock(lock_name, host_name, expire=1800):
        return

    try:
        registered = registered_feeding_services[provider['feeding_service']]
        # Work on a fresh instance of the registered service's class.
        feeding_service = registered.__class__()

        update = {LAST_UPDATED: utcnow()}

        for batch in feeding_service.update(provider):
            ingest_items(batch, provider, feeding_service, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(batch))
            if batch:
                update[LAST_ITEM_UPDATE] = utcnow()

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service('ingest_providers')
        provider = ingest_provider_service.find_one(req=None, _id=provider[id_field])
        provider_id = provider[id_field]
        ingest_provider_service.system_update(provider_id, update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            provider_name = provider.get('name')
            last_seen = provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None)
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=provider_name,
                last=last_seen.strftime("%c"))

        logger.info('Provider {0} updated'.format(provider_id))

        # Only push a notification if there has been an update
        if LAST_ITEM_UPDATE in update:
            push_notification('ingest:update', provider_id=str(provider_id))
    finally:
        unlock(lock_name, host_name)
Example #9
0
def update_provider(self, provider, rule_set=None, routing_scheme=None):
    """
    Fetch items from the ingest provider, ingest them into Superdesk and
    store the run timestamps on the provider.

    Nothing happens for providers of type ``search``, for providers that
    ``is_updatable`` rejects, or when the per-provider lock cannot be
    acquired for this host. The lock is always released on exit.

    :param self: object handed to ``get_host_id`` to derive the lock owner
    :param provider: ingest provider document
    :param rule_set: passed through to ``ingest_items``
    :param routing_scheme: passed through to ``ingest_items``
    """
    if provider.get('type') == 'search' or not is_updatable(provider):
        return

    id_field = superdesk.config.ID_FIELD
    lock_name = get_lock_id('ingest', provider['name'], provider[id_field])
    host_name = get_host_id(self)

    if not lock(lock_name, host_name, expire=1800):
        return

    try:
        update = {LAST_UPDATED: utcnow()}

        fetcher = providers[provider.get('type')]
        for batch in fetcher.update(provider):
            ingest_items(batch, provider, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(batch))
            if batch:
                update[LAST_ITEM_UPDATE] = utcnow()

        ingest_service = superdesk.get_resource_service('ingest_providers')
        provider_id = provider[id_field]
        ingest_service.system_update(provider_id, update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = ingest_service._get_administrators()
            provider_name = provider.get('name')
            last_seen = provider[LAST_ITEM_UPDATE].replace(
                tzinfo=timezone.utc).astimezone(tz=None)
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=provider_name,
                last=last_seen.strftime("%c"))

        logger.info('Provider {0} updated'.format(provider_id))

        # Only push a notification if there has been an update
        if LAST_ITEM_UPDATE in update:
            push_notification('ingest:update', provider_id=str(provider_id))
    finally:
        unlock(lock_name, host_name)
def update_provider(provider, rule_set=None):
    """
    Stamp the provider with the current time, then fetch and ingest its items.

    :param provider: ingest provider document; must contain ``_id``
    :param rule_set: passed through to ``ingest_items``
    """
    provider_service = superdesk.get_resource_service('ingest_providers')
    etag_field = app.config['ETAG']
    stamp = {
        LAST_UPDATED: utcnow(),
        # Providing the _etag as system updates to the documents shouldn't override _etag.
        etag_field: provider.get(etag_field),
    }
    provider_service.update(provider['_id'], stamp)

    fetcher = providers[provider.get('type')]
    for batch in fetcher.update(provider):
        ingest_items(batch, provider, rule_set)
        stats.incr('ingest.ingested_items', len(batch))

    logger.info('Provider {0} updated'.format(provider['_id']))
    push_notification('ingest:update')
Example #11
0
def update_provider(self, provider, rule_set=None, routing_scheme=None):
    """
    Run one ingest cycle for the provider and record it on the provider.

    The cycle is skipped for ``search`` providers, for providers rejected by
    ``is_updatable``, and when the per-provider lock is not acquired for this
    host. The lock is released in all cases once acquired.

    :param self: object handed to ``get_host_id`` to derive the lock owner
    :param provider: ingest provider document
    :param rule_set: passed through to ``ingest_items``
    :param routing_scheme: passed through to ``ingest_items``
    """
    is_search_provider = provider.get('type') == 'search'
    if is_search_provider or not is_updatable(provider):
        return

    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])
    host_name = get_host_id(self)

    acquired = lock(lock_name, host_name, expire=1800)
    if not acquired:
        return

    try:
        update = {LAST_UPDATED: utcnow()}

        for fetched in providers[provider.get('type')].update(provider):
            ingest_items(fetched, provider, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(fetched))
            if fetched:
                update[LAST_ITEM_UPDATE] = utcnow()

        ingest_service = superdesk.get_resource_service('ingest_providers')
        provider_id = provider[superdesk.config.ID_FIELD]
        ingest_service.system_update(provider_id, update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            recipients = ingest_service._get_administrators()
            provider_name = provider.get('name')
            # Stored timestamp is treated as UTC and rendered in local time.
            last_activity = provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None)
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=recipients,
                name=provider_name,
                last=last_activity.strftime("%c"))

        logger.info('Provider {0} updated'.format(provider_id))

        # Only push a notification if there has been an update
        if LAST_ITEM_UPDATE in update:
            push_notification('ingest:update', provider_id=str(provider_id))
    finally:
        unlock(lock_name, host_name)
def remove_expired_data(provider):
    """Remove expired ingest items, and their media files, for a provider.

    The cut-off date is "now" minus the provider's ``days_to_keep`` days
    (falling back to ``DAYS_TO_KEEP``). Each item returned by
    ``get_expired_items`` is deleted from the ``ingest`` resource; media
    referenced by renditions of items not flagged ``archived`` is deleted
    from the media storage.

    :param provider: ingest provider document; must contain ``_id``
    """
    print('Removing expired content for provider: %s' % provider['_id'])
    days_to_keep_content = provider.get('days_to_keep', DAYS_TO_KEEP)
    expiration_date = utcnow() - timedelta(days=days_to_keep_content)
    ingest_service = superdesk.get_resource_service('ingest')

    # Count removals locally: asking the result set for its size after the
    # items have been deleted would no longer see them, and the statistic
    # would under-report what was actually removed.
    removed = 0
    for item in get_expired_items(str(provider['_id']), expiration_date):
        print('Removing item %s' % item['_id'])
        ingest_service.delete_action({'_id': str(item['_id'])})
        removed += 1

        if item.get('archived'):
            # Archived items keep their media.
            continue
        for rend in item.get('renditions', {}).values():
            file_id = rend.get('media')
            if file_id:
                superdesk.app.media.delete(file_id)

    stats.incr('ingest.expired_items', removed)
    print('Removed expired content for provider: %s' % provider['_id'])
Example #13
0
def update_provider(provider, rule_set=None, routing_scheme=None):
    """
    Fetches items from ingest provider as per the configuration, ingests them into Superdesk and
    updates the provider.

    Nothing happens for providers of type ``search``, for providers rejected
    by ``is_updatable``, or when ``is_task_running`` reports the task as
    already running.

    :param provider: ingest provider document
    :param rule_set: passed through to ``ingest_items``
    :param routing_scheme: passed through to ``ingest_items``
    """
    # Eligibility checks come first. They return outside the try/finally
    # below, so they must run before anything that may need to be undone.
    if provider.get("type") == "search":
        return

    if not is_updatable(provider):
        return

    # NOTE(review): is_task_running presumably marks the task as running when
    # it returns False (the finally block clears that mark) - confirm against
    # the helper. It is therefore the last guard, directly before the try,
    # so an early return can never leave the mark behind.
    if is_task_running(
        provider["name"], provider[superdesk.config.ID_FIELD], provider.get("update_schedule", UPDATE_SCHEDULE_DEFAULT)
    ):
        return

    try:
        update = {LAST_UPDATED: utcnow()}

        for items in providers[provider.get("type")].update(provider):
            ingest_items(items, provider, rule_set, routing_scheme)
            stats.incr("ingest.ingested_items", len(items))
            if items:
                update[LAST_ITEM_UPDATE] = utcnow()
        ingest_service = superdesk.get_resource_service("ingest_providers")
        ingest_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        # No batch carried items and the provider counts as idle: alert admins.
        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            notify_and_add_activity(
                ACTIVITY_EVENT,
                "Provider {{name}} has gone strangely quiet. Last activity was on {{last}}",
                resource="ingest_providers",
                user_list=ingest_service._get_administrators(),
                name=provider.get("name"),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"),
            )

        logger.info("Provider {0} updated".format(provider[superdesk.config.ID_FIELD]))
        push_notification("ingest:update", provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        mark_task_as_not_running(provider["name"], provider[superdesk.config.ID_FIELD])
def update_provider(provider, rule_set=None):
    """
    Record the update time on the provider, then fetch and ingest its items.

    :param provider: ingest provider document; must contain ``_id``
    :param rule_set: passed through to ``ingest_items``
    """
    service = superdesk.get_resource_service('ingest_providers')
    etag_key = app.config['ETAG']
    changes = {LAST_UPDATED: utcnow()}
    # Providing the _etag as system updates to the documents shouldn't override _etag.
    changes[etag_key] = provider.get(etag_key)
    service.update(provider['_id'], changes)

    source = providers[provider.get('type')]
    for fetched in source.update(provider):
        ingest_items(fetched, provider, rule_set)
        stats.incr('ingest.ingested_items', len(fetched))

    logger.info('Provider {0} updated'.format(provider['_id']))
    push_notification('ingest:update')
def remove_expired_data(provider):
    """Remove expired ingest items, and their media files, for a provider.

    Items returned by ``get_expired_items`` for the cut-off date (now minus
    ``days_to_keep`` days, defaulting to ``DAYS_TO_KEEP``) are deleted one
    by one from the ``ingest`` resource. For items not flagged ``archived``
    the media referenced by their renditions is deleted as well.

    :param provider: ingest provider document; must contain ``_id``
    """
    print('Removing expired content for provider: %s' % provider['_id'])
    days_to_keep_content = provider.get('days_to_keep', DAYS_TO_KEEP)
    expiration_date = utcnow() - timedelta(days=days_to_keep_content)
    ingest_service = superdesk.get_resource_service('ingest')

    items = get_expired_items(str(provider['_id']), expiration_date)

    # Track the number of removals here instead of calling items.count()
    # at the end: by then the matching documents are gone, so a fresh count
    # would not reflect what was removed.
    removed_count = 0
    for item in items:
        print('Removing item %s' % item['_id'])
        ingest_service.delete_action({'_id': str(item['_id'])})
        removed_count += 1

        if not item.get('archived'):
            media_ids = [
                rend.get('media')
                for rend in item.get('renditions', {}).values()
                if rend.get('media')
            ]
            for file_id in media_ids:
                superdesk.app.media.delete(file_id)

    stats.incr('ingest.expired_items', removed_count)
    print('Removed expired content for provider: %s' % provider['_id'])
Example #16
0
def remove_expired_data(provider):
    """Delete expired items and their media from the provider's ingest collection.

    The collection is the ``service`` attribute of a fresh instance of the
    provider's registered feeding service when it has one, and ``'ingest'``
    otherwise (also when a ``KeyError`` is raised while resolving it).

    :param provider: ingest provider document; a missing ``_id`` is logged
        as "Detached items".
    """
    logger.info('Removing expired content for provider: %s' % provider.get('_id', 'Detached items'))

    try:
        registered = registered_feeding_services[provider['feeding_service']]
        feeding_service = registered.__class__()
        if hasattr(feeding_service, 'service'):
            ingest_collection = feeding_service.service
        else:
            ingest_collection = 'ingest'
    except KeyError:
        ingest_collection = 'ingest'

    ingest_service = superdesk.get_resource_service(ingest_collection)

    expired = get_expired_items(provider, ingest_collection)

    ids = []
    for doc in expired:
        ids.append(doc['_id'])

    # Rewind so the same result set can be scanned for media references.
    expired.rewind()
    file_ids = []
    for doc in expired:
        for rend in doc.get('renditions', {}).values():
            if not doc.get('archived') and rend.get('media'):
                file_ids.append(rend.get('media'))

    if ids:
        logger.info('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s' % file_id)
        superdesk.app.media.delete(file_id)

    removed_count = len(ids)
    stats.incr('ingest.expired_items', removed_count)
    logger.info('Removed expired content for provider: {0} count: {1}'
                .format(provider.get('_id', 'Detached items'), removed_count))

    remove_expired_from_elastic(ingest_collection)
def remove_expired_data(provider):
    """Purge expired items, plus their media files, for the given provider.

    Items are read from and deleted in the collection named by the feeding
    service's ``service`` attribute, or ``'ingest'`` when the service has no
    such attribute or a ``KeyError`` is raised while resolving it.
    ``remove_expired_from_elastic`` is called for the same collection at the
    end.

    :param provider: ingest provider document; a missing ``_id`` is logged
        as "Detached items".
    """
    logger.info('Removing expired content for provider: %s' % provider.get('_id', 'Detached items'))

    default_collection = 'ingest'
    try:
        service_prototype = registered_feeding_services[provider['feeding_service']]
        feeding_service = service_prototype.__class__()
        ingest_collection = getattr(feeding_service, 'service', default_collection)
    except KeyError:
        ingest_collection = default_collection

    ingest_service = superdesk.get_resource_service(ingest_collection)

    cursor = get_expired_items(provider, ingest_collection)

    ids = [entry['_id'] for entry in cursor]

    # The first pass exhausted the result set; rewind for the media scan.
    cursor.rewind()
    file_ids = []
    for entry in cursor:
        for rendition in entry.get('renditions', {}).values():
            if entry.get('archived'):
                continue
            media_id = rendition.get('media')
            if media_id:
                file_ids.append(media_id)

    if ids:
        logger.info('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s' % file_id)
        superdesk.app.media.delete(file_id)

    total = len(ids)
    stats.incr('ingest.expired_items', total)
    logger.info('Removed expired content for provider: {0} count: {1}'
                .format(provider.get('_id', 'Detached items'), total))

    remove_expired_from_elastic(ingest_collection)