def remove_expired_data(provider):
    """Delete a provider's expired ingest items together with their media.

    The retention period is read from the provider's ``content_expiry``
    (minutes), falling back to the ``INGEST_EXPIRY_MINUTES`` app setting.
    Expired items are removed from the ``ingest`` resource, media referenced
    by renditions of non-archived items is deleted from media storage, and
    the number of removed items is reported to ``stats``.

    :param provider: ingest provider dict; when it has no ``_id`` the log
        output refers to it as "Detached items".
    """
    print("Removing expired content for provider: %s" % provider.get("_id", "Detached items"))

    keep_minutes = provider.get("content_expiry", superdesk.app.config["INGEST_EXPIRY_MINUTES"])
    cutoff = utcnow() - timedelta(minutes=keep_minutes)
    ingest_service = superdesk.get_resource_service("ingest")
    expired = get_expired_items(provider, cutoff)

    ids = []
    for doc in expired:
        ids.append(doc["_id"])

    # The same result set is walked a second time, so rewind it first.
    expired.rewind()
    file_ids = []
    for doc in expired:
        for rendition in doc.get("renditions", {}).values():
            # Media of archived items is left in place.
            if doc.get("archived"):
                continue
            media_id = rendition.get("media")
            if media_id:
                file_ids.append(media_id)

    if ids:
        print("Removing items %s" % ids)
        ingest_service.delete({"_id": {"$in": ids}})

    for media_id in file_ids:
        print("Deleting file: ", media_id)
        superdesk.app.media.delete(media_id)

    removed = len(ids)
    stats.incr("ingest.expired_items", removed)
    print(
        "Removed expired content for provider: {0} count: {1}".format(
            provider.get("_id", "Detached items"), removed
        )
    )
def remove_expired_data(provider):
    """Remove expired ingest items, and their media files, for a provider.

    The retention period is the provider's ``content_expiry`` (minutes), or
    ``INGEST_EXPIRY_MINUTES`` when the provider does not define one. Expired
    items are deleted from the ``ingest`` resource; media referenced by the
    renditions of non-archived items is deleted from media storage; the
    number of removed items is reported to ``stats``.

    :param provider: ingest provider dict; when it has no ``_id`` the log
        output refers to it as "Detached items".
    """
    print('Removing expired content for provider: %s' % provider.get('_id', 'Detached items'))
    minutes_to_keep_content = provider.get('content_expiry', INGEST_EXPIRY_MINUTES)
    expiration_date = utcnow() - timedelta(minutes=minutes_to_keep_content)
    ingest_service = superdesk.get_resource_service('ingest')

    # Materialise the result once: it is walked twice below, and a one-shot
    # iterable (e.g. a database cursor) would be exhausted after the first
    # pass, leaving ``file_ids`` empty and the media files never deleted.
    items = list(get_expired_items(provider, expiration_date))

    ids = [item['_id'] for item in items]
    file_ids = [
        rend.get('media')
        for item in items
        for rend in item.get('renditions', {}).values()
        # Media of archived items is left in place.
        if not item.get('archived') and rend.get('media')
    ]

    if ids:
        print('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})

    for file_id in file_ids:
        print('Deleting file: ', file_id)
        superdesk.app.media.delete(file_id)

    stats.incr('ingest.expired_items', len(ids))
    print('Removed expired content for provider: {0} count: {1}'
          .format(provider.get('_id', 'Detached items'), len(ids)))
def remove_expired_data(provider):
    """Purge the expired ingest items of a provider.

    Expired items (as returned by ``get_expired_items``) are deleted from the
    ``ingest`` resource and announced via ``push_expired_notification``.
    Media referenced by renditions of non-archived items is deleted from
    media storage, the removed-item count is reported to ``stats`` and
    ``remove_expired_from_elastic`` is called at the end.

    :param provider: ingest provider dict; when it has no ``_id`` the log
        output refers to it as "Detached items".
    """
    logger.info('Removing expired content for provider: %s' % provider.get('_id', 'Detached items'))

    ingest_service = superdesk.get_resource_service('ingest')
    expired = get_expired_items(provider)

    ids = []
    for entry in expired:
        ids.append(entry['_id'])

    # Second pass over the same result set requires rewinding it.
    expired.rewind()
    file_ids = []
    for entry in expired:
        for rendition in entry.get('renditions', {}).values():
            # Only media of non-archived items is collected for deletion.
            if not entry.get('archived') and rendition.get('media'):
                file_ids.append(rendition.get('media'))

    if ids:
        logger.info('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for media_id in file_ids:
        logger.info('Deleting file: %s' % media_id)
        superdesk.app.media.delete(media_id)

    removed_count = len(ids)
    stats.incr('ingest.expired_items', removed_count)
    summary = 'Removed expired content for provider: {0} count: {1}'.format(
        provider.get('_id', 'Detached items'), removed_count)
    logger.info(summary)

    remove_expired_from_elastic()
def remove_expired_data(provider):
    """Drop ingest content of a provider that is past its retention period.

    Retention is ``content_expiry`` minutes from the provider, defaulting to
    the ``INGEST_EXPIRY_MINUTES`` app setting. Matching items are deleted
    from the ``ingest`` resource, media referenced by renditions of
    non-archived items is deleted from media storage, and the removed-item
    count is reported to ``stats``.

    :param provider: ingest provider dict; when it has no ``_id`` the log
        output refers to it as "Detached items".
    """
    print('Removing expired content for provider: %s' % provider.get('_id', 'Detached items'))

    default_minutes = superdesk.app.config['INGEST_EXPIRY_MINUTES']
    retention = timedelta(minutes=provider.get('content_expiry', default_minutes))
    expired_before = utcnow() - retention

    ingest_service = superdesk.get_resource_service('ingest')
    cursor = get_expired_items(provider, expired_before)

    ids = [doc['_id'] for doc in cursor]

    # Walk the result set again from the start to gather media ids.
    cursor.rewind()
    file_ids = []
    for doc in cursor:
        renditions = doc.get('renditions', {}).values()
        # Archived items keep their media.
        file_ids.extend(
            rendition.get('media')
            for rendition in renditions
            if not doc.get('archived') and rendition.get('media')
        )

    if ids:
        print('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})

    for stored_file in file_ids:
        print('Deleting file: ', stored_file)
        superdesk.app.media.delete(stored_file)

    stats.incr('ingest.expired_items', len(ids))
    print('Removed expired content for provider: {0} count: {1}'.format(
        provider.get('_id', 'Detached items'), len(ids)))
def remove_expired_data(provider):
    """Remove a provider's expired ingest items and related media.

    Items returned by ``get_expired_items`` are deleted from the ``ingest``
    resource and reported through ``push_expired_notification``. Media
    referenced by renditions of non-archived items is deleted from media
    storage. The removed-item count is sent to ``stats`` and finally
    ``remove_expired_from_elastic`` is invoked.

    :param provider: ingest provider dict; when it has no ``_id`` the log
        output refers to it as "Detached items".
    """
    logger.info('Removing expired content for provider: %s'
                % provider.get('_id', 'Detached items'))

    ingest_service = superdesk.get_resource_service('ingest')
    results = get_expired_items(provider)
    ids = [doc['_id'] for doc in results]

    # Start over on the result set for the media pass.
    results.rewind()
    file_ids = []
    for doc in results:
        for rendition in doc.get('renditions', {}).values():
            # Media belonging to archived items must survive.
            if doc.get('archived'):
                continue
            media = rendition.get('media')
            if media:
                file_ids.append(media)

    if ids:
        logger.info('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for media in file_ids:
        logger.info('Deleting file: %s' % media)
        superdesk.app.media.delete(media)

    stats.incr('ingest.expired_items', len(ids))
    logger.info(
        'Removed expired content for provider: {0} count: {1}'.format(
            provider.get('_id', 'Detached items'), len(ids)))

    remove_expired_from_elastic()
def update_provider(provider, rule_set=None, routing_scheme=None):
    """Fetch items from an ingest provider, ingest them and update the provider.

    Returns immediately when ``ingest_for_provider_is_already_running``
    reports true for the provider. Otherwise every batch yielded by the
    provider type's ``update`` is ingested, the provider document receives a
    new ``LAST_UPDATED`` (and ``LAST_ITEM_UPDATE`` when a non-empty batch
    arrived), administrators are notified when nothing arrived and
    ``get_is_idle`` is true, and an ``ingest:update`` notification is pushed.
    ``mark_provider_as_not_running`` is always called afterwards.

    :param provider: ingest provider dict
    :param rule_set: passed through to ``ingest_items``
    :param routing_scheme: passed through to ``ingest_items``
    """
    if ingest_for_provider_is_already_running(provider):
        return

    try:
        changes = {LAST_UPDATED: utcnow()}

        fetcher = providers[provider.get('type')]
        for batch in fetcher.update(provider):
            ingest_items(batch, provider, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(batch))
            if batch:
                changes[LAST_ITEM_UPDATE] = utcnow()

        ingest_service = superdesk.get_resource_service('ingest_providers')
        provider_id = provider[superdesk.config.ID_FIELD]
        ingest_service.system_update(provider_id, changes, provider)

        nothing_received = LAST_ITEM_UPDATE not in changes
        if nothing_received and get_is_idle(provider):
            admins = ingest_service._get_administrators()
            provider_name = provider.get('name')
            # The stored timestamp is treated as UTC and rendered in local time.
            last_activity = provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc)
            last_activity = last_activity.astimezone(tz=None).strftime("%c")
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=provider_name,
                last=last_activity)

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))
        push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        mark_provider_as_not_running(provider)
def update_provider(provider, rule_set=None, routing_scheme=None):
    """Fetch items from ingest provider, ingest them into Superdesk and update the provider.

    The whole run is guarded by a lock derived from the provider's name and
    id; when the lock cannot be acquired the function returns without doing
    anything. The lock is always released at the end.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    """
    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])

    if not lock(lock_name, expire=1810):
        return

    try:
        # Work on a fresh instance of the registered feeding service's class.
        feeding_service = registered_feeding_services[provider['feeding_service']]
        feeding_service = feeding_service.__class__()

        update = {LAST_UPDATED: utcnow()}

        for items in feeding_service.update(provider, update):
            ingest_items(items, provider, feeding_service, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(items))
            if items:
                update[LAST_ITEM_UPDATE] = utcnow()

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service('ingest_providers')
        provider = ingest_provider_service.find_one(
            req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(
            provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            notify_and_add_activity(
                ACTIVITY_EVENT,
                # Kept on one line: a quoted string literal cannot contain a
                # raw line break.
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=provider.get('name'),
                # The stored timestamp is treated as UTC and rendered in local time.
                last=provider[LAST_ITEM_UPDATE].replace(
                    tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:
            # Only push a notification if there has been an update
            push_notification('ingest:update',
                              provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        unlock(lock_name)
def update_provider(self, provider, rule_set=None, routing_scheme=None):
    """
    Fetches items from ingest provider as per the configuration, ingests them into Superdesk and updates the provider.

    The whole run is guarded by a lock derived from the provider's name and
    id and owned by the host id obtained from ``get_host_id(self)``; when the
    lock cannot be acquired the function returns without doing anything. The
    lock is always released at the end.

    :param self: object passed to ``get_host_id`` to derive the lock owner
    :param provider: Ingest Provider Details
    :type provider: dict :py:class:`superdesk.io.ingest_provider_model.IngestProviderResource`
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :type rule_set: dict :py:class:`apps.rules.rule_sets.RuleSetsResource`
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    :type routing_scheme: dict :py:class:`apps.rules.routing_rules.RoutingRuleSchemeResource`
    """
    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])
    host_name = get_host_id(self)

    if not lock(lock_name, host_name, expire=1800):
        return

    try:
        # Work on a fresh instance of the registered feeding service's class.
        feeding_service = registered_feeding_services[provider['feeding_service']]
        feeding_service = feeding_service.__class__()

        update = {LAST_UPDATED: utcnow()}

        for items in feeding_service.update(provider):
            ingest_items(items, provider, feeding_service, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(items))
            if items:
                update[LAST_ITEM_UPDATE] = utcnow()

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service('ingest_providers')
        provider = ingest_provider_service.find_one(req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            notify_and_add_activity(
                ACTIVITY_EVENT,
                # Kept on one line: a quoted string literal cannot contain a
                # raw line break.
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=provider.get('name'),
                # The stored timestamp is treated as UTC and rendered in local time.
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:
            # Only push a notification if there has been an update
            push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        unlock(lock_name, host_name)
def update_provider(self, provider, rule_set=None, routing_scheme=None):
    """Pull new items from an ingest provider, ingest them and update the provider.

    Nothing happens for providers of type ``search``, for providers that
    ``is_updatable`` rejects, or when the provider's ingest lock cannot be
    acquired. The lock is owned by the host id from ``get_host_id(self)``
    and is always released at the end.

    :param self: object passed to ``get_host_id`` to derive the lock owner
    :param provider: ingest provider dict
    :param rule_set: passed through to ``ingest_items``
    :param routing_scheme: passed through to ``ingest_items``
    """
    skip = provider.get('type') == 'search' or not is_updatable(provider)
    if skip:
        return

    provider_name = provider['name']
    provider_id = provider[superdesk.config.ID_FIELD]
    lock_name = get_lock_id('ingest', provider_name, provider_id)
    host_name = get_host_id(self)
    if not lock(lock_name, host_name, expire=1800):
        return

    try:
        changes = {LAST_UPDATED: utcnow()}

        for batch in providers[provider.get('type')].update(provider):
            ingest_items(batch, provider, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(batch))
            if batch:
                changes[LAST_ITEM_UPDATE] = utcnow()

        ingest_service = superdesk.get_resource_service('ingest_providers')
        ingest_service.system_update(provider[superdesk.config.ID_FIELD], changes, provider)

        received_items = LAST_ITEM_UPDATE in changes
        if not received_items and get_is_idle(provider):
            admins = ingest_service._get_administrators()
            display_name = provider.get('name')
            # The stored timestamp is treated as UTC and rendered in local time.
            last_seen = provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc)
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=display_name,
                last=last_seen.astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        # Only push a notification if there has been an update
        if received_items:
            push_notification('ingest:update',
                              provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        unlock(lock_name, host_name)
def update_provider(provider, rule_set=None):
    """Fetch items from an ingest provider, ingest them and update the provider.

    The provider document is first stamped with a new ``LAST_UPDATED`` value,
    then every batch yielded by the provider type's ``update`` is ingested
    and counted in ``stats``. An ``ingest:update`` notification is pushed at
    the end.

    :param provider: ingest provider dict
    :param rule_set: passed through to ``ingest_items``
    """
    provider_service = superdesk.get_resource_service('ingest_providers')
    stamp = {LAST_UPDATED: utcnow()}
    # Providing the _etag as system updates to the documents shouldn't override _etag.
    etag_field = app.config['ETAG']
    stamp[etag_field] = provider.get(etag_field)
    provider_service.update(provider['_id'], stamp)

    fetcher = providers[provider.get('type')]
    for batch in fetcher.update(provider):
        ingest_items(batch, provider, rule_set)
        stats.incr('ingest.ingested_items', len(batch))

    logger.info('Provider {0} updated'.format(provider['_id']))
    push_notification('ingest:update')
def update_provider(self, provider, rule_set=None, routing_scheme=None):
    """Ingest whatever a provider currently offers and record the run.

    Providers of type ``search`` and providers rejected by ``is_updatable``
    are skipped. The work is done under a lock built from the provider's
    name and id and owned by the host id from ``get_host_id(self)``; if the
    lock is not obtained the function returns. The lock is always released.

    :param self: object passed to ``get_host_id`` to derive the lock owner
    :param provider: ingest provider dict
    :param rule_set: passed through to ``ingest_items``
    :param routing_scheme: passed through to ``ingest_items``
    """
    if provider.get('type') == 'search':
        return
    if not is_updatable(provider):
        return

    id_field = superdesk.config.ID_FIELD
    lock_name = get_lock_id('ingest', provider['name'], provider[id_field])
    host_name = get_host_id(self)
    acquired = lock(lock_name, host_name, expire=1800)
    if not acquired:
        return

    try:
        update = {}
        update[LAST_UPDATED] = utcnow()

        source = providers[provider.get('type')]
        for fetched in source.update(provider):
            ingest_items(fetched, provider, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(fetched))
            if fetched:
                update[LAST_ITEM_UPDATE] = utcnow()

        ingest_service = superdesk.get_resource_service('ingest_providers')
        ingest_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            notification = dict(
                resource='ingest_providers',
                user_list=ingest_service._get_administrators(),
                name=provider.get('name'),
                # The stored timestamp is treated as UTC and rendered in local time.
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc)
                .astimezone(tz=None).strftime("%c"),
            )
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                **notification)

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        # Only push a notification if there has been an update
        if LAST_ITEM_UPDATE in update:
            push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        unlock(lock_name, host_name)
def remove_expired_data(provider):
    """Remove expired data for provider.

    Items older than the provider's ``days_to_keep`` (defaulting to
    ``DAYS_TO_KEEP``) are removed one by one through the ``ingest``
    resource's ``delete_action``. For items not marked ``archived`` the
    media referenced by their renditions is deleted from media storage as
    well. The number of expired items is reported to ``stats``.

    :param provider: ingest provider dict; must contain ``_id``.
    """
    print('Removing expired content for provider: %s' % provider['_id'])
    days_to_keep_content = provider.get('days_to_keep', DAYS_TO_KEEP)
    expiration_date = utcnow() - timedelta(days=days_to_keep_content)
    items = get_expired_items(str(provider['_id']), expiration_date)

    # Count once, before anything is deleted: asking the result set for its
    # count again after the deletions would no longer include the items that
    # were just removed, so the statistic would be under-reported.
    expired_count = items.count()

    if expired_count > 0:
        for item in items:
            print('Removing item %s' % item['_id'])
            superdesk.get_resource_service('ingest').delete_action({'_id': str(item['_id'])})
            # Media of archived items is left in place.
            if not item.get('archived'):
                for file_id in [rend.get('media') for rend in item.get('renditions', {}).values()
                                if rend.get('media')]:
                    superdesk.app.media.delete(file_id)

    stats.incr('ingest.expired_items', expired_count)
    print('Removed expired content for provider: %s' % provider['_id'])
def update_provider(provider, rule_set=None, routing_scheme=None):
    """Fetch items from an ingest provider, ingest them and update the provider.

    Returns early when ``is_task_running`` reports true for the provider,
    when the provider type is ``search`` or when ``is_updatable`` rejects
    it. Otherwise all batches from the provider type's ``update`` are
    ingested, the provider document is updated, administrators are notified
    if the provider looks idle, and an ``ingest:update`` notification is
    pushed. ``mark_task_as_not_running`` is called once the ingest section
    finishes, whether or not it raised.

    :param provider: ingest provider dict
    :param rule_set: passed through to ``ingest_items``
    :param routing_scheme: passed through to ``ingest_items``
    """
    schedule = provider.get("update_schedule", UPDATE_SCHEDULE_DEFAULT)
    if is_task_running(provider["name"], provider[superdesk.config.ID_FIELD], schedule):
        return

    # NOTE(review): these two early returns happen after ``is_task_running``
    # but outside the try/finally, so ``mark_task_as_not_running`` is not
    # called for them - confirm that is intended.
    if provider.get("type") == "search" or not is_updatable(provider):
        return

    try:
        changes = {LAST_UPDATED: utcnow()}

        fetcher = providers[provider.get("type")]
        for batch in fetcher.update(provider):
            ingest_items(batch, provider, rule_set, routing_scheme)
            stats.incr("ingest.ingested_items", len(batch))
            if batch:
                changes[LAST_ITEM_UPDATE] = utcnow()

        ingest_service = superdesk.get_resource_service("ingest_providers")
        ingest_service.system_update(provider[superdesk.config.ID_FIELD], changes, provider)

        got_items = LAST_ITEM_UPDATE in changes
        if not got_items and get_is_idle(provider):
            admins = ingest_service._get_administrators()
            provider_name = provider.get("name")
            # The stored timestamp is treated as UTC and rendered in local time.
            last_seen = provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None)
            notify_and_add_activity(
                ACTIVITY_EVENT,
                "Provider {{name}} has gone strangely quiet. Last activity was on {{last}}",
                resource="ingest_providers",
                user_list=admins,
                name=provider_name,
                last=last_seen.strftime("%c"),
            )

        current_id = provider[superdesk.config.ID_FIELD]
        logger.info("Provider {0} updated".format(current_id))
        push_notification("ingest:update", provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        mark_task_as_not_running(provider["name"], provider[superdesk.config.ID_FIELD])
def update_provider(provider, rule_set=None):
    """Stamp the provider as updated, then ingest everything it delivers.

    The provider document gets a fresh ``LAST_UPDATED`` value, each batch
    yielded by the provider type's ``update`` is handed to ``ingest_items``
    and counted in ``stats``, and an ``ingest:update`` notification is pushed
    when done.

    :param provider: ingest provider dict
    :param rule_set: passed through to ``ingest_items``
    """
    service = superdesk.get_resource_service('ingest_providers')
    provider_id = provider['_id']
    updates = {
        LAST_UPDATED: utcnow(),
        # Providing the _etag as system updates to the documents shouldn't override _etag.
        app.config['ETAG']: provider.get(app.config['ETAG']),
    }
    service.update(provider_id, updates)

    batches = providers[provider.get('type')].update(provider)
    for received in batches:
        ingest_items(received, provider, rule_set)
        stats.incr('ingest.ingested_items', len(received))

    message = 'Provider {0} updated'.format(provider['_id'])
    logger.info(message)
    push_notification('ingest:update')
def remove_expired_data(provider):
    """Remove expired data for provider.

    The retention period is the provider's ``days_to_keep`` value, or
    ``DAYS_TO_KEEP`` when absent. Each expired item is removed through the
    ``ingest`` resource's ``delete_action``; unless the item is marked
    ``archived``, the media referenced by its renditions is deleted from
    media storage too. The number of expired items is reported to ``stats``.

    :param provider: ingest provider dict; must contain ``_id``.
    """
    print('Removing expired content for provider: %s' % provider['_id'])
    days_to_keep_content = provider.get('days_to_keep', DAYS_TO_KEEP)
    expiration_date = utcnow() - timedelta(days=days_to_keep_content)
    items = get_expired_items(str(provider['_id']), expiration_date)

    # Take the count up front. Evaluating ``items.count()`` again after the
    # loop below would count the query result once its items have already
    # been deleted, which under-reports the statistic.
    expired_count = items.count()

    if expired_count > 0:
        for item in items:
            print('Removing item %s' % item['_id'])
            superdesk.get_resource_service('ingest').delete_action(
                {'_id': str(item['_id'])})
            # Media of archived items is left in place.
            if not item.get('archived'):
                for file_id in [
                    rend.get('media')
                    for rend in item.get('renditions', {}).values()
                    if rend.get('media')
                ]:
                    superdesk.app.media.delete(file_id)

    stats.incr('ingest.expired_items', expired_count)
    print('Removed expired content for provider: %s' % provider['_id'])
def remove_expired_data(provider):
    """Remove a provider's expired items from its ingest collection.

    The collection is the ``service`` attribute of a fresh instance of the
    provider's registered feeding service when that attribute exists;
    otherwise, or when a ``KeyError`` occurs while resolving the feeding
    service, it is ``ingest``. Expired items are deleted from that resource
    and announced via ``push_expired_notification``; media referenced by
    renditions of non-archived items is deleted from media storage; the
    removed-item count goes to ``stats``; finally
    ``remove_expired_from_elastic`` is called for the same collection.

    :param provider: ingest provider dict; when it has no ``_id`` the log
        output refers to it as "Detached items".
    """
    logger.info('Removing expired content for provider: %s' % provider.get('_id', 'Detached items'))

    try:
        registered = registered_feeding_services[provider['feeding_service']]
        service_instance = registered.__class__()
        if hasattr(service_instance, 'service'):
            ingest_collection = service_instance.service
        else:
            ingest_collection = 'ingest'
    except KeyError:
        ingest_collection = 'ingest'

    ingest_service = superdesk.get_resource_service(ingest_collection)
    expired = get_expired_items(provider, ingest_collection)

    ids = []
    for doc in expired:
        ids.append(doc['_id'])

    # The media pass re-reads the same result set from the beginning.
    expired.rewind()
    file_ids = []
    for doc in expired:
        for rendition in doc.get('renditions', {}).values():
            # Archived items keep their media.
            keep = doc.get('archived') or not rendition.get('media')
            if not keep:
                file_ids.append(rendition.get('media'))

    if ids:
        logger.info('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for media_id in file_ids:
        logger.info('Deleting file: %s' % media_id)
        superdesk.app.media.delete(media_id)

    total_removed = len(ids)
    stats.incr('ingest.expired_items', total_removed)
    logger.info(
        'Removed expired content for provider: {0} count: {1}'.format(
            provider.get('_id', 'Detached items'), total_removed))

    remove_expired_from_elastic(ingest_collection)