def remove_expired_data(provider):
    """Delete the expired ingest items found for ``provider``.

    The items come from ``get_expired_items(provider)``. Their documents are
    deleted through the ``ingest`` resource service and their ids are handed
    to ``push_expired_notification``. The ``media`` file of every rendition
    that belongs to an item not flagged ``archived`` is deleted from
    ``superdesk.app.media``. Finally the ``ingest.expired_items`` counter is
    incremented and ``remove_expired_from_elastic`` is called.

    :param provider: provider mapping; its ``_id`` (or ``'Detached items'``
        when absent) is used in the log messages.
    """
    provider_label = provider.get('_id', 'Detached items')
    logger.info('Removing expired content for provider: %s' % provider_label)

    service = superdesk.get_resource_service('ingest')
    expired = get_expired_items(provider)

    # First pass over the result: ids of the documents to delete.
    expired_ids = []
    for entry in expired:
        expired_ids.append(entry['_id'])

    # Second pass: rewind the result and collect the media ids to delete.
    expired.rewind()
    media_ids = []
    for entry in expired:
        for rendition in entry.get('renditions', {}).values():
            if entry.get('archived'):
                # media of archived items is left untouched
                continue
            media_id = rendition.get('media')
            if media_id:
                media_ids.append(media_id)

    if expired_ids:
        logger.info('Removing items %s' % expired_ids)
        service.delete({'_id': {'$in': expired_ids}})
        push_expired_notification(expired_ids)

    for media_id in media_ids:
        logger.info('Deleting file: %s' % media_id)
        superdesk.app.media.delete(media_id)

    removed_count = len(expired_ids)
    stats.incr('ingest.expired_items', removed_count)
    logger.info('Removed expired content for provider: {0} count: {1}'.format(provider_label, removed_count))
    remove_expired_from_elastic()
def remove_expired_data(provider):
    """Delete the expired items found for ``provider``.

    The collection to clean is taken from the ``service`` attribute of a
    fresh instance of the provider's registered feeding service; it falls
    back to ``'ingest'`` when that attribute is missing or a ``KeyError`` is
    raised while resolving it. Expired documents are deleted through that
    resource service, their ids are handed to ``push_expired_notification``,
    and the ``media`` file of every rendition belonging to an item not
    flagged ``archived`` is deleted from ``superdesk.app.media``. Finally
    ``remove_expired_from_elastic`` is called for the same collection.

    :param provider: provider mapping; ``feeding_service`` selects the
        feeding service and ``_id`` (or ``'Detached items'``) is logged.
    """
    provider_label = provider.get('_id', 'Detached items')
    logger.info('Removing expired content for provider: %s' % provider_label)

    try:
        registered = registered_feeding_services[provider['feeding_service']]
        fresh_service = registered.__class__()
        if hasattr(fresh_service, 'service'):
            collection = fresh_service.service
        else:
            collection = 'ingest'
    except KeyError:
        collection = 'ingest'

    service = superdesk.get_resource_service(collection)
    expired = get_expired_items(provider, collection)

    # First pass over the result: ids of the documents to delete.
    expired_ids = []
    for entry in expired:
        expired_ids.append(entry['_id'])

    # Second pass: rewind the result and collect the media ids to delete.
    expired.rewind()
    media_ids = []
    for entry in expired:
        for rendition in entry.get('renditions', {}).values():
            if entry.get('archived'):
                # media of archived items is left untouched
                continue
            media_id = rendition.get('media')
            if media_id:
                media_ids.append(media_id)

    if expired_ids:
        logger.info('Removing items %s' % expired_ids)
        service.delete({'_id': {'$in': expired_ids}})
        push_expired_notification(expired_ids)

    for media_id in media_ids:
        logger.info('Deleting file: %s' % media_id)
        superdesk.app.media.delete(media_id)

    logger.info('Removed expired content for provider: {0} count: {1}'.format(provider_label, len(expired_ids)))
    remove_expired_from_elastic(collection)
def remove_expired_data(provider):
    """Purge the expired ingest content of ``provider``.

    Expired items are obtained from ``get_expired_items(provider)`` and
    removed via the ``ingest`` resource service, followed by a call to
    ``push_expired_notification`` with the removed ids. Rendition ``media``
    files of items that are not flagged ``archived`` are deleted from
    ``superdesk.app.media``. The ``ingest.expired_items`` stat is bumped by
    the number of removed ids and ``remove_expired_from_elastic`` runs last.

    :param provider: provider mapping; ``_id`` is used for logging, with
        ``'Detached items'`` as the fallback label.
    """
    label = provider.get('_id', 'Detached items')
    logger.info('Removing expired content for provider: %s' % label)

    ingest = superdesk.get_resource_service('ingest')
    cursor = get_expired_items(provider)

    doc_ids = [doc['_id'] for doc in cursor]

    # The result was consumed above; rewind it before walking the renditions.
    cursor.rewind()
    files_to_delete = []
    for doc in cursor:
        renditions = doc.get('renditions', {}).values()
        for rendition in renditions:
            keep_media = doc.get('archived') or not rendition.get('media')
            if not keep_media:
                files_to_delete.append(rendition.get('media'))

    if doc_ids:
        logger.info('Removing items %s' % doc_ids)
        ingest.delete({'_id': {'$in': doc_ids}})
        push_expired_notification(doc_ids)

    for file_id in files_to_delete:
        logger.info('Deleting file: %s' % file_id)
        superdesk.app.media.delete(file_id)

    total = len(doc_ids)
    stats.incr('ingest.expired_items', total)

    summary = 'Removed expired content for provider: {0} count: {1}'.format(label, total)
    logger.info(summary)
    remove_expired_from_elastic()
def on_deleted(self, doc):
    """Run the post-delete housekeeping for a removed item.

    For composite items the package service's ``on_deleted`` hook runs
    first. Then ``remove_media_files`` is called for the document, a delete
    activity is recorded and an expired notification is pushed for the item.

    :param doc: the deleted item document.
    """
    if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
        self.packageService.on_deleted(doc)

    remove_media_files(doc)

    add_activity(ACTIVITY_DELETE, 'removed item {{ type }} about {{ subject }}', self.datasource,
                 item=doc, type=doc[ITEM_TYPE], subject=get_subject(doc))

    # push_expired_notification is given item ids by its other callers, so
    # pass the id of the deleted item rather than the whole document.
    push_expired_notification([doc.get('_id')])
def on_deleted(self, doc):
    """Run the post-delete housekeeping for a removed item.

    In order: clear the item's autosave, let the package service handle
    composite items, call ``remove_media_files``, record a delete activity,
    push an expired notification carrying the item id and finally invoke
    ``app.on_archive_item_deleted``.

    :param doc: the deleted item document.
    """
    autosave = get_component(ItemAutosave)
    autosave.clear(doc['_id'])

    is_package = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE
    if is_package:
        self.packageService.on_deleted(doc)

    remove_media_files(doc)

    activity_message = 'removed item {{ type }} about {{ subject }}'
    add_activity(
        ACTIVITY_DELETE,
        activity_message,
        self.datasource,
        item=doc,
        type=doc[ITEM_TYPE],
        subject=get_subject(doc),
    )

    expired_ids = [doc.get(config.ID_FIELD)]
    push_expired_notification(expired_ids)

    app.on_archive_item_deleted(doc)
def remove_expired_data(provider):
    """Purge the expired content of ``provider`` from its ingest collection.

    The collection name is the ``service`` attribute of a fresh instance of
    the provider's registered feeding service, or ``'ingest'`` when the
    attribute is missing or a ``KeyError`` occurs while resolving it.
    Expired items are removed via that resource service, followed by a call
    to ``push_expired_notification`` with the removed ids. Rendition
    ``media`` files of items that are not flagged ``archived`` are deleted
    from ``superdesk.app.media``, and ``remove_expired_from_elastic`` runs
    last for the same collection.

    :param provider: provider mapping; ``feeding_service`` selects the
        feeding service and ``_id`` (or ``'Detached items'``) is logged.
    """
    label = provider.get('_id', 'Detached items')
    logger.info('Removing expired content for provider: %s' % label)

    try:
        service_key = provider['feeding_service']
        feeding_service = registered_feeding_services[service_key].__class__()
        ingest_collection = (
            feeding_service.service if hasattr(feeding_service, 'service') else 'ingest'
        )
    except KeyError:
        ingest_collection = 'ingest'

    resource = superdesk.get_resource_service(ingest_collection)
    cursor = get_expired_items(provider, ingest_collection)

    doc_ids = [doc['_id'] for doc in cursor]

    # The result was consumed above; rewind it before walking the renditions.
    cursor.rewind()
    files_to_delete = []
    for doc in cursor:
        renditions = doc.get('renditions', {}).values()
        for rendition in renditions:
            keep_media = doc.get('archived') or not rendition.get('media')
            if not keep_media:
                files_to_delete.append(rendition.get('media'))

    if doc_ids:
        logger.info('Removing items %s' % doc_ids)
        resource.delete({'_id': {'$in': doc_ids}})
        push_expired_notification(doc_ids)

    for file_id in files_to_delete:
        logger.info('Deleting file: %s' % file_id)
        superdesk.app.media.delete(file_id)

    summary = 'Removed expired content for provider: {0} count: {1}'.format(label, len(doc_ids))
    logger.info(summary)
    remove_expired_from_elastic(ingest_collection)
def remove_expired_data(provider):
    """Delete the expired items found for ``provider``.

    The collection to clean is the ``service`` attribute of the object
    returned by ``get_feeding_service`` for the provider, or ``"ingest"``
    when that attribute is missing or a ``KeyError`` is raised while
    resolving it. Expired documents are deleted through that resource
    service, their ids are handed to ``push_expired_notification``, and the
    ``media`` file of every rendition belonging to an item not flagged
    ``archived`` is deleted from ``superdesk.app.media``. Finally
    ``remove_expired_from_elastic`` is called for the same collection.

    :param provider: provider mapping; ``feeding_service`` selects the
        feeding service and ``_id`` (or ``"Detached items"``) is logged.
    """
    provider_label = provider.get("_id", "Detached items")
    logger.info("Removing expired content for provider: %s" % provider_label)

    try:
        service_obj = get_feeding_service(provider["feeding_service"])
        if hasattr(service_obj, "service"):
            collection = service_obj.service
        else:
            collection = "ingest"
    except KeyError:
        collection = "ingest"

    resource = superdesk.get_resource_service(collection)
    expired = get_expired_items(provider, collection)

    # First pass over the result: ids of the documents to delete.
    expired_ids = []
    for entry in expired:
        expired_ids.append(entry["_id"])

    # Second pass: rewind the result and collect the media ids to delete.
    expired.rewind()
    media_ids = []
    for entry in expired:
        for rendition in entry.get("renditions", {}).values():
            if entry.get("archived"):
                # media of archived items is left untouched
                continue
            media_id = rendition.get("media")
            if media_id:
                media_ids.append(media_id)

    if expired_ids:
        logger.info("Removing items %s" % expired_ids)
        resource.delete({"_id": {"$in": expired_ids}})
        push_expired_notification(expired_ids)

    for media_id in media_ids:
        logger.info("Deleting file: %s" % media_id)
        superdesk.app.media.delete(media_id)

    logger.info("Removed expired content for provider: {0} count: {1}".format(provider_label, len(expired_ids)))
    remove_expired_from_elastic(collection)
def _remove_expired_items(self, expiry_datetime):
    """Archive or delete the items reported as expired for ``expiry_datetime``.

    Steps, in order:

    * spiked items are handed to ``delete_spiked_items``;
    * killed items are checked with
      ``check_if_items_imported_to_legal_archive``; the ones it returns are
      set aside as "having issues";
    * every other non-spiked item is passed to ``_can_remove_item``; the
      items it collects are queued for archiving unless the legal-archive
      check returns something for them;
    * queued items go through ``_move_to_archived``; killed items are
      deleted from ``published`` and then from the archive service;
    * ``push_expired_notification`` is called with the items to expire;
    * items having issues get ``expiry_status`` set to ``'invalid'``.

    Every log line is prefixed with ``self.log_msg``.

    :param datetime expiry_datetime: expiry datetime
    """
    logger.info('{} Starting to remove published expired items.'.format(self.log_msg))
    archive_service = get_resource_service(ARCHIVE)
    published_service = get_resource_service('published')
    items_to_remove = set()
    items_to_be_archived = {}
    items_having_issues = {}

    expired_items = list(archive_service.get_expired_items(expiry_datetime))
    if not expired_items:
        logger.info('{} No items found to expire.'.format(self.log_msg))
        return

    # delete spiked items
    self.delete_spiked_items(expired_items)

    # get killed items
    killed_items = {item.get(config.ID_FIELD): item for item in expired_items
                    if item.get(ITEM_STATE) in {CONTENT_STATE.KILLED}}

    # check if killed items imported to legal
    items_having_issues.update(self.check_if_items_imported_to_legal_archive(killed_items))

    # filter out the killed items not imported to legal.
    killed_items = {item_id: item for item_id, item in killed_items.items()
                    if item_id not in items_having_issues}

    # Get the not killed and spiked items
    not_killed_items = {item.get(config.ID_FIELD): item for item in expired_items
                        if item.get(ITEM_STATE) not in {CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED}}

    log_msg_format = "{{'_id': {_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                     "'expired_on': {expiry}}}."

    # Processing items to expire
    for item_id, item in not_killed_items.items():
        # make sure every key used by log_msg_format is present
        item.setdefault(config.VERSION, 1)
        item.setdefault('expiry', expiry_datetime)
        item.setdefault('unique_name', '')
        expiry_msg = log_msg_format.format(**item)
        logger.info('{} Processing expired item. {}'.format(self.log_msg, expiry_msg))

        # filled by _can_remove_item
        processed_items = {}
        if item_id not in items_to_be_archived and item_id not in items_having_issues and \
                self._can_remove_item(item, processed_items):
            # item can be archived and removed from the database
            logger.info('{} Removing item. {}'.format(self.log_msg, expiry_msg))
            logger.info('{} Items to be removed. {}'.format(self.log_msg, processed_items))
            issues = self.check_if_items_imported_to_legal_archive(processed_items)
            if issues:
                # any issue blocks the whole processed group
                items_having_issues.update(processed_items)
            else:
                items_to_be_archived.update(processed_items)

    # all items to expire
    items_to_expire = deepcopy(items_to_be_archived)

    # check once again in items imported to legal
    items_having_issues.update(self.check_if_items_imported_to_legal_archive(items_to_expire))
    # NOTE(review): the extent of this ``if`` body was reconstructed from
    # whitespace-collapsed source - confirm against the original file.
    if items_having_issues:
        # remove items not imported to legal
        items_to_expire = {item_id: item for item_id, item in items_to_expire.items()
                           if item_id not in items_having_issues}

        # remove items not imported to legal from archived items
        items_to_be_archived = {item_id: item for item_id, item in items_to_be_archived.items()
                                if item_id not in items_having_issues}

        # items_to_be_archived might contain killed items
        for item_id, item in items_to_be_archived.items():
            if item.get(ITEM_STATE) == CONTENT_STATE.KILLED:
                killed_items[item_id] = item

        # remove killed items from the items_to_be_archived
        items_to_be_archived = {item_id: item for item_id, item in items_to_be_archived.items()
                                if item.get(ITEM_STATE) != CONTENT_STATE.KILLED}

    # add killed items to items to expire
    items_to_expire.update(killed_items)

    # get the filter conditions
    logger.info('{} filter conditions.'.format(self.log_msg))
    req = ParsedRequest()
    filter_conditions = list(get_resource_service('content_filters').get(req=req,
                                                                         lookup={'is_archived_filter': True}))

    # move to archived collection
    logger.info('{} Archiving items.'.format(self.log_msg))
    for item in items_to_be_archived.values():
        self._move_to_archived(item, filter_conditions)

    for item_id, item in killed_items.items():
        # delete from the published collection and queue
        msg = log_msg_format.format(**item)
        try:
            published_service.delete_by_article_id(item_id)
            logger.info('{} Deleting killed item from published. {}'.format(self.log_msg, msg))
            items_to_remove.add(item_id)
        except Exception:
            # best effort: log and continue with the next item, but let
            # KeyboardInterrupt/SystemExit propagate
            logger.exception('{} Failed to delete killed item from published. {}'.format(self.log_msg, msg))

    if items_to_remove:
        logger.info('{} Deleting articles.: {}'.format(self.log_msg, items_to_remove))
        archive_service.delete_by_article_ids(list(items_to_remove))

    # NOTE(review): placed after (not inside) the ``if`` above when
    # reconstructing the collapsed source - confirm.
    push_expired_notification(items_to_expire)

    for item in items_having_issues.values():
        msg = log_msg_format.format(**item)
        try:
            archive_service.system_update(item.get(config.ID_FIELD), {'expiry_status': 'invalid'}, item)
            logger.info('{} Setting item expiry status. {}'.format(self.log_msg, msg))
        except Exception:
            # best effort: log and continue with the next item
            logger.exception('{} Failed to set expiry status for item. {}'.format(self.log_msg, msg))

    logger.info('{} Deleting killed from archive.'.format(self.log_msg))
def _remove_expired_items(self, expiry_datetime):
    """Archive or delete the items reported as expired for ``expiry_datetime``.

    Steps, in order:

    * spiked items are handed to ``delete_spiked_items``;
    * killed items are checked with
      ``check_if_items_imported_to_legal_archive``; the ones it returns are
      set aside as "having issues";
    * every other non-spiked item is passed to ``_can_remove_item``; the
      items it collects are queued for archiving unless the legal-archive
      check returns something for them;
    * queued items go through ``_move_to_archived``; killed items are
      deleted from ``published`` and then from the archive service;
    * ``push_expired_notification`` is called with the items to expire;
    * items having issues get ``expiry_status`` set to ``'invalid'``.

    Every log line is prefixed with ``self.log_msg``.

    :param datetime expiry_datetime: expiry datetime
    """
    logger.info('{} Starting to remove published expired items.'.format(self.log_msg))
    archive_service = get_resource_service(ARCHIVE)
    published_service = get_resource_service('published')
    items_to_remove = set()
    items_to_be_archived = {}
    items_having_issues = {}

    expired_items = list(archive_service.get_expired_items(expiry_datetime))
    if not expired_items:
        logger.info('{} No items found to expire.'.format(self.log_msg))
        return

    # delete spiked items
    self.delete_spiked_items(expired_items)

    # get killed items
    killed_items = {item.get(config.ID_FIELD): item for item in expired_items
                    if item.get(ITEM_STATE) in {CONTENT_STATE.KILLED}}

    # check if killed items imported to legal
    items_having_issues.update(self.check_if_items_imported_to_legal_archive(killed_items))

    # filter out the killed items not imported to legal.
    killed_items = {item_id: item for item_id, item in killed_items.items()
                    if item_id not in items_having_issues}

    # Get the not killed and spiked items
    not_killed_items = {item.get(config.ID_FIELD): item for item in expired_items
                        if item.get(ITEM_STATE) not in {CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED}}

    log_msg_format = "{{'_id': {_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                     "'expired_on': {expiry}}}."

    # Processing items to expire
    for item_id, item in not_killed_items.items():
        # make sure every key used by log_msg_format is present
        item.setdefault(config.VERSION, 1)
        item.setdefault('expiry', expiry_datetime)
        item.setdefault('unique_name', '')
        expiry_msg = log_msg_format.format(**item)
        logger.info('{} Processing expired item. {}'.format(self.log_msg, expiry_msg))

        # filled by _can_remove_item
        processed_items = {}
        if item_id not in items_to_be_archived and item_id not in items_having_issues and \
                self._can_remove_item(item, processed_items):
            # item can be archived and removed from the database
            logger.info('{} Removing item. {}'.format(self.log_msg, expiry_msg))
            logger.info('{} Items to be removed. {}'.format(self.log_msg, processed_items))
            issues = self.check_if_items_imported_to_legal_archive(processed_items)
            if issues:
                # any issue blocks the whole processed group
                items_having_issues.update(processed_items)
            else:
                items_to_be_archived.update(processed_items)

    # all items to expire
    items_to_expire = deepcopy(items_to_be_archived)

    # check once again in items imported to legal
    items_having_issues.update(self.check_if_items_imported_to_legal_archive(items_to_expire))
    # NOTE(review): the extent of this ``if`` body was reconstructed from
    # whitespace-collapsed source - confirm against the original file.
    if items_having_issues:
        # remove items not imported to legal
        items_to_expire = {item_id: item for item_id, item in items_to_expire.items()
                           if item_id not in items_having_issues}

        # remove items not imported to legal from archived items
        items_to_be_archived = {item_id: item for item_id, item in items_to_be_archived.items()
                                if item_id not in items_having_issues}

        # items_to_be_archived might contain killed items
        for item_id, item in items_to_be_archived.items():
            if item.get(ITEM_STATE) == CONTENT_STATE.KILLED:
                killed_items[item_id] = item

        # remove killed items from the items_to_be_archived
        items_to_be_archived = {item_id: item for item_id, item in items_to_be_archived.items()
                                if item.get(ITEM_STATE) != CONTENT_STATE.KILLED}

    # add killed items to items to expire
    items_to_expire.update(killed_items)

    # get the filter conditions
    logger.info('{} filter conditions.'.format(self.log_msg))
    req = ParsedRequest()
    filter_conditions = list(get_resource_service('content_filters').get(req=req,
                                                                         lookup={'is_archived_filter': True}))

    # move to archived collection
    logger.info('{} Archiving items.'.format(self.log_msg))
    for item in items_to_be_archived.values():
        self._move_to_archived(item, filter_conditions)

    for item_id, item in killed_items.items():
        # delete from the published collection and queue
        msg = log_msg_format.format(**item)
        try:
            published_service.delete_by_article_id(item_id)
            logger.info('{} Deleting killed item from published. {}'.format(self.log_msg, msg))
            items_to_remove.add(item_id)
        except Exception:
            # best effort: log and continue with the next item, but let
            # KeyboardInterrupt/SystemExit propagate
            logger.exception('{} Failed to delete killed item from published. {}'.format(self.log_msg, msg))

    if items_to_remove:
        logger.info('{} Deleting articles.: {}'.format(self.log_msg, items_to_remove))
        archive_service.delete_by_article_ids(list(items_to_remove))

    # NOTE(review): placed after (not inside) the ``if`` above when
    # reconstructing the collapsed source - confirm.
    push_expired_notification(items_to_expire)

    for item in items_having_issues.values():
        msg = log_msg_format.format(**item)
        try:
            archive_service.system_update(item.get(config.ID_FIELD), {'expiry_status': 'invalid'}, item)
            logger.info('{} Setting item expiry status. {}'.format(self.log_msg, msg))
        except Exception:
            # best effort: log and continue with the next item
            logger.exception('{} Failed to set expiry status for item. {}'.format(self.log_msg, msg))

    logger.info('{} Deleting killed from archive.'.format(self.log_msg))