def test_lock(self):
    task = "test"
    self.assertTrue(lock(task, expire=2))
    self.assertFalse(lock(task))
    time.sleep(2)
    # lock again after expiry
    self.assertTrue(lock(task, expire=1))
    self.assertTrue(touch(task, expire=10))
    time.sleep(2)
    # can't lock after touch
    self.assertFalse(lock(task))
    unlock(task)
    # can't touch after unlock
    self.assertFalse(touch(task))
    # unlocking again is noop
    unlock(task)
    # locking after unlocking
    self.assertTrue(lock(task, expire=1))
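# The lock/touch/unlock helpers exercised by the test are defined elsewhere in
# the codebase; below is only a minimal sketch of the semantics the test relies
# on, assuming a Redis-backed implementation. The `redis_client` instance and
# these function bodies are hypothetical, not the real module's code.
import redis

redis_client = redis.Redis()


def lock(name, expire=None):
    """Acquire the lock if nobody holds it; return True on success."""
    # SET with nx=True writes the key only when it does not exist yet,
    # and ex=expire makes the lock self-release after `expire` seconds.
    return bool(redis_client.set(name, "locked", nx=True, ex=expire))


def touch(name, expire=None):
    """Extend a held lock's TTL; return False if the lock is gone."""
    if not redis_client.exists(name):
        return False
    if expire is not None:
        redis_client.expire(name, expire)
    return True


def unlock(name):
    """Release the lock; deleting a missing key is a harmless noop."""
    redis_client.delete(name)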
def _remove_expired_archived_items(self, now, lock_name):
    if not touch(lock_name, expire=600):
        logger.warning('{} Lost lock before removing expired items from archived.'.format(self.log_msg))
        return
    EXPIRY_MINUTES = app.config.get('ARCHIVED_EXPIRY_MINUTES')
    EXPIRY_LIMIT = app.config.get('MAX_EXPIRY_QUERY_LIMIT', 100)
    if not EXPIRY_MINUTES:
        return
    logger.info('%s Starting to remove expired items from archived.', self.log_msg)
    archived_service = get_resource_service('archived')
    query = _get_expired_mongo_ids_query(EXPIRY_MINUTES, now)
    expired = list(archived_service.find(query, max_results=EXPIRY_LIMIT, sort='_id'))
    if not len(expired):
        logger.info('%s No items found to expire in archived.', self.log_msg)
    else:
        logger.info('%s Removing %d expired items from archived.', self.log_msg, len(expired))
        removed = archived_service.delete_docs(expired)
        for item in expired:
            if item['_id'] not in removed:
                logger.error('%s Item was not removed from archived item=%s', self.log_msg, item['item_id'])
                continue
            signals.archived_item_removed.send(archived_service, item=item)
            # without a legal archive, media files can go as well, unless the
            # item is still referenced by another archived version
            if not app.config.get('LEGAL_ARCHIVE') and not archived_service.find_one(req=None, item_id=item['item_id']):
                remove_media_files(item)
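# _get_expired_mongo_ids_query is used above but not shown. A plausible sketch,
# assuming Mongo ObjectId primary keys: ObjectIds embed their creation time, so
# expiry can be a single indexed range query on _id. The body below is a
# hypothetical reconstruction, not necessarily the real implementation.
from datetime import timedelta

from bson import ObjectId


def _get_expired_mongo_ids_query(expiry_minutes, now):
    # Anything created before the cutoff has an ObjectId smaller than the
    # one generated from that cutoff datetime.
    expiry_datetime = now - timedelta(minutes=expiry_minutes)
    return {'_id': {'$lt': ObjectId.from_datetime(expiry_datetime)}}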
def _remove_expired_items(self, expiry_datetime, lock_name):
    """Remove the expired items.

    :param datetime expiry_datetime: expiry datetime
    :param str lock_name: lock name to touch
    """
    logger.info('{} Starting to remove published expired items.'.format(self.log_msg))
    archive_service = get_resource_service(ARCHIVE)
    published_service = get_resource_service('published')
    items_to_remove = set()
    items_to_be_archived = dict()
    items_having_issues = dict()

    preserve_published_desks = {
        desk.get(config.ID_FIELD): 1
        for desk in get_resource_service('desks').find(where={'preserve_published_content': True})
    }

    for expired_items in archive_service.get_expired_items(expiry_datetime):
        if len(expired_items) == 0:
            logger.info('{} No items found to expire.'.format(self.log_msg))
            return

        if not touch(lock_name, expire=600):
            logger.warning('{} Lost lock while removing expired items.'.format(self.log_msg))
            return

        # delete spiked items
        self.delete_spiked_items(expired_items)

        # get killed items
        killed_items = {
            item.get(config.ID_FIELD): item
            for item in expired_items
            if item.get(ITEM_STATE) in {CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED}
        }

        # check if killed items were imported to legal
        items_having_issues.update(self.check_if_items_imported_to_legal_archive(killed_items))

        # filter out the killed items not imported to legal
        killed_items = {
            item_id: item for item_id, item in killed_items.items()
            if item_id not in items_having_issues
        }

        # get the items that are neither killed nor spiked
        not_killed_items = {
            item.get(config.ID_FIELD): item
            for item in expired_items
            if item.get(ITEM_STATE) not in {CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED, CONTENT_STATE.RECALLED}
        }

        log_msg_format = "{{'_id': {_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                         "'expired_on': {expiry}}}."

        # process the items to expire
        for item_id, item in not_killed_items.items():
            item.setdefault(config.VERSION, 1)
            item.setdefault('expiry', expiry_datetime)
            item.setdefault('unique_name', '')
            expiry_msg = log_msg_format.format(**item)
            logger.info('{} Processing expired item. {}'.format(self.log_msg, expiry_msg))

            processed_items = dict()
            if item_id not in items_to_be_archived and item_id not in items_having_issues and \
                    self._can_remove_item(item, processed_items, preserve_published_desks):
                # item can be archived and removed from the database
                logger.info('{} Removing item. {}'.format(self.log_msg, expiry_msg))
                logger.info('{} Items to be removed. {}'.format(self.log_msg, processed_items))
                issues = self.check_if_items_imported_to_legal_archive(processed_items)
                if issues:
                    items_having_issues.update(processed_items)
                else:
                    items_to_be_archived.update(processed_items)

        # all items to expire
        items_to_expire = deepcopy(items_to_be_archived)

        # check once again whether the items were imported to legal
        items_having_issues.update(self.check_if_items_imported_to_legal_archive(items_to_expire))
        if items_having_issues:
            # remove the items not imported to legal
            items_to_expire = {
                item_id: item for item_id, item in items_to_expire.items()
                if item_id not in items_having_issues
            }

            # remove the items not imported to legal from the archived items
            items_to_be_archived = {
                item_id: item for item_id, item in items_to_be_archived.items()
                if item_id not in items_having_issues
            }

        # items_to_be_archived might contain killed items
        for item_id, item in items_to_be_archived.items():
            if item.get(ITEM_STATE) in {CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED}:
                killed_items[item_id] = item

        # remove killed items from items_to_be_archived
        items_to_be_archived = {
            item_id: item for item_id, item in items_to_be_archived.items()
            if item.get(ITEM_STATE) not in {CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED}
        }

        # add killed items to the items to expire
        items_to_expire.update(killed_items)

        # get the filter conditions
        logger.info('{} Loading filter conditions.'.format(self.log_msg))
        req = ParsedRequest()
        filter_conditions = list(get_resource_service('content_filters').get(
            req=req, lookup={'is_archived_filter': True}))

        # move to the archived collection
        logger.info('{} Archiving items.'.format(self.log_msg))
        for _item_id, item in items_to_be_archived.items():
            self._move_to_archived(item, filter_conditions)

        for item_id, item in killed_items.items():
            # delete from the published collection and queue
            msg = log_msg_format.format(**item)
            try:
                published_service.delete_by_article_id(item_id)
                logger.info('{} Deleting killed item from published. {}'.format(self.log_msg, msg))
                items_to_remove.add(item_id)
            except Exception:
                logger.exception('{} Failed to delete killed item from published. {}'.format(self.log_msg, msg))

        if items_to_remove:
            logger.info('{} Deleting articles: {}'.format(self.log_msg, items_to_remove))
            archive_service.delete_by_article_ids(list(items_to_remove))

        for _item_id, item in items_having_issues.items():
            msg = log_msg_format.format(**item)
            try:
                archive_service.system_update(item.get(config.ID_FIELD), {'expiry_status': 'invalid'}, item)
                logger.info('{} Setting item expiry status. {}'.format(self.log_msg, msg))
            except Exception:
                logger.exception('{} Failed to set expiry status for item. {}'.format(self.log_msg, msg))

        logger.info('{} Deleting killed from archive.'.format(self.log_msg))
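# Both expiry passes above follow the same pattern: take the lock once, then
# re-touch it before each batch so a stalled worker loses the lock instead of
# blocking others forever. A generic sketch of that pattern; run_with_lock and
# process_batch are hypothetical names, not part of this module:
def run_with_lock(lock_name, batches, process_batch, expire=600):
    if not lock(lock_name, expire=expire):
        return  # another worker is already running
    try:
        for batch in batches:
            if not touch(lock_name, expire=expire):
                # The TTL lapsed and another worker may hold the lock now;
                # stop rather than risk processing the same items twice.
                break
            process_batch(batch)
    finally:
        unlock(lock_name)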
def update_provider(provider, rule_set=None, routing_scheme=None, sync=False):
    """Fetch items from ingest provider, ingest them into Superdesk and update the provider.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    :param sync: Running in sync mode from cli.
    """
    lock_name = get_lock_id("ingest", provider["name"], provider[superdesk.config.ID_FIELD])

    if not lock(lock_name, expire=UPDATE_TTL + 10):
        if sync:
            logger.error("update is already running for %s", provider["name"])
        return

    try:
        feeding_service = get_feeding_service(provider["feeding_service"])
        update = {LAST_UPDATED: utcnow()}

        if sync:
            provider[LAST_UPDATED] = utcnow() - timedelta(days=9999)  # import everything again

        generator = feeding_service.update(provider, update)
        if isinstance(generator, list):
            generator = (items for items in generator)
        failed = None
        while True:
            try:
                if not touch(lock_name, expire=UPDATE_TTL):
                    logger.warning("lock expired while updating provider %s", provider[superdesk.config.ID_FIELD])
                    return
                items = generator.send(failed)
                failed = ingest_items(items, provider, feeding_service, rule_set, routing_scheme)
                update_last_item_updated(update, items)
            except StopIteration:
                break

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service("ingest_providers")
        provider = ingest_provider_service.find_one(req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service("users").get_users_by_user_type("administrator")
            notify_and_add_activity(
                ACTIVITY_EVENT,
                "Provider {{name}} has gone strangely quiet. Last activity was on {{last}}",
                resource="ingest_providers",
                user_list=admins,
                name=provider.get("name"),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"),
            )

        logger.info("Provider {0} updated".format(provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:
            # Only push a notification if there has been an update
            push_notification("ingest:update", provider_id=str(provider[superdesk.config.ID_FIELD]))
    except Exception as e:
        logger.error("Failed to ingest file: {error}".format(error=e))
        raise IngestFileError(3000, e, provider)
    finally:
        unlock(lock_name)
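# The while/send loop above drives a feeding service generator: each yielded
# batch is ingested, and the ids that failed are sent back into the generator
# so the service can decide whether to retry them. A self-contained sketch of
# that protocol; example_update and drive are hypothetical, not real services:
def example_update(provider, update):
    for batch in ([{"guid": "a"}], [{"guid": "b"}]):
        # send() delivers the failures of the previous batch back to us here
        failed = yield batch
        if failed:
            print("failed items:", failed)


def drive(generator):
    failed = None  # first send() into a fresh generator must pass None
    while True:
        try:
            items = generator.send(failed)
        except StopIteration:
            break
        failed = []  # would be the result of ingest_items() in the real loop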
def sync():
    lock_name = "ultrad"
    if not lock(lock_name):
        logger.info("lock already taken %s", lock_name)
        return
    try:
        todo_stages = list(
            get_resource_service("stages").get(
                req=None, lookup={"name": app.config["ULTRAD_TODO_STAGE"]}
            )
        )
        if not len(todo_stages):
            logger.warning(
                "ultrad todo stage not found, name=%s", app.config["ULTRAD_TODO_STAGE"]
            )
            return
        for todo_stage in todo_stages:
            desk = get_resource_service("desks").find_one(
                req=None, _id=todo_stage["desk"]
            )
            if not desk:
                logger.warning(
                    "ultrad desk not found for stage desk=%s", todo_stage["desk"]
                )
                continue
            lookup = {"task.stage": todo_stage["_id"]}
            items = list(get_resource_service("archive").get(req=None, lookup=lookup))
            logger.info(
                "checking %d items on ultrad on desk %s", len(items), desk["name"]
            )
            for item in items:
                if not touch(lock_name, expire=300):
                    logger.warning("lost lock %s", lock_name)
                    break
                if item.get("lock_user") and item.get("lock_session"):
                    logger.info("skipping locked item guid=%s", item["guid"])
                    continue
                if item["state"] not in IN_PROGRESS_STATES:
                    logger.info(
                        "ignore item due to state guid=%s state=%s",
                        item["guid"],
                        item["state"],
                    )
                    continue
                try:
                    ultrad_id = item["extra"][ULTRAD_ID]
                except KeyError:
                    continue
                try:
                    ultrad_doc = get_document(ultrad_id)
                except UltradException:
                    continue
                if ultrad_doc["state"] == "revised":
                    try:
                        updated = item.copy()
                        updated["body_html"] = ultrad_doc["text"]["edited"]
                    except KeyError:
                        logger.info(
                            "no content in ultrad for item guid=%s ultrad_id=%s",
                            item["guid"],
                            ultrad_id,
                        )
                        continue
                    logger.info(
                        "updating item from ultrad guid=%s ultrad_id=%s",
                        item["guid"],
                        ultrad_id,
                    )
                    editor = Editor3Content(updated)
                    editor._create_state_from_html(updated["body_html"])
                    editor.update_item()
                    send_to(
                        updated, desk_id=desk["_id"], stage_id=desk["working_stage"]
                    )
                    updates = {
                        "task": updated["task"],
                        "body_html": updated["body_html"],
                        "fields_meta": updated["fields_meta"],
                    }
                    # don't use patch, it assumes there is a user
                    get_resource_service("archive").update(item["_id"], updates, item)
                    get_resource_service("archive").on_updated(updates, item)
                else:
                    logger.debug(
                        "skip updating item guid=%s ultrad_id=%s state=%s",
                        item["guid"],
                        ultrad_id,
                        ultrad_doc["state"],
                    )
    finally:
        unlock(lock_name)