def publish():
    """Fetch items from publish queue as per the configuration, call the transmit function."""
    with ProfileManager('publish:transmit'):
        lock_name = get_lock_id("Transmit", "Articles")

        if not lock(lock_name, expire=1810):
            logger.info('Task: %s is already running.', lock_name)
            return

        try:
            for priority in [True, False]:  # top priority first
                for retries in [False, True]:  # first publish pending, retries after
                    subs = get_queue_subscribers(priority=priority, retries=retries)
                    for sub in subs:
                        sub_lock_name = get_lock_id('Subscriber', 'Transmit', sub)
                        if is_locked(sub_lock_name):
                            logger.info('Task: %s is already running.', sub_lock_name)
                            continue
                        transmit_subscriber_items.apply_async(
                            (str(sub), ),
                            {'retries': retries, 'priority': priority},
                            queue=_get_queue(priority),
                        )
        except Exception:
            logger.exception('Task: %s failed.', lock_name)
        finally:
            logger.debug('unlock %s', lock_name)
            unlock(lock_name)
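Every task in this section repeats the same acquire/log/try/finally choreography around lock, unlock, and get_lock_id. As a minimal sketch of how that pattern could be factored into a context manager (the task_lock name and the in-memory stubs below are hypothetical, not the codebase's API):

import logging
from contextlib import contextmanager

logger = logging.getLogger(__name__)
_locks = set()  # stand-in for the real distributed lock backend


def get_lock_id(*parts):
    return ':'.join(str(p) for p in parts)


def lock(name, expire=None):
    # expire is ignored by this stub; a real backend would use it as a TTL
    if name in _locks:
        return False
    _locks.add(name)
    return True


def unlock(name):
    _locks.discard(name)


@contextmanager
def task_lock(*lock_id_parts, expire=300):
    """Yield the lock name if acquired, else None, and always release it."""
    lock_name = get_lock_id(*lock_id_parts)
    if not lock(lock_name, expire=expire):
        logger.info('Task: %s is already running.', lock_name)
        yield None
        return
    try:
        yield lock_name
    finally:
        unlock(lock_name)


with task_lock('Transmit', 'Articles', expire=1810) as name:
    if name:
        pass  # fetch queue items and transmit them here

Callers check the yielded name, which keeps the early-return behaviour of the originals while guaranteeing the lock is released.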
def run(self, page_size=None):
    logger.info("Import to Legal Archive")
    lock_name = get_lock_id("legal_archive", "import_to_legal_archive")
    page_size = int(page_size) if page_size else self.default_page_size

    if not lock(lock_name, "", expire=1800):
        return

    try:
        legal_archive_import = LegalArchiveImport()
        publish_queue = get_resource_service("publish_queue")
        for items in self.get_expired_items(page_size):
            for item in items:
                try:
                    legal_archive_import.upsert_into_legal_archive(item.get("item_id"))
                    req = ParsedRequest()
                    req.where = json.dumps({"item_id": item["item_id"]})
                    queue_items = list(publish_queue.get(req=req, lookup=None))
                    if queue_items:
                        try:
                            logger.info("Import to Legal Publish Queue")
                            legal_archive_import.process_queue_items(queue_items, True)
                        except Exception:
                            logger.exception(
                                "Failed to import into legal publish queue "
                                "archive via command {}.".format(item.get("item_id")))
                except Exception:
                    logger.exception(
                        "Failed to import into legal "
                        "archive via command {}.".format(item.get("item_id")))
    except Exception:
        logger.exception("Failed to import into legal archive.")
    finally:
        unlock(lock_name, "")
def run(self):
    now = utcnow()
    self.log_msg = 'Expiry Time: {}.'.format(now)
    logger.info('{} Starting to remove expired content.'.format(self.log_msg))

    lock_name = get_lock_id('archive', 'remove_expired')

    if not lock(lock_name, expire=1800):
        logger.info('{} Remove expired content task is already running.'.format(self.log_msg))
        return

    logger.info('{} Removing expired content.'.format(self.log_msg))

    # all functions should be called, even if the first one throws an exception,
    # so they are wrapped with log_exception
    self._remove_expired_publish_queue_items(now)
    self._remove_expired_items(now, lock_name)
    self._remove_expired_archived_items(now, lock_name)

    unlock(lock_name)
    push_notification('content:expired')
    logger.info('{} Completed remove expired content.'.format(self.log_msg))

    remove_locks()
def run(self, immediate=False):
    self.log_msg = 'Monitoring Scheduled Alerts: {}'.format(utcnow())
    logger.info('{} Starting to send alerts.'.format(self.log_msg))

    lock_name = get_lock_id('newsroom', 'monitoring_{0}'.format('scheduled' if not immediate else 'immediate'))

    if not lock(lock_name, expire=610):
        logger.error('{} Job already running'.format(self.log_msg))
        return

    try:
        now_local = utc_to_local(app.config['DEFAULT_TIMEZONE'], utcnow())
        app.config['SERVER_NAME'] = urlparse(app.config['CLIENT_URL']).netloc or None
        celery.conf['SERVER_NAME'] = app.config['SERVER_NAME']

        now_to_minute = now_local.replace(second=0, microsecond=0)

        if immediate:
            self.immediate_worker(now_to_minute)
        else:
            self.scheduled_worker(now_to_minute)
    except Exception as e:
        logger.exception(e)

    unlock(lock_name)
    remove_locks()

    logger.info('{} Completed sending Monitoring Scheduled Alerts.'.format(self.log_msg))
def run(self):
    now = utcnow()
    self.log_msg = 'Delete Spiked Items Time: {}.'.format(now)
    logger.info('{} Starting to delete spiked items.'.format(self.log_msg))

    expire_interval = app.config.get('PLANNING_DELETE_SPIKED_MINUTES', 0)
    if expire_interval == 0:
        logger.info('{} PLANNING_DELETE_SPIKED_MINUTES=0, not deleting any items'.format(self.log_msg))
        return

    lock_name = get_lock_id('planning', 'delete_spiked')
    if not lock(lock_name, expire=610):
        logger.info('{} Delete spiked items task is already running'.format(self.log_msg))
        return

    expiry_datetime = now - timedelta(minutes=expire_interval)

    try:
        self._delete_spiked_events(expiry_datetime)
    except Exception as e:
        logger.exception(e)

    try:
        self._delete_spiked_planning(expiry_datetime)
    except Exception as e:
        logger.exception(e)

    unlock(lock_name)

    logger.info('{} Completed deleting spiked items.'.format(self.log_msg))
    remove_locks()
def create_scheduled_content(now=None):
    lock_name = get_lock_id("Template", "Schedule")
    if not lock(lock_name, expire=130):
        logger.info("Task: {} is already running.".format(lock_name))
        return

    try:
        if now is None:
            now = utcnow()
        templates = get_scheduled_templates(now)
        production = superdesk.get_resource_service(ARCHIVE)
        items = []
        for template in templates:
            set_template_timestamps(template, now)
            item = get_item_from_template(template)
            item[config.VERSION] = 1
            production.post([item])
            insert_into_versions(doc=item)
            try:
                apply_onstage_rule(item, item.get(config.ID_FIELD))
            except Exception:
                logger.exception("Failed to apply on stage rule while scheduling template.")
            items.append(item)
        return items
    except Exception as e:
        logger.exception("Task: {} failed with error {}.".format(lock_name, str(e)))
    finally:
        unlock(lock_name)
def transmit_subscriber_items(subscriber, retries=False, priority=None):
    lock_name = get_lock_id('Subscriber', 'Transmit', subscriber)
    is_async = get_resource_service('subscribers').is_async(subscriber)

    if not lock(lock_name, expire=610):
        logger.info('Task: {} is already running.'.format(lock_name))
        return

    try:
        queue_items = get_queue_items(retries, subscriber, priority)
        for queue_item in queue_items:
            args = [queue_item[config.ID_FIELD]]
            kwargs = {'is_async': is_async}
            if is_async:
                transmit_item.apply_async(
                    args=args,
                    kwargs=kwargs,
                    queue=_get_queue(priority),
                )
            else:
                if not transmit_item(*args, **kwargs):
                    logger.debug('got error transmitting item %s', args[0])
                    break
    finally:
        logger.debug('unlock %s', lock_name)
        unlock(lock_name)
def run(self):
    now = utcnow()
    self.log_msg = 'Expiry Time: {}.'.format(now)
    logger.info('{} Starting to remove expired content.'.format(self.log_msg))

    expire_interval = app.config.get('PLANNING_EXPIRY_MINUTES', 0)
    if expire_interval == 0:
        logger.info('{} PLANNING_EXPIRY_MINUTES=0, not flagging items as expired'.format(self.log_msg))
        return

    lock_name = get_lock_id('planning', 'flag_expired')
    if not lock(lock_name, expire=610):
        logger.info('{} Flag expired items task is already running'.format(self.log_msg))
        return

    expiry_datetime = now - timedelta(minutes=expire_interval)

    try:
        self._flag_expired_events(expiry_datetime)
    except Exception as e:
        logger.exception(e)

    try:
        self._flag_expired_planning(expiry_datetime)
    except Exception as e:
        logger.exception(e)

    unlock(lock_name)

    logger.info('{} Completed flagging expired items.'.format(self.log_msg))
    remove_locks()

    logger.info('{} Starting to remove expired planning versions.'.format(self.log_msg))
    self._remove_expired_published_planning()
    logger.info('{} Completed removing expired planning versions.'.format(self.log_msg))
def create_scheduled_content(now=None):
    lock_name = get_lock_id("Template", "Schedule")
    if not lock(lock_name, expire=130):
        logger.info('Task: {} is already running.'.format(lock_name))
        return

    try:
        if now is None:
            now = utcnow()
        templates = get_scheduled_templates(now)
        production = superdesk.get_resource_service(ARCHIVE)
        items = []
        for template in templates:
            set_template_timestamps(template, now)
            item = get_item_from_template(template)
            item[config.VERSION] = 1
            production.post([item])
            insert_into_versions(doc=item)
            try:
                apply_onstage_rule(item, item.get(config.ID_FIELD))
            except Exception:
                logger.exception('Failed to apply on stage rule while scheduling template.')
            items.append(item)
        return items
    except Exception as e:
        logger.exception('Task: {} failed with error {}.'.format(lock_name, str(e)))
    finally:
        unlock(lock_name)
def publish():
    """Fetch items from the publish queue as per the configuration, call the transmit function."""
    lock_name = get_lock_id("Transmit", "Articles")

    if not lock(lock_name, '', expire=1800):
        logger.info('Task: {} is already running.'.format(lock_name))
        return

    try:
        # Query any outstanding transmit requests
        items = list(get_queue_items())
        if len(items) > 0:
            transmit_items(items)

        # Query any outstanding retry attempts
        retry_items = list(get_queue_items(True))
        if len(retry_items) > 0:
            transmit_items(retry_items)
    except Exception:
        logger.exception('Task: {} failed.'.format(lock_name))
    finally:
        unlock(lock_name, '')
def run(self, desk=None):
    if desk:
        self.default_desk = desk

    logger.info('Starting to export {} desk legal archive content to archived'.format(self.default_desk))

    lock_name = get_lock_id('legal_archive', 'export_to_archived')
    if not lock(lock_name, expire=610):
        logger.info('Export legal archive to archived task is already running.')
        return

    try:
        list_ids = self._export_to_archived()
    finally:
        unlock(lock_name)

    if list_ids:
        logger.info('Completed exporting {} {} desk documents from legal archive to text archived'.format(
            len(list_ids), self.default_desk))
    else:
        logger.info('Completed but nothing was exported...')
def publish():
    """Fetch items from the publish queue as per the configuration, call the transmit function."""
    with ProfileManager('publish:transmit'):
        lock_name = get_lock_id("Transmit", "Articles")

        if not lock(lock_name, expire=1810):
            logger.info('Task: {} is already running.'.format(lock_name))
            return

        try:
            # Query any outstanding transmit requests
            items = list(get_queue_items())
            if len(items) > 0:
                transmit_items(items)

            # Query any outstanding retry attempts
            retry_items = list(get_queue_items(True))
            if len(retry_items) > 0:
                transmit_items(retry_items)
        except Exception:
            logger.exception('Task: {} failed.'.format(lock_name))
        finally:
            unlock(lock_name)
def run(self, expire_hours=None):
    if expire_hours:
        self.expire_hours = expire_hours
    elif 'TEMP_FILE_EXPIRY_HOURS' in app.config:
        self.expire_hours = app.config['TEMP_FILE_EXPIRY_HOURS']

    expire_at = utcnow() - timedelta(hours=self.expire_hours)
    self.log_msg = 'Expiry Time: {}.'.format(expire_at)
    logger.info('{} Starting to remove exported files from storage'.format(self.log_msg))

    lock_name = get_lock_id('storage', 'remove_exported')
    if not lock(lock_name, expire=300):
        logger.info('Remove exported files from storage task is already running')
        return

    try:
        logger.info('{} Removing expired temporary media files'.format(self.log_msg))
        self._remove_exported_files(expire_at)
    finally:
        unlock(lock_name)

    logger.info('{} Completed removing exported files from storage'.format(self.log_msg))
def run(self, expiry_days=None):
    if expiry_days:
        self.expiry_days = int(expiry_days)
    elif app.settings.get("CONTENT_API_EXPIRY_DAYS"):
        self.expiry_days = app.settings["CONTENT_API_EXPIRY_DAYS"]

    if self.expiry_days == 0:
        logger.info("Expiry days is set to 0, therefore no items will be removed.")
        return

    now = utcnow()
    self.log_msg = "Expiry Time: {}".format(now)
    logger.info("{} Starting to remove expired content_api items.".format(self.log_msg))

    lock_name = get_lock_id("content_api", "remove_expired")
    if not lock(lock_name, expire=600):
        logger.info("{} Remove expired content_api items task is already running".format(self.log_msg))
        return

    try:
        num_items_removed = self._remove_expired_items(now, self.expiry_days)
    finally:
        unlock(lock_name)

    if num_items_removed == 0:
        logger.info("{} Completed but no items were removed".format(self.log_msg))
    else:
        logger.info("{} Completed removing {} expired content_api items".format(self.log_msg, num_items_removed))
def run(self):
    logger.info('Starting to fulfill assignments.')
    lock_name = get_lock_id('planning', 'fulfill_assignments')
    if not lock(lock_name, expire=610):
        logger.info('Fulfill Assignments task is already running')
        return

    # Get a list of the outstanding photo assignments
    assignments = list(self._get_outstanding_photo_assignments())

    # query for any images available from the image site API with those assignment ids
    completed_assignments = self._check_complete(assignments)
    self._mark_as_complete(completed_assignments)

    complete = [c.get('assignment').get('_id') for c in completed_assignments]

    # check if any of the outstanding assignments are in either the picedit or aapimage pools
    in_progress_assignments = self._check_in_progress(assignments, complete)
    self._mark_as_in_progress(in_progress_assignments)

    unlock(lock_name)
    logger.info('Finished fulfilling assignments')
def run(self, expiry_days=None):
    if expiry_days:
        self.expiry_days = int(expiry_days)
    elif app.settings.get('CONTENT_API_EXPIRY_DAYS'):
        self.expiry_days = app.settings['CONTENT_API_EXPIRY_DAYS']

    if self.expiry_days == 0:
        logger.info('Expiry days is set to 0, therefore no items will be removed.')
        return

    now = utcnow()
    self.log_msg = 'Expiry Time: {}'.format(now)
    logger.info('{} Starting to remove expired content_api items.'.format(self.log_msg))

    lock_name = get_lock_id('content_api', 'remove_expired')
    if not lock(lock_name, expire=600):
        logger.info('{} Remove expired content_api items task is already running'.format(self.log_msg))
        return

    try:
        num_items_removed = self._remove_expired_items(now, self.expiry_days)
    finally:
        unlock(lock_name)

    if num_items_removed == 0:
        logger.info('{} Completed but no items were removed'.format(self.log_msg))
    else:
        logger.info('{} Completed removing {} expired content_api items'.format(self.log_msg, num_items_removed))
def transmit_item(queue_item_id, is_async=False):
    publish_queue_service = get_resource_service(PUBLISH_QUEUE)
    lock_name = get_lock_id('Transmit', queue_item_id)

    if is_async and not lock(lock_name, expire=310):
        logger.info('Could not acquire lock %s, the item is already being transmitted.', lock_name)
        return

    try:
        # check the status of the queue item
        queue_item = publish_queue_service.find_one(req=None, _id=queue_item_id)

        if queue_item.get('state') not in [QueueState.PENDING.value, QueueState.RETRYING.value]:
            logger.info('Transmit State is not pending/retrying for queue item: {}. It is in {}'.format(
                queue_item.get(config.ID_FIELD), queue_item.get('state')))
            return

        log_msg = '_id: {_id} item_id: {item_id} state: {state} ' \
                  'item_version: {item_version} headline: {headline}'.format(**queue_item)

        # update the status of the item to in-progress
        queue_update = {'state': 'in-progress', 'transmit_started_at': utcnow()}
        publish_queue_service.patch(queue_item.get(config.ID_FIELD), queue_update)
        logger.info('Transmitting queue item {}'.format(log_msg))

        destination = queue_item['destination']
        transmitter = superdesk.publish.registered_transmitters[destination.get('delivery_type')]
        transmitter.transmit(queue_item)
        logger.info('Transmitted queue item {}'.format(log_msg))
        return True
    except Exception as e:
        # log_msg may not be set yet, so log the id we were given
        logger.exception('Failed to transmit queue item {}'.format(queue_item_id))

        max_retry_attempt = app.config.get('MAX_TRANSMIT_RETRY_ATTEMPT')
        retry_attempt_delay = app.config.get('TRANSMIT_RETRY_ATTEMPT_DELAY_MINUTES')
        try:
            orig_item = publish_queue_service.find_one(req=None, _id=queue_item_id)
            updates = {config.LAST_UPDATED: utcnow()}
            if orig_item.get('retry_attempt', 0) < max_retry_attempt and \
                    not isinstance(e, PublishHTTPPushClientError):
                updates['retry_attempt'] = orig_item.get('retry_attempt', 0) + 1
                updates['state'] = QueueState.RETRYING.value
                updates['next_retry_attempt_at'] = utcnow() + timedelta(minutes=retry_attempt_delay)
            else:
                # all retry attempts exhausted, mark the item as failed.
                updates['state'] = QueueState.FAILED.value
            publish_queue_service.system_update(orig_item.get(config.ID_FIELD), updates, orig_item)
        except Exception:
            logger.error('Failed to set the state for failed publish queue item {}.'.format(queue_item_id))

        # raise to stop transmitting items and free the worker; the error is probably
        # network related, so trying more items now would likely only block for longer
        logger.debug('got err, stop task %s', lock_name)
        raise
    finally:
        if is_async:
            logger.debug('unlock %s', lock_name)
            unlock(lock_name, remove=True)
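The except block above encodes a small retry state machine: client errors fail immediately, other errors retry with a fixed delay until the attempt budget is spent. A minimal sketch of just that bookkeeping, assuming hypothetical defaults of 4 attempts and a 10 minute delay (the real values come from the MAX_TRANSMIT_RETRY_ATTEMPT and TRANSMIT_RETRY_ATTEMPT_DELAY_MINUTES settings, and the plain strings stand in for the QueueState enum):

from datetime import datetime, timedelta, timezone


def next_queue_state(retry_attempt, is_client_error, max_attempts=4, delay_minutes=10):
    """Return (state, next_retry_attempt_at) for a failed transmission.

    Client errors (PublishHTTPPushClientError above) fail immediately;
    other errors retry until max_attempts is exhausted.
    """
    if retry_attempt < max_attempts and not is_client_error:
        next_at = datetime.now(timezone.utc) + timedelta(minutes=delay_minutes)
        return 'retrying', next_at
    return 'failed', None


# e.g. a third network failure is scheduled for another attempt:
state, next_at = next_queue_state(retry_attempt=2, is_client_error=False)
assert state == 'retrying'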
def transmit_subscriber_items(self, queue_items, subscriber):
    # Attempt to obtain a lock for transmissions to the subscriber
    lock_name = get_lock_id("Subscriber", "Transmit", subscriber)
    if not lock(lock_name, expire=610):
        return

    for queue_item in queue_items:
        publish_queue_service = get_resource_service(PUBLISH_QUEUE)
        log_msg = (
            "_id: {_id} item_id: {item_id} state: {state} "
            "item_version: {item_version} headline: {headline}".format(**queue_item)
        )
        try:
            # check the status of the queue item
            queue_item = publish_queue_service.find_one(req=None, _id=queue_item[config.ID_FIELD])

            if queue_item.get("state") not in [QueueState.PENDING.value, QueueState.RETRYING.value]:
                logger.info(
                    "Transmit State is not pending/retrying for queue item: {}. It is in {}".format(
                        queue_item.get(config.ID_FIELD), queue_item.get("state")
                    )
                )
                continue

            # update the status of the item to in-progress
            queue_update = {"state": "in-progress", "transmit_started_at": utcnow()}
            publish_queue_service.patch(queue_item.get(config.ID_FIELD), queue_update)

            logger.info("Transmitting queue item {}".format(log_msg))

            destination = queue_item["destination"]
            transmitter = superdesk.publish.registered_transmitters[destination.get("delivery_type")]
            transmitter.transmit(queue_item)
            logger.info("Transmitted queue item {}".format(log_msg))
        except Exception as e:
            logger.exception("Failed to transmit queue item {}".format(log_msg))

            max_retry_attempt = app.config.get("MAX_TRANSMIT_RETRY_ATTEMPT")
            retry_attempt_delay = app.config.get("TRANSMIT_RETRY_ATTEMPT_DELAY_MINUTES")
            try:
                orig_item = publish_queue_service.find_one(req=None, _id=queue_item["_id"])
                updates = {config.LAST_UPDATED: utcnow()}
                if orig_item.get("retry_attempt", 0) < max_retry_attempt and not isinstance(
                    e, PublishHTTPPushClientError
                ):
                    updates["retry_attempt"] = orig_item.get("retry_attempt", 0) + 1
                    updates["state"] = QueueState.RETRYING.value
                    updates["next_retry_attempt_at"] = utcnow() + timedelta(minutes=retry_attempt_delay)
                else:
                    # all retry attempts exhausted, mark the item as failed.
                    updates["state"] = QueueState.FAILED.value
                publish_queue_service.system_update(orig_item.get(config.ID_FIELD), updates, orig_item)
            except Exception:
                logger.error("Failed to set the state for failed publish queue item {}.".format(queue_item["_id"]))

    # Release the lock for the subscriber
    unlock(lock_name)
def update_provider(provider, rule_set=None, routing_scheme=None):
    """Fetch items from ingest provider, ingest them into Superdesk and update the provider.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    """
    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])

    if not lock(lock_name, expire=1810):
        return

    try:
        feeding_service = registered_feeding_services[provider['feeding_service']]
        feeding_service = feeding_service.__class__()

        update = {LAST_UPDATED: utcnow()}

        for items in feeding_service.update(provider, update):
            ingest_items(items, provider, feeding_service, rule_set, routing_scheme)
            if items:
                update[LAST_ITEM_UPDATE] = utcnow()

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service('ingest_providers')
        provider = ingest_provider_service.find_one(req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers', user_list=admins, name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:  # Only push a notification if there has been an update
            push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    except Exception as e:
        logger.error("Failed to ingest file: {error}".format(error=e))
        raise IngestFileError(3000, e, provider)
    finally:
        unlock(lock_name)
def run(self):
    logger.info('Importing Legal Publish Queue')
    lock_name = get_lock_id('legal_archive', 'import_legal_publish_queue')
    if not lock(lock_name, '', expire=600):
        return
    try:
        LegalArchiveImport().import_legal_publish_queue()
    finally:
        unlock(lock_name, '')
def run(self):
    logger.info("Import to Legal Publish Queue")
    lock_name = get_lock_id("legal_archive", "import_legal_publish_queue")
    if not lock(lock_name, "", expire=600):
        return
    try:
        LegalArchiveImport().import_legal_publish_queue()
    finally:
        unlock(lock_name, "")
def update_provider(provider, rule_set=None, routing_scheme=None, sync=False):
    """Fetch items from ingest provider, ingest them into Superdesk and update the provider.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    :param sync: Running in sync mode from cli.
    """
    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])

    if not lock(lock_name, expire=UPDATE_TTL + 10):
        if sync:
            logger.error('update is already running for %s', provider['name'])
        return

    try:
        feeding_service = get_feeding_service(provider['feeding_service'])
        update = {LAST_UPDATED: utcnow()}

        if sync:
            provider[LAST_UPDATED] = utcnow() - timedelta(days=9999)  # import everything again

        generator = feeding_service.update(provider, update)
        if isinstance(generator, list):
            generator = (items for items in generator)
        failed = None
        while True:
            try:
                items = generator.send(failed)
                failed = ingest_items(items, provider, feeding_service, rule_set, routing_scheme)
                update_last_item_updated(update, items)
            except StopIteration:
                break

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service('ingest_providers')
        provider = ingest_provider_service.find_one(req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers', user_list=admins, name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:  # Only push a notification if there has been an update
            push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    except Exception as e:
        logger.error("Failed to ingest file: {error}".format(error=e))
        raise IngestFileError(3000, e, provider)
    finally:
        unlock(lock_name)
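The while/send loop above drives the feeding service through the generator protocol: each send both resumes the service and hands back the ids that failed to ingest from the previous batch, so the service can react (re-queue, skip, log) before yielding the next batch. A minimal, self-contained sketch of that handshake (the batch data and the failure rule are invented for the demo):

def feeding_service_update(batches):
    """Yield batches; receive the ids that failed to ingest via send()."""
    for batch in batches:
        failed = yield batch
        if failed:
            # a real service could re-queue these for the next update
            print('previous batch failures:', failed)


generator = feeding_service_update([['a', 'b'], ['c']])
failed = None
while True:
    try:
        items = generator.send(failed)  # send(None) primes a fresh generator
    except StopIteration:
        break
    failed = {item for item in items if item == 'b'}  # pretend 'b' failed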
def run(self, page_size=None):
    logger.info('Import to Legal Publish Queue')
    lock_name = get_lock_id('legal_archive', 'import_legal_publish_queue')
    page_size = int(page_size) if page_size else self.default_page_size
    if not lock(lock_name, expire=310):
        return
    try:
        LegalArchiveImport().import_legal_publish_queue(page_size=page_size)
    finally:
        unlock(lock_name)
def run(self, page_size=None):
    if not is_legal_archive_enabled():
        return

    logger.info("Import to Legal Archive")
    lock_name = get_lock_id("legal_archive", "import_to_legal_archive")
    page_size = int(page_size) if page_size else self.default_page_size

    if not lock(lock_name, expire=1810):
        return

    try:
        legal_archive_import = LegalArchiveImport()

        # move the published items to the legal archive.
        expired_items = set()
        for items in self.get_expired_items(page_size):
            for item in items:
                self._move_to_legal(item.get("item_id"), item.get(config.VERSION), expired_items)

        # get the invalid items from archive.
        for items in get_resource_service(ARCHIVE).get_expired_items(utcnow(), invalid_only=True):
            for item in items:
                self._move_to_legal(item.get(config.ID_FIELD), item.get(config.VERSION), expired_items)

        # if the published item is moved but its publish_queue items are not.
        if len(expired_items):
            try:
                for items in legal_archive_import.get_publish_queue_items(page_size, list(expired_items)):
                    legal_archive_import.process_queue_items(items, True)
            except Exception:
                logger.exception("Failed to import into legal publish queue via command")

        # reset the expiry status
        archive_service = get_resource_service(ARCHIVE)
        for item_id in expired_items:
            try:
                item = archive_service.find_one(req=None, _id=item_id)
                if item:
                    archive_service.system_update(item_id, {"expiry_status": ""}, item)
            except Exception:
                logger.exception("Failed to reset expiry status for item id: {}.".format(item_id))
    except Exception:
        logger.exception("Failed to import into legal archive.")
    finally:
        unlock(lock_name)
def transmit_subscriber_items(self, queue_items, subscriber):
    # Attempt to obtain a lock for transmissions to the subscriber
    lock_name = get_lock_id("Subscriber", "Transmit", subscriber)
    if not lock(lock_name, expire=300):
        return

    for queue_item in queue_items:
        publish_queue_service = get_resource_service(PUBLISH_QUEUE)
        log_msg = '_id: {_id} item_id: {item_id} state: {state} ' \
                  'item_version: {item_version} headline: {headline}'.format(**queue_item)
        try:
            # update the status of the item to in-progress
            logger.info('Transmitting queue item {}'.format(log_msg))
            queue_update = {'state': 'in-progress', 'transmit_started_at': utcnow()}
            publish_queue_service.patch(queue_item.get(config.ID_FIELD), queue_update)

            destination = queue_item['destination']
            transmitter = superdesk.publish.registered_transmitters[destination.get('delivery_type')]
            transmitter.transmit(queue_item)
            logger.info('Transmitted queue item {}'.format(log_msg))
        except Exception:
            logger.exception('Failed to transmit queue item {}'.format(log_msg))

            max_retry_attempt = app.config.get('MAX_TRANSMIT_RETRY_ATTEMPT')
            retry_attempt_delay = app.config.get('TRANSMIT_RETRY_ATTEMPT_DELAY_MINUTES')
            try:
                orig_item = publish_queue_service.find_one(req=None, _id=queue_item['_id'])
                updates = {config.LAST_UPDATED: utcnow()}
                if orig_item.get('retry_attempt', 0) < max_retry_attempt:
                    updates['retry_attempt'] = orig_item.get('retry_attempt', 0) + 1
                    updates['state'] = QueueState.RETRYING.value
                    updates['next_retry_attempt_at'] = utcnow() + timedelta(minutes=retry_attempt_delay)
                else:
                    # all retry attempts exhausted, mark the item as failed.
                    updates['state'] = QueueState.FAILED.value
                publish_queue_service.system_update(orig_item.get(config.ID_FIELD), updates, orig_item)
            except Exception:
                logger.error('Failed to set the state for failed publish queue item {}.'.format(queue_item['_id']))

    # Release the lock for the subscriber
    unlock(lock_name)
def transmit_subscriber_items(self, queue_items, subscriber):
    # Attempt to obtain a lock for transmissions to the subscriber
    lock_name = get_lock_id('Subscriber', 'Transmit', subscriber)
    if not lock(lock_name, expire=610):
        return

    for queue_item in queue_items:
        publish_queue_service = get_resource_service(PUBLISH_QUEUE)
        log_msg = '_id: {_id} item_id: {item_id} state: {state} ' \
                  'item_version: {item_version} headline: {headline}'.format(**queue_item)
        try:
            # check the status of the queue item
            queue_item = publish_queue_service.find_one(req=None, _id=queue_item[config.ID_FIELD])

            if queue_item.get('state') not in [QueueState.PENDING.value, QueueState.RETRYING.value]:
                logger.info('Transmit State is not pending/retrying for queue item: {}. It is in {}'.format(
                    queue_item.get(config.ID_FIELD), queue_item.get('state')))
                continue

            # update the status of the item to in-progress
            queue_update = {'state': 'in-progress', 'transmit_started_at': utcnow()}
            publish_queue_service.patch(queue_item.get(config.ID_FIELD), queue_update)

            logger.info('Transmitting queue item {}'.format(log_msg))

            destination = queue_item['destination']
            transmitter = superdesk.publish.registered_transmitters[destination.get('delivery_type')]
            transmitter.transmit(queue_item)
            logger.info('Transmitted queue item {}'.format(log_msg))
        except Exception as e:
            logger.exception('Failed to transmit queue item {}'.format(log_msg))

            max_retry_attempt = app.config.get('MAX_TRANSMIT_RETRY_ATTEMPT')
            retry_attempt_delay = app.config.get('TRANSMIT_RETRY_ATTEMPT_DELAY_MINUTES')
            try:
                orig_item = publish_queue_service.find_one(req=None, _id=queue_item['_id'])
                updates = {config.LAST_UPDATED: utcnow()}
                if orig_item.get('retry_attempt', 0) < max_retry_attempt and \
                        not isinstance(e, PublishHTTPPushClientError):
                    updates['retry_attempt'] = orig_item.get('retry_attempt', 0) + 1
                    updates['state'] = QueueState.RETRYING.value
                    updates['next_retry_attempt_at'] = utcnow() + timedelta(minutes=retry_attempt_delay)
                else:
                    # all retry attempts exhausted, mark the item as failed.
                    updates['state'] = QueueState.FAILED.value
                publish_queue_service.system_update(orig_item.get(config.ID_FIELD), updates, orig_item)
            except Exception:
                logger.error('Failed to set the state for failed publish queue item {}.'.format(queue_item['_id']))

    # Release the lock for the subscriber
    unlock(lock_name)
def update_provider(self, provider, rule_set=None, routing_scheme=None):
    """Fetches items from ingest provider as per the configuration, ingests them into Superdesk
    and updates the provider.

    :param self:
    :type self:
    :param provider: Ingest Provider Details
    :type provider: dict :py:class:`superdesk.io.ingest_provider_model.IngestProviderResource`
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :type rule_set: dict :py:class:`apps.rules.rule_sets.RuleSetsResource`
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    :type routing_scheme: dict :py:class:`apps.rules.routing_rules.RoutingRuleSchemeResource`
    """
    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])
    host_name = get_host_id(self)

    if not lock(lock_name, host_name, expire=1800):
        return

    try:
        feeding_service = registered_feeding_services[provider['feeding_service']]
        feeding_service = feeding_service.__class__()

        update = {LAST_UPDATED: utcnow()}

        for items in feeding_service.update(provider):
            ingest_items(items, provider, feeding_service, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(items))
            if items:
                update[LAST_ITEM_UPDATE] = utcnow()

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service('ingest_providers')
        provider = ingest_provider_service.find_one(req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers', user_list=admins, name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:  # Only push a notification if there has been an update
            push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        unlock(lock_name, host_name)
def transmit_subscriber_items(queue_items, subscriber):
    lock_name = get_lock_id('Subscriber', 'Transmit', subscriber)
    publish_queue_service = get_resource_service(PUBLISH_QUEUE)

    if not lock(lock_name, expire=610):
        return

    try:
        for queue_item in queue_items:
            log_msg = '_id: {_id} item_id: {item_id} state: {state} ' \
                      'item_version: {item_version} headline: {headline}'.format(**queue_item)
            try:
                # check the status of the queue item
                queue_item = publish_queue_service.find_one(req=None, _id=queue_item[config.ID_FIELD])

                if queue_item.get('state') not in [QueueState.PENDING.value, QueueState.RETRYING.value]:
                    logger.info('Transmit State is not pending/retrying for queue item: {}. It is in {}'.format(
                        queue_item.get(config.ID_FIELD), queue_item.get('state')))
                    continue

                # update the status of the item to in-progress
                queue_update = {'state': 'in-progress', 'transmit_started_at': utcnow()}
                publish_queue_service.patch(queue_item.get(config.ID_FIELD), queue_update)

                logger.info('Transmitting queue item {}'.format(log_msg))

                destination = queue_item['destination']
                transmitter = superdesk.publish.registered_transmitters[destination.get('delivery_type')]
                transmitter.transmit(queue_item)
                logger.info('Transmitted queue item {}'.format(log_msg))
            except Exception as e:
                logger.exception('Failed to transmit queue item {}'.format(log_msg))

                max_retry_attempt = app.config.get('MAX_TRANSMIT_RETRY_ATTEMPT')
                retry_attempt_delay = app.config.get('TRANSMIT_RETRY_ATTEMPT_DELAY_MINUTES')
                try:
                    orig_item = publish_queue_service.find_one(req=None, _id=queue_item['_id'])
                    updates = {config.LAST_UPDATED: utcnow()}
                    if orig_item.get('retry_attempt', 0) < max_retry_attempt and \
                            not isinstance(e, PublishHTTPPushClientError):
                        updates['retry_attempt'] = orig_item.get('retry_attempt', 0) + 1
                        updates['state'] = QueueState.RETRYING.value
                        updates['next_retry_attempt_at'] = utcnow() + timedelta(minutes=retry_attempt_delay)
                    else:
                        # all retry attempts exhausted, mark the item as failed.
                        updates['state'] = QueueState.FAILED.value
                    publish_queue_service.system_update(orig_item.get(config.ID_FIELD), updates, orig_item)
                except Exception:
                    logger.error('Failed to set the state for failed publish queue item {}.'.format(
                        queue_item['_id']))
    finally:
        unlock(lock_name)
def run(self, page_size=None):
    if not is_legal_archive_enabled():
        return

    logger.info("Import to Legal Publish Queue")
    lock_name = get_lock_id("legal_archive", "import_legal_publish_queue")
    page_size = int(page_size) if page_size else self.default_page_size
    if not lock(lock_name, expire=310):
        return
    try:
        LegalArchiveImport().import_legal_publish_queue(page_size=page_size)
    finally:
        unlock(lock_name)
def update_provider(provider, rule_set=None, routing_scheme=None):
    """Fetch items from ingest provider, ingest them into Superdesk and update the provider.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    """
    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])

    if not lock(lock_name, expire=1810):
        return

    try:
        feeding_service = registered_feeding_services[provider['feeding_service']]
        feeding_service = feeding_service.__class__()

        update = {LAST_UPDATED: utcnow()}

        for items in feeding_service.update(provider, update):
            ingest_items(items, provider, feeding_service, rule_set, routing_scheme)
            if items:
                last_item_update = max(
                    [item['versioncreated'] for item in items if item.get('versioncreated')],
                    default=utcnow()
                )
                if not update.get(LAST_ITEM_UPDATE) or update[LAST_ITEM_UPDATE] < last_item_update:
                    update[LAST_ITEM_UPDATE] = last_item_update

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service('ingest_providers')
        provider = ingest_provider_service.find_one(req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers', user_list=admins, name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:  # Only push a notification if there has been an update
            push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    except Exception as e:
        logger.error("Failed to ingest file: {error}".format(error=e))
        raise IngestFileError(3000, e, provider)
    finally:
        unlock(lock_name)
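Unlike the earlier variants that stamp LAST_ITEM_UPDATE with utcnow(), this one advances it to the newest versioncreated in the batch, so the idle-provider alert reflects actual item timestamps. A minimal sketch of just that bookkeeping, with a plain 'last_item_update' key standing in for the LAST_ITEM_UPDATE constant and invented timestamps:

from datetime import datetime, timezone


def advance_last_item_update(update, items, now=None):
    """Move update['last_item_update'] forward to the newest versioncreated."""
    now = now or datetime.now(timezone.utc)
    last = max(
        [item['versioncreated'] for item in items if item.get('versioncreated')],
        default=now,
    )
    if not update.get('last_item_update') or update['last_item_update'] < last:
        update['last_item_update'] = last


update = {}
items = [{'versioncreated': datetime(2024, 1, 2, tzinfo=timezone.utc)},
         {'versioncreated': datetime(2024, 1, 5, tzinfo=timezone.utc)}]
advance_last_item_update(update, items)
assert update['last_item_update'].day == 5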
def run(self, max_days=3, item_id=None, chunk_size=1000):
    now_utc = utcnow()

    # If we're generating stats for a single item, then don't set max_days,
    # as we want to process all history records for the provided item
    if item_id is not None:
        max_days = 0

    try:
        max_days = float(max_days)
    except (ValueError, TypeError):
        max_days = 3

    gte = None if max_days <= 0.0 else utcnow() - timedelta(days=max_days)

    try:
        chunk_size = int(chunk_size)
    except (ValueError, TypeError):
        chunk_size = 1000

    chunk_size = None if chunk_size <= 0 else chunk_size

    logger.info('Starting to generate archive statistics: {}. gte={}. item_id={}. chunk_size={}'.format(
        now_utc, gte, item_id, chunk_size))

    lock_name = get_lock_id('analytics', 'gen_archive_statistics')
    if not lock(lock_name, expire=610):
        logger.info('Generate archive statistics task is already running.')
        return

    items_processed = 0
    failed_ids = []
    num_history_items = 0

    try:
        items_processed, failed_ids, num_history_items = self.generate_stats(item_id, gte, chunk_size)
    except Exception:
        logger.exception('Failed to generate archive stats')
    finally:
        unlock(lock_name)

    if len(failed_ids) > 0:
        logger.warning('Failed to generate stats for items {}'.format(', '.join(failed_ids)))

    duration = (utcnow() - now_utc).total_seconds()
    logger.info('Finished generating stats for {} items ({} history entries). Duration: {} seconds'.format(
        items_processed, num_history_items, int(duration)))
def update_provider(self, provider, rule_set=None, routing_scheme=None):
    """Fetches items from ingest provider as per the configuration, ingests them into Superdesk
    and updates the provider.
    """
    if provider.get('type') == 'search':
        return

    if not is_updatable(provider):
        return

    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])
    host_name = get_host_id(self)

    if not lock(lock_name, host_name, expire=1800):
        return

    try:
        update = {LAST_UPDATED: utcnow()}

        for items in providers[provider.get('type')].update(provider):
            ingest_items(items, provider, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(items))
            if items:
                update[LAST_ITEM_UPDATE] = utcnow()

        ingest_service = superdesk.get_resource_service('ingest_providers')
        ingest_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers', user_list=ingest_service._get_administrators(),
                name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        # Only push a notification if there has been an update
        if LAST_ITEM_UPDATE in update:
            push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        unlock(lock_name, host_name)
def run(self):
    """Fetches items from publish queue as per the configuration, calls the transmit function."""
    lock_name = get_lock_id('publish', 'enqueue_published')
    if not lock(lock_name, expire=310):
        logger.info('Enqueue Task: {} is already running.'.format(lock_name))
        return

    try:
        items = self.get_published_items()
        if len(items) > 0:
            self.enqueue_items(items)
    finally:
        unlock(lock_name)
def run(self):
    """Fetches items from publish queue as per the configuration, calls the transmit function."""
    lock_name = get_lock_id('publish', 'enqueue_published')
    if not lock(lock_name, expire=310):
        logger.info('Enqueue Task: {} is already running.'.format(lock_name))
        return

    try:
        items = get_published_items()
        if len(items) > 0:
            enqueue_items(items)
    finally:
        unlock(lock_name)
def run(self):
    now = utcnow()
    expiry_time_log_msg = 'Expiry Time: {}.'.format(now)
    logger.info('{} Starting to remove expired content.'.format(expiry_time_log_msg))

    lock_name = get_lock_id('archive', 'remove_expired')

    if not lock(lock_name, '', expire=600):
        logger.info('{} Remove expired content task is already running.'.format(expiry_time_log_msg))
        return

    try:
        logger.info('{} Removing expired content.'.format(expiry_time_log_msg))
        self._remove_expired_items(now, expiry_time_log_msg)
    finally:
        unlock(lock_name, '')

    push_notification('content:expired')
    logger.info('{} Completed remove expired content.'.format(expiry_time_log_msg))
def publish():
    """Fetch items from publish queue as per the configuration, call the transmit function."""
    with ProfileManager('publish:transmit'):
        lock_name = get_lock_id("Transmit", "Articles")

        if not lock(lock_name, expire=1810):
            logger.info('Task: {} is already running.'.format(lock_name))
            return

        try:
            for retries in [False, True]:  # first publish pending, retries after
                subs = get_queue_subscribers(retries=retries)
                for sub in subs:
                    transmit_subscriber_items.delay(str(sub), retries=retries)
        except Exception:
            logger.exception('Task: {} failed.'.format(lock_name))
        finally:
            unlock(lock_name)
def update_provider(self, provider, rule_set=None, routing_scheme=None):
    """Fetches items from ingest provider as per the configuration, ingests them into Superdesk
    and updates the provider.
    """
    if provider.get('type') == 'search':
        return

    if not is_updatable(provider):
        return

    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])
    host_name = get_host_id(self)

    if not lock(lock_name, host_name, expire=1800):
        return

    try:
        update = {LAST_UPDATED: utcnow()}

        for items in providers[provider.get('type')].update(provider):
            ingest_items(items, provider, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(items))
            if items:
                update[LAST_ITEM_UPDATE] = utcnow()

        ingest_service = superdesk.get_resource_service('ingest_providers')
        ingest_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers', user_list=ingest_service._get_administrators(),
                name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        # Only push a notification if there has been an update
        if LAST_ITEM_UPDATE in update:
            push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        unlock(lock_name, host_name)
def send_alerts(self):
    self.log_msg = 'Company Expiry Alerts: {}'.format(utcnow())
    logger.info('{} Starting to send alerts.'.format(self.log_msg))

    lock_name = get_lock_id('newsroom', 'company_expiry')
    if not lock(lock_name, expire=610):
        logger.error('{} Job already running'.format(self.log_msg))
        return

    try:
        self.worker()
    except Exception as e:
        logger.exception(e)

    unlock(lock_name)
    remove_locks()

    logger.info('{} Completed sending alerts.'.format(self.log_msg))
def transmit_subscriber_items(subscriber, retries=False):
    lock_name = get_lock_id('Subscriber', 'Transmit', subscriber)
    is_async = get_resource_service('subscribers').is_async(subscriber)

    if not lock(lock_name, expire=610):
        return

    try:
        queue_items = get_queue_items(retries, subscriber)
        for queue_item in queue_items:
            args = [queue_item[config.ID_FIELD]]
            kwargs = {'is_async': is_async}
            if is_async:
                transmit_item.apply_async(args=args, kwargs=kwargs)
            else:
                transmit_item.apply(args=args, kwargs=kwargs, throw=True)
    finally:
        unlock(lock_name)
def run(self, page_size=None):
    logger.info('Starting to fix expired content.')

    if app.settings.get('PUBLISHED_CONTENT_EXPIRY_MINUTES'):
        self.expiry_minutes = app.settings['PUBLISHED_CONTENT_EXPIRY_MINUTES']

    if page_size:
        self.default_page_size = int(page_size)

    lock_name = get_lock_id('archive', 'fix_expired_content')
    if not lock(lock_name, expire=610):
        logger.info('Fix expired content task is already running.')
        return

    try:
        self.fix_items_expiry()
    finally:
        unlock(lock_name)

    logger.info('Completed fixing expired content.')
def run(self):
    now = utcnow()
    self.log_msg = 'Delete Marked Assignments Time: {}.'.format(now)
    logger.info('{} Starting to delete marked assignments.'.format(self.log_msg))

    lock_name = get_lock_id('planning', 'delete_assignments')
    if not lock(lock_name, expire=610):
        logger.info('{} Delete marked assignments task is already running'.format(self.log_msg))
        return

    try:
        self._delete_marked_assignments()
    except Exception as e:
        logger.exception(e)

    unlock(lock_name)

    logger.info('{} Completed deleting marked assignments.'.format(self.log_msg))
    remove_locks()
def run(self, page_size=None):
    if not is_legal_archive_enabled():
        return

    logger.info('Import to Legal Archive')
    lock_name = get_lock_id('legal_archive', 'import_to_legal_archive')
    page_size = int(page_size) if page_size else self.default_page_size

    if not lock(lock_name, expire=1810):
        return

    try:
        legal_archive_import = LegalArchiveImport()

        # move the published items to the legal archive.
        expired_items = set()
        for items in self.get_expired_items(page_size):
            for item in items:
                self._move_to_legal(item.get('item_id'), item.get(config.VERSION), expired_items)

        # get the invalid items from archive.
        for items in get_resource_service(ARCHIVE).get_expired_items(utcnow(), invalid_only=True):
            for item in items:
                self._move_to_legal(item.get(config.ID_FIELD), item.get(config.VERSION), expired_items)

        # if the published item is moved but its publish_queue items are not.
        if len(expired_items):
            try:
                for items in legal_archive_import.get_publish_queue_items(page_size, list(expired_items)):
                    legal_archive_import.process_queue_items(items, True)
            except Exception:
                logger.exception('Failed to import into legal publish queue via command')

        # reset the expiry status
        archive_service = get_resource_service(ARCHIVE)
        for item_id in expired_items:
            try:
                item = archive_service.find_one(req=None, _id=item_id)
                if item:
                    archive_service.system_update(item_id, {'expiry_status': ''}, item)
            except Exception:
                logger.exception('Failed to reset expiry status for item id: {}.'.format(item_id))
    except Exception:
        logger.exception('Failed to import into legal archive.')
    finally:
        unlock(lock_name)
def publish():
    """Fetch items from the publish queue as per the configuration, call the transmit function."""
    lock_name = get_lock_id("Transmit", "Articles")

    if not lock(lock_name, '', expire=1800):
        logger.info('Task: {} is already running.'.format(lock_name))
        return

    try:
        items = list(get_queue_items())
        if len(items) > 0:
            transmit_items(items)
    except Exception:
        logger.exception('Task: {} failed.'.format(lock_name))
    finally:
        unlock(lock_name, '')
def publish():
    """Fetch items from the publish queue as per the configuration, call the transmit function."""
    lock_name = get_lock_id("Transmit", "Articles")

    if not lock(lock_name, "", expire=1800):
        logger.info("Task: {} is already running.".format(lock_name))
        return

    try:
        items = list(get_queue_items())
        if len(items) > 0:
            transmit_items(items)
    except Exception:
        logger.exception("Task: {} failed.".format(lock_name))
    finally:
        unlock(lock_name, "")
def run(self):
    now = utcnow()
    self.log_msg = 'Expiry Time: {}.'.format(now)
    logger.info('{} Starting to remove expired content.'.format(self.log_msg))

    lock_name = get_lock_id('archive', 'remove_expired')

    if not lock(lock_name, expire=610):
        logger.info('{} Remove expired content task is already running.'.format(self.log_msg))
        return

    logger.info('{} Removing expired content.'.format(self.log_msg))

    # both functions should be called, even if the first one throws an exception,
    # so they are wrapped with log_exception
    self._remove_expired_publish_queue_items()
    self._remove_expired_items(now)

    unlock(lock_name)
    push_notification('content:expired')
    logger.info('{} Completed remove expired content.'.format(self.log_msg))

    remove_locks()
def run(self, desk=None):
    if desk:
        self.default_desk = desk

    logger.info('Starting to export {} desk legal archive content to archived'.format(self.default_desk))

    lock_name = get_lock_id('legal_archive', 'export_to_archived')
    if not lock(lock_name, expire=610):
        logger.info('Export legal archive to archived task is already running.')
        return

    try:
        list_ids = self._export_to_archived()
    finally:
        unlock(lock_name)

    if list_ids:
        logger.info('Completed exporting {} {} desk documents from legal archive to text archived'.format(
            len(list_ids), self.default_desk))
    else:
        logger.info('Completed but nothing was exported...')