def _delete_spiked_events(self, expiry_datetime):
    logger.info('{} Starting to delete spiked events'.format(self.log_msg))
    events_service = get_resource_service('events')

    events_deleted = set()
    series_to_delete = dict()

    # Obtain the full list of Events that we're to process first
    # As subsequent queries will change the list of returned items
    events = dict()
    for items in events_service.get_expired_items(expiry_datetime, spiked_events_only=True):
        events.update({item[config.ID_FIELD]: item for item in items})

    for event_id, event in events.items():
        if event.get('recurrence_id') and event['recurrence_id'] not in series_to_delete:
            spiked, series_events = self.is_series_expired_and_spiked(event, expiry_datetime)
            if spiked:
                series_to_delete[event['recurrence_id']] = series_events
        else:
            events_service.delete_action(lookup={'_id': event_id})
            events_deleted.add(event_id)

    # Delete recurring series
    for recurrence_id, series_events in series_to_delete.items():
        events_service.delete_action(lookup={'recurrence_id': recurrence_id})
        events_deleted.update(series_events)

    logger.info('{} {} Events deleted: {}'.format(self.log_msg, len(events_deleted), list(events_deleted)))

def routing(item, desk=None, **kwargs):
    if desk is None:
        desk_id = item.get("task", {}).get("desk")
        if desk_id:
            desk = get_resource_service("desks").find_one(req=None, _id=desk_id)
    dest = get_destination_desk(desk)
    if dest and str(desk["_id"]) != str(dest["_id"]):
        logger.info('auto-routing item "%s" from desk "%s" to "%s"',
                    item.get("headline"), desk.get("name"), dest.get("name"))
        try:
            marked_desks = item.get("marked_desks", [])
            existing = [
                mark for mark in marked_desks
                if str(mark["desk_id"]) == str(dest["_id"])
            ]
            if not existing:
                marked_desks.append({
                    "desk_id": str(dest["_id"]),
                    "date_marked": utcnow(),
                })
                item["marked_desks"] = marked_desks
        except Exception:
            logger.exception("auto-routing error")
    return item

def remove_media_files(doc):
    """Removes the media files of the given doc.

    If media files are not referenced by any other story then delete the media files

    :param dict doc: document for which the media are being deleted
    :return boolean: True if files are deleted else False.
    """
    logger.info('Removing Media Files...')
    references = None

    if doc.get('renditions'):
        references = [doc.get('renditions')]

    if not references:
        references = [assoc.get('renditions') for assoc in (doc.get(ASSOCIATIONS) or {}).values()
                      if assoc and assoc.get('renditions')]

    for renditions in references:
        for rendition in renditions.values():
            media = rendition.get('media') if isinstance(rendition.get('media'), str) else str(rendition.get('media'))
            try:
                references = get_resource_service('media_references').get(req=None, lookup={
                    'media_id': media, 'published': True
                })
                if references.count() == 0:
                    logger.info('Deleting media:{}'.format(rendition.get('media')))
                    app.media.delete(media)
            except Exception:
                logger.exception('Failed to remove Media Id: {} from item: {}'.format(media, doc.get(config.ID_FIELD)))

def remove_media_files(doc):
    """Removes the media files of the given doc.

    If media files are not referenced by any other story then delete the media files

    :param dict doc: document for which the media are being deleted
    :return boolean: True if files are deleted else False.
    """
    logger.info('Removing Media Files...')
    references = None

    if doc.get('renditions'):
        references = [doc.get('renditions')]

    if not references:
        references = [assoc.get('renditions') for assoc in (doc.get(ASSOCIATIONS) or {}).values()
                      if assoc and assoc.get('renditions')]

    for renditions in references:
        for rendition in renditions.values():
            media = rendition.get('media') if isinstance(rendition.get('media'), str) else str(rendition.get('media'))
            try:
                references = get_resource_service('media_references').get(req=None, lookup={
                    'media_id': media, 'published': True
                })
                if references.count() == 0:
                    logger.info('Deleting media:{}'.format(rendition.get('media')))
                    app.media.delete(media)
            except Exception:
                logger.exception('Failed to remove Media Id: {} from item: {}'.format(media, doc.get(config.ID_FIELD)))

    for attachment in doc.get('attachments', []):
        lookup = {'_id': attachment['attachment']}
        get_resource_service('attachments').delete_action(lookup)

def find_one(self, endpoint_name, req, **lookup):
    """Find single item.

    :param endpoint_name: resource name
    :param req: parsed request
    :param lookup: additional filter
    """
    backend = self._backend(endpoint_name)
    item = backend.find_one(endpoint_name, req=req, **lookup)
    search_backend = self._lookup_backend(endpoint_name, fallback=True)
    if search_backend:
        # set the parent for the parent child in elastic search
        self._set_parent(endpoint_name, item, lookup)
        item_search = search_backend.find_one(endpoint_name, req=req, **lookup)
        if item is None and item_search:
            item = item_search
            logger.warning(item_msg('item is only in elastic', item))
        elif item_search is None and item:
            logger.warning(item_msg('item is only in mongo', item))
            try:
                logger.info(item_msg('trying to add item to elastic', item))
                search_backend.insert(endpoint_name, [item])
            except RequestError as e:
                logger.error(item_msg('failed to add item into elastic error={}'.format(str(e)), item))
    return item

def delete(self, endpoint_name, lookup):
    """Delete method to delete by using mongo query syntax.

    :param endpoint_name: Name of the endpoint
    :param lookup: User mongo query syntax. example 1. ``{'_id':123}``, 2. ``{'item_id': {'$in': [123, 234]}}``
    :returns: Returns the mongo remove command response. {'n': 12, 'ok': 1}
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)
    docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
    ids = [doc[config.ID_FIELD] for doc in docs]
    removed_ids = ids
    logger.info("total documents to be removed {}".format(len(ids)))
    if search_backend and ids:
        removed_ids = []
        # first remove it from search backend, so it won't show up. when this is done - remove it from mongo
        for _id in ids:
            try:
                self.remove_from_search(endpoint_name, _id)
                removed_ids.append(_id)
            except NotFoundError:
                logger.warning('item missing from elastic _id=%s' % (_id,))
                removed_ids.append(_id)
            except Exception:
                logger.exception('item can not be removed from elastic _id=%s' % (_id,))
    backend.remove(endpoint_name, {config.ID_FIELD: {'$in': removed_ids}})
    logger.info("Removed {} documents from {}.".format(len(ids), endpoint_name))
    if not ids:
        logger.warning("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))

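# Hedged usage sketch for delete() above, showing the two lookup shapes named in
# its docstring. The service instance and the 'archive' resource name are
# hypothetical placeholders, not taken from the source.
def _delete_usage_example(backend_service):
    # remove a single document by its _id
    backend_service.delete('archive', lookup={'_id': 123})
    # remove several documents at once via mongo $in syntax
    backend_service.delete('archive', lookup={'item_id': {'$in': [123, 234]}})
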
def delete_docs(self, endpoint_name, docs):
    """Delete using list of documents."""
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)
    ids = [doc[config.ID_FIELD] for doc in docs]
    removed_ids = ids
    logger.info("total documents to be removed {}".format(len(ids)))
    if search_backend and ids:
        removed_ids = []
        # first remove it from search backend, so it won't show up. when this is done - remove it from mongo
        for doc in docs:
            try:
                self.remove_from_search(endpoint_name, doc)
                removed_ids.append(doc[config.ID_FIELD])
            except NotFoundError:
                logger.warning('item missing from elastic _id=%s' % (doc[config.ID_FIELD],))
                removed_ids.append(doc[config.ID_FIELD])
            except Exception:
                logger.exception('item can not be removed from elastic _id=%s' % (doc[config.ID_FIELD],))
    if len(removed_ids):
        backend.remove(endpoint_name, {config.ID_FIELD: {'$in': removed_ids}})
        logger.info("Removed %d documents from %s.", len(removed_ids), endpoint_name)
    else:
        logger.warning("No documents for %s resource were deleted.", endpoint_name)
    return removed_ids

def ping_scanpix(assoc, item):
    for key in ['OWNER', 'USERNAME', 'PASSWORD']:
        if not app.config.get('SCANPIX_PING_%s' % key):
            return
    try:
        res = http.post(
            SCANPIX_PING_URL,
            json.dumps({
                'type': 'articleUsage',
                'data': {
                    'owner': app.config['SCANPIX_PING_OWNER'],
                    'media_id': assoc.get('guid', assoc.get('_id')),
                    'article_id': item.get('guid', item.get('_id')),
                    'services': [cat.get('name') for cat in item.get('anpa_category', [])],
                },
            }),
            headers={'content-type': 'application/json'},
            auth=(app.config['SCANPIX_PING_USERNAME'], app.config['SCANPIX_PING_PASSWORD']),
            timeout=PING_TIMEOUT,
        )
        logger.info('scanpix image published status=%d image=%s article=%s',
                    res.status_code, assoc.get('guid', ''), item.get('guid', ''))
    except Exception as e:
        logger.exception(e)

def remove_locks():
    """Removes item related locks that are not in use.

    :return:
    """
    result = _lock.collection.delete_many({'$or': [
        {'_id': re.compile('^item_move'), 'locked': False},
        {'_id': re.compile('^item_lock'), 'locked': False},
    ]})
    logger.info('unused item locks deleted count={}'.format(result.deleted_count))

def init_app(app):
    item_publish.connect(publish_scanpix)
    if app.config.get('SCANPIX_PING_OWNER') and app.config.get('SCANPIX_PING_USERNAME'):
        logger.info('SCANPIX ping owner configured %s', app.config['SCANPIX_PING_OWNER'])
    else:
        logger.info('SCANPIX ping owner not set')

def transmit(self, queue_item):
    try:
        self._transmit(queue_item, None)
        logger.info('Successfully transmitted item {}'.format(queue_item.get('item_id')))
    except Exception:
        logger.exception("Failed to transmit the item {}.".format(queue_item.get('item_id')))

def _update(self, provider, update):
    updated = utcnow()
    last_updated = provider.get('last_updated')
    ttl_minutes = app.config['INGEST_EXPIRY_MINUTES']
    if not last_updated or last_updated < updated - datetime.timedelta(minutes=ttl_minutes):
        last_updated = updated - datetime.timedelta(minutes=ttl_minutes)

    self.provider = provider
    provider_config = provider.get('config')
    if not provider_config:
        provider_config = {}
        provider['config'] = provider_config

    self.URL = provider_config.get('url')
    payload = {}

    parser = self.get_feed_parser(provider)

    try:
        response = requests.get(self.URL, params=payload, timeout=15)
        # TODO: check if the file has been updated since provider last_updated,
        # although some providers do not include 'Last-Modified' in headers
        # so unsure how to do this
        logger.info('Http Headers: %s', response.headers)
    except requests.exceptions.Timeout as ex:
        # Maybe set up for a retry, or continue in a retry loop
        raise IngestApiError.apiTimeoutError(ex, self.provider)
    except requests.exceptions.TooManyRedirects as ex:
        # Tell the user their URL was bad and try a different one
        raise IngestApiError.apiRedirectError(ex, self.provider)
    except requests.exceptions.RequestException as ex:
        # catastrophic error, bail
        raise IngestApiError.apiRequestError(ex, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)

    if response.status_code == 404:
        raise LookupError('Not found %s' % payload)

    logger.info('Ingesting: %s', str(response.content))

    if isinstance(parser, NTBEventXMLFeedParser):
        xml = ET.fromstring(response.content)
        items = parser.parse(xml, provider)
    elif isinstance(parser, IcsTwoFeedParser):
        cal = Calendar.from_ical(response.content)
        items = parser.parse(cal, provider)
    else:
        items = parser.parse(response.content)

    if isinstance(items, list):
        yield items
    else:
        yield [items]

def unlock(task, host):
    """Release lock on given task.

    Lock can be only released by host which locked it.

    :param task: task name
    :param host: current host id
    """
    logger.info('releasing lock task=%s host=%s' % (task, host))
    return _lock.release(task, host)

def get_ids(self, channel, last_updated, updated):
    """Get ids of documents which should be updated."""
    ids = set()
    payload = {'channel': channel, 'fieldsRef': 'id'}
    payload['dateRange'] = "%s-%s" % (self.format_date(last_updated), self.format_date(updated))
    logger.info('Reuters requesting Date Range |{}| for channel {}'.format(payload['dateRange'], channel))
    tree = self.get_tree('items', payload)
    for result in tree.findall('result'):
        ids.add(result.find('guid').text)
    return ids

def _update(self, provider, update):
    self.provider = provider
    parser = self.get_feed_parser(provider)

    # get the current year, it is used to filter fixtures for this year and next
    year = int(utcnow().year) % 100

    config = provider.get('config', {})
    content = self._request(config.get('login_url').format(config.get('username'), config.get('password')))
    # get the configured sports
    configured_sports = config.get('sports').split(',')
    xml = ET.fromstring(content)
    if xml.attrib['Status_Code'] == 'OK':
        session = xml.attrib['Status_Session']
        content = self._request(config.get('fixtures_url').format(session, '', '', ''))
        xml = ET.fromstring(content)
        for s in xml.findall('.//Sports/Sport'):
            sport_id = s.attrib['SportID']
            if sport_id not in configured_sports:
                continue
            sport_name = s.attrib['SportName']
            content = self._request(config.get('fixtures_url').format(session, sport_id, '', ''))
            sport_xml = ET.fromstring(content)
            for c in sport_xml.findall('.//Competition'):
                comp_id = c.attrib.get('Comp_ID')
                comp_name = c.attrib.get('Comp_Name')
                content = self._request(config.get('fixtures_url').format(session, sport_id, comp_id, ''))
                comp_xml = ET.fromstring(content)
                for season in comp_xml.findall('.//Season'):
                    season_id = season.attrib.get('SeasonID')
                    if str(year) in season_id or str(year + 1) in season_id:
                        content = self._request(
                            config.get('fixtures_url').format(session, sport_id, comp_id, season_id))
                        fixture_xml = ET.fromstring(content)
                        logger.info('Parsing {}/{} {}/{}'.format(sport_id, sport_name, comp_id, comp_name))
                        items = parser.parse({
                            'fixture_xml': fixture_xml,
                            'sport_id': sport_id,
                            'sport_name': sport_name,
                            'comp_name': comp_name,
                            'comp_id': comp_id,
                        }, provider)
                        if len(items) > 0:
                            yield items

def run(self):
    now = utcnow()
    self.log_msg = 'Delete Spiked Items Time: {}.'.format(now)
    logger.info('{} Starting to delete spiked items.'.format(self.log_msg))

    expire_interval = app.config.get('PLANNING_DELETE_SPIKED_MINUTES', 0)
    if expire_interval == 0:
        logger.info('{} PLANNING_DELETE_SPIKED_MINUTES=0, not deleting any spiked items'.format(self.log_msg))
        return

    lock_name = get_lock_id('planning', 'delete_spiked')
    if not lock(lock_name, expire=610):
        logger.info('{} Delete spiked items task is already running'.format(self.log_msg))
        return

    expiry_datetime = now - timedelta(minutes=expire_interval)

    try:
        self._delete_spiked_events(expiry_datetime)
    except Exception as e:
        logger.exception(e)

    try:
        self._delete_spiked_planning(expiry_datetime)
    except Exception as e:
        logger.exception(e)

    unlock(lock_name)

    logger.info('{} Completed deleting spiked items.'.format(self.log_msg))
    remove_locks()

def remove_expired_data(provider):
    """Remove expired data for provider"""
    logger.info('Removing expired content for provider: %s' % provider.get('_id', 'Detached items'))
    ingest_service = superdesk.get_resource_service('ingest')

    items = get_expired_items(provider)

    ids = [item['_id'] for item in items]
    items.rewind()
    file_ids = [rend.get('media')
                for item in items
                for rend in item.get('renditions', {}).values()
                if not item.get('archived') and rend.get('media')]

    if ids:
        logger.info('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s' % file_id)
        superdesk.app.media.delete(file_id)

    stats.incr('ingest.expired_items', len(ids))
    logger.info('Removed expired content for provider: {0} count: {1}'.format(
        provider.get('_id', 'Detached items'), len(ids)))

    remove_expired_from_elastic()

def lock(task, host, expire=300, timeout=None):
    """Try to lock task.

    :param task: task name
    :param host: current host id
    :param expire: lock ttl in seconds
    :param timeout: how long should it wait if task is locked
    """
    got_lock = _lock.lock(task, host, expire=expire, timeout=timeout)
    if got_lock:
        logger.info('got lock task=%s host=%s' % (task, host))
    else:
        logger.info('task locked already task=%s host=%s' % (task, host))
    return got_lock

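# Hedged usage sketch for lock()/unlock() above. The task name and the guarded
# work are hypothetical; the try/finally simply illustrates the
# acquire/work/release pattern these helpers support.
def _locked_task_example(host):
    task = 'example_cleanup'  # hypothetical task name
    if not lock(task, host, expire=300):
        return  # another host holds the lock, skip this run
    try:
        pass  # do the guarded work here
    finally:
        unlock(task, host)
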
def _remove_documents_from_search_backend(self, endpoint_name, ids):
    """Remove documents from search backend.

    :param endpoint_name: name of the endpoint
    :param ids: list of ids
    """
    ids = [str(doc_id) for doc_id in ids]
    batch_size = 500
    logger.info("total documents to be removed {}".format(len(ids)))
    for i in range(0, len(ids), batch_size):
        batch = ids[i:i + batch_size]
        query = {'query': {'terms': {'{}._id'.format(endpoint_name): batch}}}
        app.data._search_backend(endpoint_name).remove(endpoint_name, query)
        logger.info("Removed {} documents from {}.".format(len(batch), endpoint_name))

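# Minimal sketch of the batching idiom used above: range() with a step yields the
# start index of each chunk, and the final slice is naturally shorter when
# len(ids) is not a multiple of batch_size. (Helper name is illustrative.)
def _batches(ids, batch_size=500):
    for i in range(0, len(ids), batch_size):
        yield ids[i:i + batch_size]
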
def remove_expired_data(provider):
    """Remove expired data for provider"""
    logger.info('Removing expired content for provider: %s' % provider.get('_id', 'Detached items'))

    try:
        feeding_service = registered_feeding_services[provider['feeding_service']]
        feeding_service = feeding_service.__class__()
        ingest_collection = feeding_service.service if hasattr(feeding_service, 'service') else 'ingest'
    except KeyError:
        ingest_collection = 'ingest'

    ingest_service = superdesk.get_resource_service(ingest_collection)

    items = get_expired_items(provider, ingest_collection)

    ids = [item['_id'] for item in items]
    items.rewind()
    file_ids = [rend.get('media')
                for item in items
                for rend in item.get('renditions', {}).values()
                if not item.get('archived') and rend.get('media')]

    if ids:
        logger.info('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s' % file_id)
        superdesk.app.media.delete(file_id)

    logger.info('Removed expired content for provider: {0} count: {1}'.format(
        provider.get('_id', 'Detached items'), len(ids)))

    remove_expired_from_elastic(ingest_collection)

def _get_article_ids(self, channel, last_updated, updated):
    """Get article ids which should be upserted."""
    ids = set()
    payload = {'channel': channel, 'fieldsRef': 'id',
               'dateRange': "%s-%s" % (self._format_date(last_updated), self._format_date(updated))}
    logger.info('Reuters requesting Date Range |{}| for channel {}'.format(payload['dateRange'], channel))
    tree = self._get_tree('items', payload)
    for result in tree.findall('result'):
        ids.add(result.find('guid').text)
    return ids

def _remove_expired_published_planning():
    """Expire planning versions.

    Expiry of the planning versions mirrors the expiry of items within the publish queue
    in Superdesk, so it uses the same configuration value.

    :return:
    """
    expire_interval = app.config.get('PUBLISH_QUEUE_EXPIRY_MINUTES', 0)
    if expire_interval:
        expire_time = utcnow() - timedelta(minutes=expire_interval)
        logger.info('Removing planning history items created before {}'.format(str(expire_time)))
        get_resource_service('published_planning').delete({'_id': {'$lte': ObjectId.from_datetime(expire_time)}})

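# Hedged sketch of the ObjectId.from_datetime() technique used above: MongoDB
# ObjectIds embed their creation time, so an id synthesised from a cutoff datetime
# can drive '$lte' time-range deletes on _id without a separate date field.
# The helper name and expiry interval are hypothetical.
from datetime import datetime, timedelta, timezone
from bson import ObjectId

def _history_expiry_lookup(expiry_minutes=60):
    # build a lookup matching documents created before the cutoff
    cutoff = datetime.now(timezone.utc) - timedelta(minutes=expiry_minutes)
    return {'_id': {'$lte': ObjectId.from_datetime(cutoff)}}
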
def _flag_expired_events(self, expiry_datetime):
    logger.info('{} Starting to flag expired events'.format(self.log_msg))
    events_service = get_resource_service('events')
    planning_service = get_resource_service('planning')

    locked_events = set()
    events_in_use = set()
    events_expired = set()
    plans_expired = set()

    # Obtain the full list of Events that we're to process first
    # As subsequent queries will change the list of returned items
    events = dict()
    for items in events_service.get_expired_items(expiry_datetime):
        events.update({item[config.ID_FIELD]: item for item in items})

    self._set_event_plans(events)

    for event_id, event in events.items():
        if event.get('lock_user'):
            locked_events.add(event_id)
        elif self._get_event_schedule(event) > expiry_datetime:
            events_in_use.add(event_id)
        else:
            events_expired.add(event_id)
            events_service.system_update(event_id, {'expired': True}, event)
            for plan in event.get('_plans', []):
                plan_id = plan[config.ID_FIELD]
                planning_service.system_update(plan_id, {'expired': True}, plan)
                plans_expired.add(plan_id)

    if len(locked_events) > 0:
        logger.info('{} Skipping {} locked Events: {}'.format(
            self.log_msg, len(locked_events), list(locked_events)))

    if len(events_in_use) > 0:
        logger.info('{} Skipping {} Events in use: {}'.format(
            self.log_msg, len(events_in_use), list(events_in_use)))

    if len(events_expired) > 0:
        push_notification('events:expired', items=list(events_expired))

    if len(plans_expired) > 0:
        push_notification('planning:expired', items=list(plans_expired))

    logger.info('{} {} Events expired: {}'.format(self.log_msg, len(events_expired), list(events_expired)))

def run(self, now=None):
    if now:
        now_utc = now if isinstance(now, datetime) else local_to_utc(
            app.config['DEFAULT_TIMEZONE'], datetime.strptime(now, '%Y-%m-%dT%H'))
    else:
        now_utc = utcnow()

    now_local = utc_to_local(app.config['DEFAULT_TIMEZONE'], now_utc)

    logger.info('Starting to send scheduled reports: {}'.format(now_utc))

    schedules = self.get_schedules()

    if len(schedules) < 1:
        logger.info('No enabled schedules found, not continuing')
        return

    # Set now to the beginning of the hour (in local time)
    now_local = now_local.replace(minute=0, second=0, microsecond=0)

    for scheduled_report in schedules:
        schedule_id = str(scheduled_report.get('_id'))
        try:
            if not self.should_send_report(scheduled_report, now_local):
                logger.info('Scheduled Report {} not scheduled to be sent'.format(schedule_id))
                continue

            logger.info('Attempting to send Scheduled Report {}'.format(schedule_id))
            self._send_report(scheduled_report)

            # Update the _last_sent of the schedule
            get_resource_service('scheduled_reports').system_update(
                scheduled_report.get('_id'), {'_last_sent': now_utc}, scheduled_report)
        except Exception as e:
            logger.error('Failed to generate report for {}. Error: {}'.format(schedule_id, str(e)))
            logger.exception(e)

    logger.info('Completed sending scheduled reports: {}'.format(now_utc))

def find_one(self, endpoint_name, req, **lookup):
    backend = self._backend(endpoint_name)
    item = backend.find_one(endpoint_name, req=req, **lookup)
    search_backend = self._lookup_backend(endpoint_name, fallback=True)
    if search_backend:
        item_search = search_backend.find_one(endpoint_name, req=req, **lookup)
        if item is None and item_search:
            item = item_search
            logger.warning(item_msg('item is only in elastic', item))
        elif item_search is None and item:
            logger.warning(item_msg('item is only in mongo', item))
            try:
                logger.info(item_msg('trying to add item to elastic', item))
                search_backend.insert(endpoint_name, [item])
            except RequestError as e:
                logger.error(item_msg('failed to add item into elastic error={}'.format(str(e)), item))
    return item

def remove_media_files(doc):
    """Removes the media files of the given doc.

    If media files are not referenced by any other story then delete the media files

    :param dict doc: document for which the media are being deleted
    :return boolean: True if files are deleted else False.
    """
    references = None

    if doc.get("renditions"):
        references = [doc.get("renditions")]

    if not references:
        references = [
            assoc.get("renditions")
            for assoc in (doc.get(ASSOCIATIONS) or {}).values()
            if assoc and assoc.get("renditions")
        ]

    if references:
        logger.info("Removing media files for %s", doc.get("guid"))

    for renditions in references:
        for rendition in renditions.values():
            media = rendition.get("media") if isinstance(rendition.get("media"), str) else str(rendition.get("media"))
            try:
                references = get_resource_service("media_references").get(req=None, lookup={
                    "media_id": media, "published": True
                })
                if references.count() == 0:
                    logger.info("Deleting media:{}".format(rendition.get("media")))
                    app.media.delete(media)
            except Exception:
                logger.exception("Failed to remove Media Id: {} from item: {}".format(media, doc.get(config.ID_FIELD)))

    for attachment in doc.get("attachments", []):
        lookup = {"_id": attachment["attachment"]}
        get_resource_service("attachments").delete_action(lookup)

def _export_events(self):
    """Export events"""
    logger.info('Starting to export events')
    events_service = get_resource_service('events')
    formatter = JsonEventFormatter()
    destination = self._get_destination('json_event')
    formatter.set_destination(destination=destination, subscriber=self.subscriber)
    transmitter = NewsroomHTTPTransmitter()

    for items in self._fetch_items(events_service.get):
        for item in items:
            try:
                logger.info('Processing event item: {}'.format(item.get('_id')))
                version, event = get_version_item_for_post(item)
                queue_item = self._get_queue_item(event, formatter._format_item, destination)
                transmitter.transmit(queue_item)
                logger.info('Processed event item: {}'.format(item.get('_id')))
            except Exception:
                logger.exception('Failed to export event: {}'.format(item.get('_id')))

def _flag_expired_planning(self, expiry_datetime):
    logger.info('{} Starting to flag expired planning items'.format(self.log_msg))
    planning_service = get_resource_service('planning')

    # Obtain the full list of Planning items that we're to process first
    # As subsequent queries will change the list of returned items
    plans = dict()
    for items in planning_service.get_expired_items(expiry_datetime):
        plans.update({item[config.ID_FIELD]: item for item in items})

    locked_plans = set()
    plans_expired = set()

    for plan_id, plan in plans.items():
        if plan.get('lock_user'):
            locked_plans.add(plan_id)
        else:
            planning_service.system_update(plan[config.ID_FIELD], {'expired': True}, plan)
            plans_expired.add(plan_id)

    if len(locked_plans) > 0:
        logger.info('{} Skipping {} locked Planning items: {}'.format(
            self.log_msg, len(locked_plans), list(locked_plans)))

    if len(plans_expired) > 0:
        push_notification('planning:expired', items=list(plans_expired))

    logger.info('{} {} Planning items expired: {}'.format(
        self.log_msg, len(plans_expired), list(plans_expired)))

def _export_planning(self):
    """Export planning items"""
    logger.info('Starting to export planning')
    planning_service = get_resource_service('planning')
    formatter = JsonPlanningFormatter()
    destination = self._get_destination('json_planning')
    formatter.set_destination(destination=destination, subscriber=self.subscriber)
    transmitter = NewsroomHTTPTransmitter()

    for items in self._fetch_items(planning_service.get):
        for item in items:
            try:
                logger.info('Processing planning item: {}'.format(item.get('_id')))
                version, plan = get_version_item_for_post(item)
                queue_item = self._get_queue_item(plan, formatter._format_item, destination)
                transmitter.transmit(queue_item)
                logger.info('Processed planning item: {}'.format(item.get('_id')))
            except Exception:
                logger.exception('Failed to export planning item: {}'.format(item.get('_id')))

def generate_stats(self, item_id, gte, chunk_size):
    items_processed = 0
    failed_ids = []
    num_history_items = 0
    statistics_service = get_resource_service('archive_statistics')

    # Get the system record from the last run
    # This document stores the id of the last processed archive_history item
    last_history = statistics_service.get_last_run()
    last_entry_id = last_history.get('guid') or None

    if last_history.get('guid'):
        logger.info('Found previous run, continuing from history item {}'.format(last_history['guid']))

    iterated_started = utcnow()
    for history_items in statistics_service.get_history_items(last_entry_id, gte, item_id, chunk_size):
        if len(history_items) < 1:
            logger.info('No more history records to process')
            break

        num_history_items += len(history_items)
        last_entry_id = history_items[-1].get(config.ID_FIELD)

        items = self.gen_history_timelines(history_items)
        items_processed += len(items)
        self.process_timelines(items, failed_ids)

        time_diff = (utcnow() - iterated_started).total_seconds()
        logger.info('Processed {}/{} history/item records ({}/{} total) in {} seconds'.format(
            len(history_items), len(items), num_history_items, items_processed, int(time_diff)))
        iterated_started = utcnow()

    # Don't store the last processed id if we're generating stats for a single item
    if not item_id:
        # Create/Update the system record from this run
        # Storing the id of the last processed archive_history item
        statistics_service.set_last_run_id(last_entry_id, last_history)

    return items_processed, failed_ids, num_history_items

def _delete_marked_assignments(self):
    logger.info('{} Starting to delete marked assignments'.format(self.log_msg))
    assignments_service = get_resource_service('assignments')

    query = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': {
                            'term': {'_to_delete': True}
                        },
                    }
                }
            }
        }
    }
    req = ParsedRequest()
    req.args = {'source': json.dumps(query)}
    assignments_to_delete = assignments_service.get(req=req, lookup=None)
    failed_assignments = []
    assignments_deleted = []

    for assignment in assignments_to_delete:
        assign_id = assignment.get(config.ID_FIELD)
        try:
            assignments_service.delete_action(lookup={'_id': assign_id})
            assignments_deleted.append({
                'id': assign_id,
                'slugline': assignment.get('planning', {}).get('slugline'),
                'type': assignment.get('planning', {}).get('g2_content_type'),
            })
        except SuperdeskApiError as e:
            logger.exception(e)
            failed_assignments.append(assign_id)

    logger.info('{} {} Assignments deleted: {}'.format(
        self.log_msg, len(assignments_deleted), str(assignments_deleted)))

    if len(assignments_deleted) > 0:
        push_notification('assignments:delete', items=assignments_deleted)

    if len(failed_assignments) > 0:
        logger.info('{} {} assignments failed deletion: {}'.format(
            self.log_msg, len(failed_assignments), str(failed_assignments)))

def _backend(self):
    if not app:
        raise RuntimeError('You can only use cache within app context.')
    if not app.cache:
        cache_url = app.config.get('CACHE_URL', '')
        if 'redis' in cache_url or 'unix' in cache_url:
            app.cache = SuperdeskRedisBackend(self.mangler, url=cache_url)
            logger.info('using redis cache backend')
        elif cache_url:
            import hermes.backend.memcached
            app.cache = hermes.backend.memcached.Backend(self.mangler, servers=[cache_url])
            logger.info('using memcached cache backend')
        else:
            import hermes.backend.dict
            app.cache = hermes.backend.dict.Backend(self.mangler)
            logger.info('using dict cache backend')
    return app.cache

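# Illustrative CACHE_URL values and the backend each would select in _backend()
# above (the example URLs are assumptions, not from the source):
#   CACHE_URL = 'redis://localhost:6379/1'  # contains 'redis' -> SuperdeskRedisBackend
#   CACHE_URL = 'localhost:11211'           # non-empty, no 'redis'/'unix' -> memcached backend
#   CACHE_URL = ''                          # empty -> in-process dict backend
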
def _get_article_ids(self, channel, last_updated, updated):
    """Get article ids which should be upserted, also save the poll token that is returned."""
    ids = set()
    payload = {'channel': channel, 'fieldsRef': 'id'}

    # check if the channel has a pollToken, if not fall back to dateRange
    last_poll_token = self._get_poll_token(channel)
    if last_poll_token is not None:
        logger.info("Reuters requesting channel {} with poll token {}".format(channel, last_poll_token))
        payload['pollToken'] = last_poll_token
    else:
        payload['dateRange'] = "%s-%s" % (self._format_date(last_updated), self._format_date(updated))
        logger.info("Reuters requesting channel {} with dateRange {}".format(channel, payload['dateRange']))

    tree = self._get_tree('items', payload)
    status_code = tree.find('status').get('code') if tree.tag == 'results' else tree.get('code')

    # check the returned status
    if status_code != '10':
        logger.warning("Reuters channel request returned status code {}".format(status_code))
        # status code 30 indicates failure
        if status_code == '30':
            # invalid token
            logger.warning("Reuters error on channel {} code {} {}".format(
                channel, tree.find('error').get('code'), tree.find('error').text))
            if tree.find('error').get('code') == '2100':
                self._save_poll_token(channel, None)
                logger.warning("Reuters channel invalid token, resetting {}".format(status_code))
            return ids

    # extract the returned poll token if there is one
    poll_token = tree.find('pollToken')
    if poll_token is not None:
        # a new token indicates new content
        if poll_token.text != last_poll_token:
            logger.info("Reuters channel {} new token {}".format(channel, poll_token.text))
            self._save_poll_token(channel, poll_token.text)
        else:
            # the token has not changed, so nothing new
            logger.info("Reuters channel {} nothing new".format(channel))
            return ids
    else:
        logger.info("Reuters channel {} retrieved no token".format(channel))
        return ids

    for result in tree.findall('result'):
        item_id = result.find('id').text
        ids.add(item_id)
        logger.info("Reuters id : {}".format(item_id))

    return ids