def init_app(app):
    """Set up the Newsworthy feeding service and its webhook resource for *app*.

    The default value of the service's ``url`` field is filled in here, from
    ``app.config['SERVER_URL']``, because the config is not available at the
    time the service class is parsed.

    :param app: application object providing ``config``; it is also handed
        to the webhook resource
    """
    webhook_url = join(app.config['SERVER_URL'], 'newsworthy')
    first_field = NewsworthyFeedingService.fields[0]
    # Sanity check: the first declared field must be the URL field.
    assert first_field['id'] == 'url'
    first_field['default_value'] = webhook_url

    # init_app can be called several times during tests, so an
    # AlreadyExistsError from the registry only means "nothing left to do":
    # the parser is registered only when the service was newly registered.
    try:
        register_feeding_service(NewsworthyFeedingService)
    except superdesk.errors.AlreadyExistsError:
        newly_registered = False
    else:
        newly_registered = True
    if newly_registered:
        register_feeding_service_parser(NewsworthyFeedingService.NAME, 'ninjs')

    resource = NewsworthyWebhookResource(
        "newsworthy", app=app, service=NewsworthyWebhookService())
    resource.authentication = NewsworthyFeedingServiceAuth
parser_restricted_values = ['wufoo'] def __init__(self): self.fields_cache = {} def _update(self, provider, update): user = provider['config']['wufoo_username'] wufoo_data = { "url": WUFOO_URL.format(subdomain=user), "user": user, "api_key": provider['config']['wufoo_api_key'], "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES, "update": update} try: parser = self.get_feed_parser(provider, None) except requests.exceptions.Timeout as ex: raise IngestApiError.apiTimeoutError(ex, provider) except requests.exceptions.TooManyRedirects as ex: raise IngestApiError.apiRedirectError(ex, provider) except requests.exceptions.RequestException as ex: raise IngestApiError.apiRequestError(ex, provider) except Exception as error: traceback.print_exc() raise IngestApiError.apiGeneralError(error, self.provider) items = parser.parse(wufoo_data, provider) return [items] register_feeding_service(WufooFeedingService)
"media": media_id, "filename": fileName, "title": 'attachment', "description": "email's attachment" }]) if ids: attachments.append( {'attachment': next(iter(ids), None)}) except Exception as ex: logger.error( "cannot add attachment for %s, %s" % (fileName, ex.args[0])) app.media.delete(media_id) if attachments: for item in items: if item['type'] == 'text': item['attachments'] = attachments item[ 'ednote'] = 'The story has %s attachment(s)' % str( len(attachments)) register_feeding_service(EmailBelgaFeedingService) register_feeding_service_parser(EmailBelgaFeedingService.NAME, EMailRFC822FeedParser.NAME)
:param items: dict with events, ntbId used as a key :type items: dict :return: a list of events """ req = ParsedRequest() req.projection = json.dumps({'ntb_id': 1, 'guid': 1, ITEM_STATE: 1}) req.max_results = len(items) existing_items = superdesk.get_resource_service('events').get_from_mongo( req, { 'ntb_id': { '$in': [ntb_id for ntb_id in items.keys()] } } ) for existing_item in existing_items: if existing_item.get(ITEM_STATE) == WORKFLOW_STATE.INGESTED: # update event items[existing_item['ntb_id']][GUID_FIELD] = existing_item[GUID_FIELD] else: # remove event when it has a state different from 'ingested' del items[existing_item['ntb_id']] return [items[i] for i in items.keys()] register_feeding_service(NTBEventsApiFeedingService) register_feeding_service_parser(NTBEventsApiFeedingService.NAME, 'ntb_events_api_xml')
""" payload = {"id": id} tree = self._get_tree("item", payload) parser = self.get_feed_parser(self.provider, tree) items = parser.parse(tree, self.provider) return items def _fetch_items_in_package(self, item): """ Fetch remote assets for given item. """ items = [] for group in item.get("groups", []): for ref in group.get("refs", []): if "residRef" in ref: items.extend(self._parse_items(ref.get("residRef"))) return items def prepare_href(self, href, mimetype=None): (scheme, netloc, path, params, query, fragment) = urlparse(href) new_href = urlunparse((scheme, netloc, path, "", "", "")) return "%s?auth_token=%s" % (new_href, self._get_auth_token(self.provider, update=True)) register_feeding_service(ReutersHTTPFeedingService) register_feeding_service_parser(ReutersHTTPFeedingService.NAME, "newsml2")
try: root_elt = etree.fromstring(r.text) except Exception: raise IngestApiError.apiRequestError(Exception('error while parsing the request answer')) try: if root_elt.xpath('(//error/text())[1]')[0] != '0': err_msg = root_elt.xpath('(//errormsg/text())[1]')[0] raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg))) except IndexError: raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found')) parser = self.get_feed_parser(provider) items = [] for elt in root_elt.xpath('//RBNews'): item = parser.parse(elt, provider) items.append(item) if not url_override: try: queue_id = elt.xpath('.//ServiceQueueId/text()')[0] except IndexError: raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element')) ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id} self.get_url(URL_ACK, params=ack_params) return [items] register_feeding_service(RitzauFeedingService) register_feeding_service_parser(RitzauFeedingService.NAME, 'ritzau')
with open(filepath, 'rb') as f: data = f.read() return BytesIO(data), name, mime class TestProviderService(FeedingService): NAME = 'test' ERRORS = [ProviderError.anpaError(None, None).get_error_description()] def _update(self, provider, update): return [] register_feeding_service(TestProviderService) class CeleryTaskRaceTest(TestCase): def test_the_second_update_fails_if_already_running(self): provider = { '_id': 'abc', 'name': 'test provider', 'update_schedule': { 'minutes': 1 } } removed = mark_task_as_not_running(provider['name'], provider['_id']) self.assertFalse(removed) failed_to_mark_as_running = is_task_running(provider['name'], provider['_id'],
parsed = parser.parse(local_file_path, provider) if isinstance(parsed, dict): parsed = [parsed] items.append(parsed) if do_move: move_dest_file_path = os.path.join( move_dest_path, filename) self._move(ftp, filename, move_dest_file_path) except Exception as e: logger.error( "Error while parsing {filename}: {msg}".format( filename=filename, msg=e)) if do_move: move_dest_file_path_error = os.path.join( move_dest_path_error, filename) self._move(ftp, filename, move_dest_file_path_error) if crt_last_updated: update[LAST_UPDATED] = crt_last_updated return items except IngestFtpError: raise except Exception as ex: raise IngestFtpError.ftpError(ex, provider) register_feeding_service(FTPFeedingService.NAME, FTPFeedingService(), FTPFeedingService.ERRORS)
from superdesk.celery_task_utils import mark_task_as_not_running, is_task_running from superdesk.errors import SuperdeskApiError, ProviderError from superdesk.io.registry import register_feeding_service, registered_feeding_services from superdesk.io.commands.remove_expired_content import get_expired_items, RemoveExpiredContent from superdesk.io.feeding_services import FeedingService from superdesk.tests import TestCase from superdesk.utc import utcnow class TestProviderService(FeedingService): def _update(self, provider, update): return [] register_feeding_service('test', TestProviderService(), [ProviderError.anpaError(None, None).get_error_description()]) class CeleryTaskRaceTest(TestCase): def test_the_second_update_fails_if_already_running(self): provider = {'_id': 'abc', 'name': 'test provider', 'update_schedule': {'minutes': 1}} removed = mark_task_as_not_running(provider['name'], provider['_id']) self.assertFalse(removed) failed_to_mark_as_running = is_task_running(provider['name'], provider['_id'], {'minutes': 1}) self.assertFalse(failed_to_mark_as_running, 'Failed to mark ingest update as running') failed_to_mark_as_running = is_task_running(provider['name'], provider['_id'], {'minutes': 1}) self.assertTrue(failed_to_mark_as_running, 'Ingest update marked as running, possible race condition') removed = mark_task_as_not_running(provider['name'], provider['_id'])
IngestApiError.apiRequestError().get_error_description(), IngestApiError.apiGeneralError().get_error_description()] def __init__(self): self.fields_cache = {} def _update(self, provider, update): user = provider['config']['wufoo_username'] wufoo_data = { "url": WUFOO_URL.format(subdomain=user), "user": user, "api_key": provider['config']['wufoo_api_key'], "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES, "update": update} try: parser = self.get_feed_parser(provider, None) except requests.exceptions.Timeout as ex: raise IngestApiError.apiTimeoutError(ex, provider) except requests.exceptions.TooManyRedirects as ex: raise IngestApiError.apiRedirectError(ex, provider) except requests.exceptions.RequestException as ex: raise IngestApiError.apiRequestError(ex, provider) except Exception as error: traceback.print_exc() raise IngestApiError.apiGeneralError(error, self.provider) items = parser.parse(wufoo_data, provider) return [items] register_feeding_service(WufooFeedingService.NAME, WufooFeedingService(), WufooFeedingService.ERRORS)
try: if not os.path.exists(os.path.join(file_path, "_PROCESSED/")): os.makedirs(os.path.join(file_path, "_PROCESSED/")) if not os.path.exists(os.path.join(file_path, "_ERROR/")): os.makedirs(os.path.join(file_path, "_ERROR/")) except Exception as ex: raise IngestFileError.folderCreateError(ex, provider) try: if success: shutil.copy2(os.path.join(file_path, filename), os.path.join(file_path, "_PROCESSED/")) else: shutil.copy2(os.path.join(file_path, filename), os.path.join(file_path, "_ERROR/")) except Exception as ex: raise IngestFileError.fileMoveError(ex, provider) finally: os.remove(os.path.join(file_path, filename)) def is_latest_content(self, last_updated, provider_last_updated=None): """ Parse file only if it's not older than provider last update -10m """ if not provider_last_updated: provider_last_updated = utcnow() - timedelta(days=7) return provider_last_updated - timedelta(minutes=10) < last_updated register_feeding_service(FileFeedingService.NAME, FileFeedingService(), FileFeedingService.ERRORS)
for image in image_items: item_references.append({'residRef': image['guid']}) return package def _create_image_items(self, image_links, text_item): image_items = [] for image_url in image_links: guid_hash = hashlib.sha1(image_url.encode('utf8')).hexdigest() img_item = { 'guid': generate_guid(type=GUID_TAG, id=text_item.get('guid') + guid_hash + '-image'), ITEM_TYPE: CONTENT_TYPE.PICTURE, 'versioncreated': text_item.get('versioncreated'), 'firstcreated': text_item.get('firstcreated'), 'headline': text_item.get('headline', ''), 'renditions': { 'baseImage': { 'href': image_url } } } image_items.append(img_item) return image_items register_feeding_service(TwitterFeedingService)
except imaplib.IMAP4.error: raise IngestEmailError.emailLoginError(imaplib.IMAP4.error, provider) rv, data = imap.select(config.get('mailbox', None), readonly=False) if rv == 'OK': rv, data = imap.search(None, config.get('filter', '(UNSEEN)')) if rv == 'OK': new_items = [] for num in data[0].split(): rv, data = imap.fetch(num, '(RFC822)') if rv == 'OK': try: parser = self.get_feed_parser(provider, data) new_items.append(parser.parse(data, provider)) rv, data = imap.store(num, '+FLAGS', '\\Seen') except IngestEmailError: continue imap.close() imap.logout() except IngestEmailError: raise except Exception as ex: raise IngestEmailError.emailError(ex, provider) return new_items def prepare_href(self, href, mimetype=None): return url_for_media(href, mimetype) register_feeding_service(EmailFeedingService.NAME, EmailFeedingService(), EmailFeedingService.ERRORS)
if rv != 'OK': raise IngestEmailError.emailMailboxError() try: rv, data = imap.search(None, config.get('filter', '(UNSEEN)')) if rv != 'OK': raise IngestEmailError.emailFilterError() for num in data[0].split(): rv, data = imap.fetch(num, '(RFC822)') if rv == 'OK' and not test: try: parser = self.get_feed_parser(provider, data) new_items.append(parser.parse(data, provider)) rv, data = imap.store(num, '+FLAGS', '\\Seen') except IngestEmailError: continue finally: imap.close() finally: imap.logout() except IngestEmailError: raise except Exception as ex: raise IngestEmailError.emailError(ex, provider) return new_items def prepare_href(self, href, mimetype=None): return url_for_media(href, mimetype) register_feeding_service(EmailFeedingService.NAME, EmailFeedingService(), EmailFeedingService.ERRORS)
superdesk.privilege(name='planning_event_spike', label='Planning - Spike Event Items', description='Ability to spike an Event') superdesk.privilege(name='planning_event_unspike', label='Planning - Unspike Event Items', description='Ability to unspike an Event') superdesk.intrinsic_privilege(PlanningUnlockResource.endpoint_name, method=['POST']) superdesk.intrinsic_privilege(EventsUnlockResource.endpoint_name, method=['POST']) import planning.output_formatters # noqa app.client_config['max_recurrent_events'] = get_max_recurrent_events(app) register_feeding_service(EventFileFeedingService.NAME, EventFileFeedingService(), EventFileFeedingService.ERRORS) register_feeding_service(EventHTTPFeedingService.NAME, EventHTTPFeedingService(), EventHTTPFeedingService.ERRORS) register_feeding_service(EventEmailFeedingService.NAME, EventEmailFeedingService(), EventEmailFeedingService.ERRORS) register_feed_parser(IcsTwoFeedParser.NAME, IcsTwoFeedParser()) register_feed_parser(NTBEventXMLFeedParser.NAME, NTBEventXMLFeedParser())
def _create_image_items(self, image_links, text_item):
    """Build one picture item per image URL, derived from *text_item*.

    :param image_links: iterable of image URLs (strings)
    :param text_item: dict of the text item the images belong to; its
        ``guid``, ``versioncreated``, ``firstcreated`` and ``headline``
        values are read
    :return: list of picture item dicts, in the order of *image_links*
    """
    def build_picture(link):
        # The SHA-1 of the URL is appended to the text item's guid, so each
        # link contributes its own id to generate_guid.
        digest = hashlib.sha1(link.encode('utf8')).hexdigest()
        return {
            'guid': generate_guid(
                type=GUID_TAG,
                id=text_item.get('guid') + digest + '-image'),
            ITEM_TYPE: CONTENT_TYPE.PICTURE,
            'versioncreated': text_item.get('versioncreated'),
            'firstcreated': text_item.get('firstcreated'),
            'headline': text_item.get('headline', ''),
            'renditions': {'baseImage': {'href': link}},
        }

    return [build_picture(link) for link in image_links]


register_feeding_service(TwitterFeedingService.NAME, TwitterFeedingService(), TwitterFeedingService.ERRORS)
results_str = re.search('[0-9]+', item_ident).group() if results_str is None: raise IngestApiError.apiGeneralError( Exception(response.text), provider) num_results = int(results_str) if num_results > 0: items.append(response.text) if offset >= num_results: return items offset += offset_jump else: if re.match('Error: No API Key provided', response.text): raise IngestApiError.apiAuthError( Exception(response.text), provider) elif response.status_code == 404: raise IngestApiError.apiNotFoundError( Exception(response.reason), provider) else: raise IngestApiError.apiGeneralError( Exception(response.reason), provider) return items register_feeding_service(BBCLDRSFeedingService)
move_dest_file_path, file_modify, failed=failed) except EmptyFile: continue except Exception as e: logger.error( "Error while parsing {filename}: {msg}".format( filename=filename, msg=e)) if do_move: move_dest_file_path_error = os.path.join( move_path_error, filename) self._move(ftp, filename, move_dest_file_path_error, file_modify, failed=True) self._log_msg( "Processing finished. Exec time: {:.4f} secs.".format( self._timer.stop("start_processing"))) except IngestFtpError: raise except Exception as ex: raise IngestFtpError.ftpError(ex, provider) register_feeding_service(FTPFeedingService)
'sport_name': sport_name, 'comp_name': comp_name, 'comp_id': comp_id }, provider) if len(items) > 0: yield items def _request(self, url): try: response = requests.get(url, params={}, timeout=120) except requests.exceptions.Timeout as ex: # Maybe set up for a retry, or continue in a retry loop raise IngestApiError.apiTimeoutError(ex, self.provider) except requests.exceptions.TooManyRedirects as ex: # Tell the user their URL was bad and try a different one raise IngestApiError.apiRedirectError(ex, self.provider) except requests.exceptions.RequestException as ex: # catastrophic error. bail. raise IngestApiError.apiRequestError(ex, self.provider) except Exception as error: traceback.print_exc() raise IngestApiError.apiGeneralError(error, self.provider) if response.status_code == 404: raise LookupError('Not found') return response.content register_feeding_service(AAPSportsHTTPFeedingService)
if not os.path.exists(os.path.join(file_path, "_PROCESSED/")): os.makedirs(os.path.join(file_path, "_PROCESSED/")) if not os.path.exists(os.path.join(file_path, "_ERROR/")): os.makedirs(os.path.join(file_path, "_ERROR/")) except Exception as ex: raise IngestFileError.folderCreateError(ex, provider) try: if success: shutil.copy2(os.path.join(file_path, filename), os.path.join(file_path, "_PROCESSED/")) else: shutil.copy2(os.path.join(file_path, filename), os.path.join(file_path, "_ERROR/")) except Exception as ex: raise IngestFileError.fileMoveError(ex, provider) finally: os.remove(os.path.join(file_path, filename)) def get_last_updated(self, file_path): """Get last updated time for file. Using both mtime and ctime timestamps not to miss old files being copied around and recent files after changes done in place. """ stat = os.lstat(file_path) timestamp = max(stat.st_mtime, stat.st_ctime) return datetime.fromtimestamp(timestamp, tz=utc) register_feeding_service(FileFeedingService)
item_details['media_link'] = media_link return item_details def _fetch_media_link(self, source_id, item_id): """ Fetch a list of all available renditions for an item and return a link to file :param source_id: :param item_id: :return str or None: link to the image or None """ # fetch media renditions media_renditions = self.get_url( url=self.HTTP_ITEM_MEDIA_LIST_URL.format(source_id=source_id, item_id=item_id) ) for rend in media_renditions: if rend.get('kind') in self.ALLOWED_MEDIA_KINDS and rend.get('mimeType') in self.ALLOWED_MEDIA_MIMETYPES: media_id = rend.get('id') if media_id: return self.HTTP_ITEM_MEDIA_DETAILS_URL.format( source_id=source_id, item_id=item_id, media_id=media_id ) return None register_feeding_service(ANPNewsApiFeedingService) register_feeding_service_parser(ANPNewsApiFeedingService.NAME, 'anp_news_api')
except Exception as ex: raise IngestApiError.apiGeneralError(ex, self.provider) data = json.loads(response.content.decode('UTF-8')) service = get_resource_service('traffic_incidents') incidents = [] for feature in data.get('features', []): props = feature.get('properties', {}) incident = { 'guid': int(props.get('id')), 'start_date': convert_date(props.get('startDate')), 'end_date': convert_date(props.get('endDate')), 'incident_type': props.get('type'), 'incident_description': props.get('description'), 'city': props.get('city'), 'state': props.get('state'), 'from_street_name': props.get('fromStreetName'), 'from_cross_street_name': props.get('fromCrossStreetName'), 'to_street_name': props.get('toStreetName'), 'to_cross_street_name': props.get('toCrossStreetName'), 'geometry': feature.get('geometry') } incident.get('geometry').pop('crs') incidents.append(incident) service.delete(lookup={}) service.post(incidents) register_feeding_service(IntelematicsIncidentHTTPFeedingService)
# # This file is part of Superdesk. # # Copyright 2013, 2014 Sourcefabric z.u. and contributors. # # For the full copyright and license information, please see the # AUTHORS and LICENSE files distributed with this source code, or # at https://www.sourcefabric.org/superdesk/license from superdesk.io.feeding_services import FTPFeedingService from superdesk.io.registry import register_feeding_service, register_feeding_service_parser class NTBEventsFTPFeedingService(FTPFeedingService): """ Feeding Service class which can read events from NTB via FTP """ NAME = 'ntb_events_ftp' label = 'NTB Events FTP' service = 'events' register_feeding_service(NTBEventsFTPFeedingService) register_feeding_service_parser(NTBEventsFTPFeedingService.NAME, 'ntb_events_api_xml') register_feeding_service_parser(NTBEventsFTPFeedingService.NAME, 'ntb_event_xml') register_feeding_service_parser(NTBEventsFTPFeedingService.NAME, 'ics20') register_feeding_service_parser(NTBEventsFTPFeedingService.NAME, 'ntb_nifs')
Parse item message and return given items. """ payload = {'id': id} tree = self._get_tree('item', payload) parser = self.get_feed_parser(self.provider, tree) items = parser.parse(tree, self.provider) return items def _fetch_items_in_package(self, item): """ Fetch remote assets for given item. """ items = [] for group in item.get('groups', []): for ref in group.get('refs', []): if 'residRef' in ref: items.extend(self._parse_items(ref.get('residRef'))) return items def prepare_href(self, href, mimetype=None): (scheme, netloc, path, params, query, fragment) = urlparse(href) new_href = urlunparse((scheme, netloc, path, '', '', '')) return '%s?auth_token=%s' % (new_href, self._get_auth_token(self.provider, update=True)) register_feeding_service(ReutersHTTPFeedingService.NAME, ReutersHTTPFeedingService(), ReutersHTTPFeedingService.ERRORS)
params['minDateTime'] = min_date_time params['sequenceNumber'] = sequence_number try: r = requests.get(URL, auth=(user, password), params=params) except Exception: raise IngestApiError.apiRequestError(Exception('error while doing the request')) try: root_elt = etree.fromstring(r.content) except Exception: raise IngestApiError.apiRequestError(Exception('error while doing the request')) parser = self.get_feed_parser(provider) items = parser.parse(root_elt, provider) try: min_date_time = root_elt.xpath('//iptc:timestamp[@role="minDateTime"]/text()', namespaces=NS)[0].strip() sequence_number = root_elt.xpath('//iptc:transmitId/text()', namespaces=NS)[0].strip() except IndexError: raise IngestApiError.apiRequestError(Exception('missing minDateTime or transmitId')) else: update.setdefault('private', {}) update['private']['min_date_time'] = min_date_time update['private']['sequence_number'] = sequence_number return [items] register_feeding_service(APFeedingService)
def _save(self, prices, market):
    """Replace today's fuel price records for *market* with freshly built ones.

    One record is built for every fuel type of a station that is flagged
    ``pricesAvailable`` and has a matching entry in the ``fuelPrice`` list.
    If at least one record was built, the records already stored for the
    same market and sample date are deleted and the new ones are posted;
    otherwise the stored data is left untouched.

    :param prices: iterable of dicts, each with a ``fuelStation`` dict
        (``address``, ``location`` with ``lon``/``lat``, ``fuelTypes``) and
        a ``fuelPrice`` list of dicts with ``fuelType`` and ``price``
    :param market: dict whose ``market`` value identifies the market
    """
    service = get_resource_service('fuel')

    # Get the date for the current save (YYYY-MM-DD part of the ISO timestamp)
    today = datetime.now().isoformat()[:10]

    fuel_records = []
    for price in prices:
        # Extract the servo id, address
        servo = price.get('fuelStation')
        servo_address = servo.get('address')
        location = servo.get('location')
        servo_location = {
            'type': 'Point',
            'coordinates': [location.get('lon'), location.get('lat')],
        }

        # Scan the fuel types available at the servo and extract them
        for fuel_type in servo.get('fuelTypes'):
            if not fuel_type.get('pricesAvailable'):
                continue
            fuel_value = fuel_type.get('value')
            type_price = next(
                (p for p in price.get('fuelPrice') if p.get('fuelType') == fuel_value),
                None)
            if type_price is None:
                # Flagged as available but no price entry was delivered.
                # Skip this fuel type instead of letting StopIteration abort
                # the whole save.
                continue

            # Construct the record
            fuel_records.append({
                'sample_date': today,
                'market': market.get('market'),
                'address': servo_address,
                'fuel_type': fuel_value,
                'location': servo_location,
                'price': type_price.get('price'),
            })

    if fuel_records:
        # delete the old dataset for today
        service.delete(lookup={'market': market.get('market'), 'sample_date': today})
        # post the new ones
        service.post(fuel_records)


register_feeding_service(IntelematicsFuelHTTPFeedingService)
'firstcreated': text_item['firstcreated'], 'versioncreated': text_item['versioncreated'], 'headline': text_item.get('headline', ''), 'groups': [{ 'id': 'root', 'role': 'grpRole:NEP', 'refs': [{ 'idRef': 'main' }], }, { 'id': 'main', 'role': 'main', 'refs': [], }] } item_references = package['groups'][1]['refs'] item_references.append({'residRef': text_item['guid']}) for image in image_items: item_references.append({'residRef': image['guid']}) return package register_feeding_service(RSSFeedingService.NAME, RSSFeedingService(), RSSFeedingService.ERRORS)
items = [] offset = 0 while True: params["offset"] = offset response = self.get_url(url, params=params, headers=headers) # The total number of results are given to us in json, get them # via a regex to read the field so we don't have to convert the # whole thing to json pointlessly item_ident = re.search('"total": *[0-9]*', response.text).group() results_str = re.search("[0-9]+", item_ident).group() if results_str is None: raise IngestApiError.apiGeneralError(Exception(response.text), self.provider) num_results = int(results_str) if num_results > 0: items.append(response.text) if offset >= num_results: return items offset += offset_jump return items register_feeding_service(BBCLDRSFeedingService)
raise IngestApiError.apiRequestError( Exception('Invalid XML, <error> element not found')) parser = self.get_feed_parser(provider) items = [] for elt in root_elt.xpath('//RBNews'): item = parser.parse(elt, provider) items.append(item) if not url_override: try: queue_id = elt.xpath('.//ServiceQueueId/text()')[0] except IndexError: raise IngestApiError.apiRequestError( Exception('missing ServiceQueueId element')) ack_params = { 'user': user, 'password': password, 'servicequeueid': queue_id } try: requests.get(URL_ACK, params=ack_params) except Exception as e: raise IngestApiError.apiRequestError( Exception('error while doing the request')) return [items] register_feeding_service(RitzauFeedingService.NAME, RitzauFeedingService(), RitzauFeedingService.ERRORS)
""" payload = {'id': id} tree = self._get_tree('item', payload) parser = self.get_feed_parser(self.provider, tree) items = parser.parse(tree, self.provider) return items def _fetch_items_in_package(self, item): """ Fetch remote assets for given item. """ items = [] for group in item.get('groups', []): for ref in group.get('refs', []): if 'residRef' in ref: items.extend(self._parse_items(ref.get('residRef'))) return items def prepare_href(self, href, mimetype=None): (scheme, netloc, path, params, query, fragment) = urlparse(href) new_href = urlunparse((scheme, netloc, path, '', '', '')) return '%s?auth_token=%s' % (new_href, self._get_auth_token(self.provider, update=True)) register_feeding_service(ReutersHTTPFeedingService) register_feeding_service_parser(ReutersHTTPFeedingService.NAME, 'newsml2')
from superdesk.io.feeding_services.file_service import FileFeedingService from superdesk.tests import TestCase from superdesk.utc import utcnow class TestProviderService(FeedingService): NAME = 'test' ERRORS = [ProviderError.anpaError(None, None).get_error_description()] def _update(self, provider, update): return [] register_feeding_service(TestProviderService) class CeleryTaskRaceTest(TestCase): def test_the_second_update_fails_if_already_running(self): provider = {'_id': 'abc', 'name': 'test provider', 'update_schedule': {'minutes': 1}} removed = mark_task_as_not_running(provider['name'], provider['_id']) self.assertFalse(removed) failed_to_mark_as_running = is_task_running(provider['name'], provider['_id'], {'minutes': 1}) self.assertFalse(failed_to_mark_as_running, 'Failed to mark ingest update as running') failed_to_mark_as_running = is_task_running(provider['name'], provider['_id'], {'minutes': 1}) self.assertTrue(failed_to_mark_as_running, 'Ingest update marked as running, possible race condition') removed = mark_task_as_not_running(provider['name'], provider['_id'])
Exception('error while doing the request')) try: root_elt = etree.fromstring(r.content) except Exception as e: raise IngestApiError.apiRequestError( Exception('error while doing the request')) parser = self.get_feed_parser(provider) items = parser.parse(root_elt, provider) try: min_date_time = root_elt.xpath( '//iptc:timestamp[@role="minDateTime"]/text()', namespaces=NS)[0].strip() sequence_number = root_elt.xpath('//iptc:transmitId/text()', namespaces=NS)[0].strip() except IndexError: raise IngestApiError.apiRequestError( Exception('missing minDateTime or transmitId')) else: update.setdefault('private', {}) update['private']['min_date_time'] = min_date_time update['private']['sequence_number'] = sequence_number return [items] register_feeding_service(APFeedingService.NAME, APFeedingService(), APFeedingService.ERRORS)
logger.info('Parsing {}/{} {}/{}'.format(sport_id, sport_name, comp_id, comp_name)) items = parser.parse({'fixture_xml': fixture_xml, 'sport_id': sport_id, 'sport_name': sport_name, 'comp_name': comp_name, 'comp_id': comp_id}, provider) if len(items) > 0: yield items def _request(self, url): try: response = requests.get(url, params={}, timeout=120) except requests.exceptions.Timeout as ex: # Maybe set up for a retry, or continue in a retry loop raise IngestApiError.apiTimeoutError(ex, self.provider) except requests.exceptions.TooManyRedirects as ex: # Tell the user their URL was bad and try a different one raise IngestApiError.apiRedirectError(ex, self.provider) except requests.exceptions.RequestException as ex: # catastrophic error. bail. raise IngestApiError.apiRequestError(ex, self.provider) except Exception as error: traceback.print_exc() raise IngestApiError.apiGeneralError(error, self.provider) if response.status_code == 404: raise LookupError('Not found') return response.content register_feeding_service(AAPSportsHTTPFeedingService)
'tokens': { 'auth_token': token, 'created': utcnow() - timedelta(hours=hours), } } class TestFeedingService(HTTPFeedingService): NAME = 'test' ERRORS = [] def _update(self, provider, update): pass register_feeding_service(TestFeedingService.NAME, TestFeedingService(), TestFeedingService.ERRORS) class ErrorResponseSession(MagicMock): def get(self, *args, **kwargs): response = requests.Response() response.status_code = 401 return response class GetTokenTestCase(TestCase): def test_get_null_token(self): provider = {} self.assertEquals('', TestFeedingService()._get_auth_token(provider))
""" package = { ITEM_TYPE: CONTENT_TYPE.COMPOSITE, 'guid': generate_guid(type=GUID_TAG), 'firstcreated': text_item['firstcreated'], 'versioncreated': text_item['versioncreated'], 'headline': text_item.get('headline', ''), 'groups': [ { 'id': 'root', 'role': 'grpRole:NEP', 'refs': [{'idRef': 'main'}], }, { 'id': 'main', 'role': 'main', 'refs': [], } ] } item_references = package['groups'][1]['refs'] item_references.append({'residRef': text_item['guid']}) for image in image_items: item_references.append({'residRef': image['guid']}) return package register_feeding_service(RSSFeedingService)
""" package = { ITEM_TYPE: CONTENT_TYPE.COMPOSITE, 'guid': generate_guid(type=GUID_TAG), 'firstcreated': text_item['firstcreated'], 'versioncreated': text_item['versioncreated'], 'headline': text_item.get('headline', ''), 'groups': [ { 'id': 'root', 'role': 'grpRole:NEP', 'refs': [{'idRef': 'main'}], }, { 'id': 'main', 'role': 'main', 'refs': [], } ] } item_references = package['groups'][1]['refs'] item_references.append({'residRef': text_item['guid']}) for image in image_items: item_references.append({'residRef': image['guid']}) return package register_feeding_service(RSSFeedingService.NAME, RSSFeedingService(), RSSFeedingService.ERRORS)
raise IngestEmailError.emailMailboxError() try: rv, data = imap.search(None, config.get('filter', '(UNSEEN)')) if rv != 'OK': raise IngestEmailError.emailFilterError() for num in data[0].split(): rv, data = imap.fetch(num, '(RFC822)') if rv == 'OK' and not test: try: parser = self.get_feed_parser(provider, data) new_items.append(parser.parse(data, provider)) rv, data = imap.store(num, '+FLAGS', '\\Seen') except IngestEmailError: continue finally: imap.close() finally: imap.logout() except IngestEmailError: raise except Exception as ex: raise IngestEmailError.emailError(ex, provider) return new_items def prepare_href(self, href, mimetype=None): return url_for_media(href, mimetype) register_feeding_service(EmailFeedingService) register_feeding_service_parser(EmailFeedingService.NAME, 'email_rfc822')
except Exception: raise IngestApiError.apiRequestError(Exception('error while parsing the request answer')) try: if root_elt.xpath('(//error/text())[1]')[0] != '0': err_msg = root_elt.xpath('(//errormsg/text())[1]')[0] raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg))) except IndexError: raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found')) parser = self.get_feed_parser(provider) items = [] for elt in root_elt.xpath('//RBNews'): item = parser.parse(elt, provider) items.append(item) if not url_override: try: queue_id = elt.xpath('.//ServiceQueueId/text()')[0] except IndexError: raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element')) ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id} try: requests.get(URL_ACK, params=ack_params) except Exception: raise IngestApiError.apiRequestError(Exception('error while doing the request')) return [items] register_feeding_service(RitzauFeedingService)
payload = {'id': id} tree = self._get_tree('item', payload) parser = self.get_feed_parser(self.provider, tree) items = parser.parse(tree, self.provider) return items def _fetch_items_in_package(self, item): """ Fetch remote assets for given item. """ items = [] for group in item.get('groups', []): for ref in group.get('refs', []): if 'residRef' in ref: items.extend(self._parse_items(ref.get('residRef'))) return items def prepare_href(self, href, mimetype=None): (scheme, netloc, path, params, query, fragment) = urlparse(href) new_href = urlunparse((scheme, netloc, path, '', '', '')) return '%s?auth_token=%s' % ( new_href, self._get_auth_token(self.provider, update=True)) register_feeding_service(ReutersHTTPFeedingService.NAME, ReutersHTTPFeedingService(), ReutersHTTPFeedingService.ERRORS)
def init_app(_app):
    """Register the CNA feeding service and its parser entry.

    The ``_app`` argument is accepted but never read. The parser is
    registered under the service's ``NAME`` with ``None`` as the parser name.
    """
    service_cls = CNAFeedingService
    register_feeding_service(service_cls)
    register_feeding_service_parser(service_cls.NAME, None)
generate_guid(type=GUID_TAG), 'firstcreated': text_item['firstcreated'], 'versioncreated': text_item['versioncreated'], 'headline': text_item.get('headline', ''), 'groups': [{ 'id': 'root', 'role': 'grpRole:NEP', 'refs': [{ 'idRef': 'main' }], }, { 'id': 'main', 'role': 'main', 'refs': [], }] } item_references = package['groups'][1]['refs'] item_references.append({'residRef': text_item['guid']}) for image in image_items: item_references.append({'residRef': image['guid']}) return package register_feeding_service(RSSFeedingService)
# NOTE: the attribute and the two methods below are class-body members; the
# enclosing class statement is not part of this snippet.
# Presumably limits the parsers usable with this service to 'wufoo' -- confirm
# against the base class.
parser_restricted_values = ['wufoo']


def __init__(self):
    """Create the service with an empty ``fields_cache`` dict."""
    self.fields_cache = {}


def _update(self, provider, update):
    """Collect the Wufoo request settings and hand them to the feed parser.

    :param provider: ingest provider dict; ``provider['config']`` must hold
        ``wufoo_username`` and ``wufoo_api_key``.
    :param update: passed through to the parser under the ``"update"`` key.
    :return: a one-element list wrapping whatever ``parser.parse`` returns.
    :raises KeyError: if ``config`` or one of the two Wufoo keys is missing.
    :raises: the error built by the matching ``IngestApiError.api*Error``
        factory when ``get_feed_parser`` fails.
    """
    config = provider['config']
    user = config['wufoo_username']
    wufoo_data = {
        "url": WUFOO_URL.format(subdomain=user),
        "user": user,
        "api_key": config['wufoo_api_key'],
        "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES,
        "update": update,
    }

    try:
        parser = self.get_feed_parser(provider, None)
    except requests.exceptions.Timeout as ex:
        raise IngestApiError.apiTimeoutError(ex, provider)
    except requests.exceptions.TooManyRedirects as ex:
        raise IngestApiError.apiRedirectError(ex, provider)
    except requests.exceptions.RequestException as ex:
        raise IngestApiError.apiRequestError(ex, provider)
    except Exception as error:
        traceback.print_exc()
        # Report against the provider we were called with, like the handlers
        # above; ``self.provider`` may be unset or stale when ``_update`` is
        # invoked directly.
        raise IngestApiError.apiGeneralError(error, provider)

    # NOTE(review): ``parser.parse`` runs outside the ``try`` above, so any
    # error it raises is not translated into an IngestApiError -- confirm
    # that this is intended.
    items = parser.parse(wufoo_data, provider)
    return [items]


# Module-level registration (not a class member).
register_feeding_service(WufooFeedingService)
location[0]['name'], 'address.line': location[0]['address']['line'], 'address.country': location[0]['address']['country'], })) if saved_location and status == 'UPDATED': location_service.patch( saved_location[0][superdesk.config.ID_FIELD], location[0]) elif not saved_location: _location = deepcopy(location) location_service.post(_location) item['location'][0]['qcode'] = _location[0]['guid'] old_item = events_service.find_one(guid=item[GUID_FIELD], req=None) if not old_item: if not status: item.setdefault('firstcreated', datetime.now()) item.setdefault('versioncreated', datetime.now()) list_items.append(item) else: old_item.update(item) list_items.append(old_item) return list_items register_feeding_service(SpreadsheetFeedingService) register_feeding_service_parser(SpreadsheetFeedingService.NAME, 'belgaspreadsheet')
try: rv, data = imap.search(None, config.get('filter', '(UNSEEN)')) if rv != 'OK': raise IngestEmailError.emailFilterError() for num in data[0].split(): rv, data = imap.fetch(num, '(RFC822)') if rv == 'OK' and not test: try: parser = self.get_feed_parser(provider, data) new_items.append(parser.parse(data, provider)) rv, data = imap.store(num, '+FLAGS', '\\Seen') except IngestEmailError: continue finally: imap.close() finally: imap.logout() except IngestEmailError: raise except Exception as ex: raise IngestEmailError.emailError(ex, provider) return new_items def prepare_href(self, href, mimetype=None): return url_for_media(href, mimetype) register_feeding_service(EmailFeedingService) register_feeding_service_parser(EmailFeedingService.NAME, 'email_rfc822')
from superdesk.errors import SuperdeskApiError, ProviderError from superdesk.io.registry import register_feeding_service, registered_feeding_services from superdesk.io.commands.remove_expired_content import get_expired_items, RemoveExpiredContent from superdesk.io.feeding_services import FeedingService from superdesk.io.feeding_services.file_service import FileFeedingService from superdesk.tests import TestCase from superdesk.utc import utcnow class TestProviderService(FeedingService): def _update(self, provider, update): return [] register_feeding_service( 'test', TestProviderService(), [ProviderError.anpaError(None, None).get_error_description()]) class CeleryTaskRaceTest(TestCase): def test_the_second_update_fails_if_already_running(self): provider = { '_id': 'abc', 'name': 'test provider', 'update_schedule': { 'minutes': 1 } } removed = mark_task_as_not_running(provider['name'], provider['_id']) self.assertFalse(removed) failed_to_mark_as_running = is_task_running(provider['name'],
def _create_image_items(self, image_links, text_item):
    """Build one picture item per image URL, based on ``text_item``.

    Each item's guid is generated from the text item's guid, the SHA-1 hex
    digest of the UTF-8 encoded image URL and the suffix ``-image``. The
    creation timestamps and headline are copied from ``text_item``, and the
    URL is stored as the ``baseImage`` rendition href.

    :param image_links: iterable of image URL strings.
    :param text_item: dict of the text item the images belong to.
    :return: list of picture item dicts, in the order of ``image_links``.
    """
    def build_picture(url):
        # Hash the URL so every image gets its own guid suffix.
        url_digest = hashlib.sha1(url.encode('utf8')).hexdigest()
        picture_guid = generate_guid(
            type=GUID_TAG,
            id=text_item.get('guid') + url_digest + '-image',
        )
        return {
            'guid': picture_guid,
            ITEM_TYPE: CONTENT_TYPE.PICTURE,
            'versioncreated': text_item.get('versioncreated'),
            'firstcreated': text_item.get('firstcreated'),
            'headline': text_item.get('headline', ''),
            'renditions': {
                'baseImage': {
                    'href': url,
                },
            },
        }

    return [build_picture(link) for link in image_links]


# Module-level registration (not a class member).
register_feeding_service(TwitterFeedingService)
nitf_ref = complete_item.get('data', {}).get('item', {}).get('renditions', {}).get('nitf', {}).get( 'href') if nitf_ref: logger.info('Get AP nitf : {}'.format(nitf_ref)) r = self.get_url(url=nitf_ref, params={'apikey': provider.get('config', {}).get('apikey')}, verify=False, allow_redirects=True) r.raise_for_status() root_elt = etree.fromstring(r.content) nitf_item = nitf.NITFFeedParser().parse(root_elt) complete_item['nitf'] = nitf_item else: if item.get('item', {}).get('type') == 'text': logger.warning('No NITF for story {}'.format(item.get('item', {}).get('uri'))) parsed_items.append(parser.parse(complete_item, provider)) # Any exception processing an indivisual item is swallowed except Exception as ex: logger.exception(ex) # Save the link for next time upd_provider = provider.get('config') upd_provider['next_link'] = nextLink upd_provider['recoverytime'] = None update['config'] = upd_provider return [parsed_items] register_feeding_service(APMediaFeedingService)
'tokens': { 'auth_token': token, 'created': utcnow() - timedelta(hours=hours), } } class TestFeedingService(HTTPFeedingService): NAME = TEST_FEEDING_SERVICE_NAME ERRORS = [] def _update(self, provider, update): pass register_feeding_service(TestFeedingService.NAME, TestFeedingService(), TestFeedingService.ERRORS) class ErrorResponseSession(MagicMock): def get(self, *args, **kwargs): response = requests.Response() response.status_code = 401 return response class GetTokenTestCase(TestCase): def test_get_null_token(self): provider = {} self.assertEquals('', TestFeedingService()._get_auth_token(provider)) def test_get_existing_token(self):
'tokens': { 'auth_token': token, 'created': utcnow() - timedelta(hours=hours), } } class TestFeedingService(HTTPFeedingService): NAME = TEST_FEEDING_SERVICE_NAME ERRORS = [] def _update(self, provider, update): pass register_feeding_service(TestFeedingService) class ErrorResponseSession(MagicMock): def get(self, *args, **kwargs): response = requests.Response() response.status_code = 401 return response class GetTokenTestCase(TestCase): def test_get_null_token(self): provider = {} self.assertEquals('', TestFeedingService()._get_auth_token(provider))