def _fetch_data(self, config, provider):
    """Fetch all result pages published since the provider was last updated.

    The endpoint is requested repeatedly with an increasing ``offset`` until
    the offset reaches the ``total`` value reported in the response body.

    :param dict config: provider configuration, ``url`` and ``api_key`` are
        required
    :param provider: ingest provider; its ``last_updated`` value (epoch if
        missing) is sent as ``start`` and it is attached to raised errors
    :return: raw response bodies of the fetched pages
    :rtype: list
    :raises IngestApiError: on connection failure, on a non-OK response, or
        when the response body carries no numeric ``total`` field
    """
    url = config['url']
    api_key = config['api_key']
    last_update = provider.get(
        'last_updated', utcfromtimestamp(0)).strftime('%Y-%m-%dT%H:%M:%S')

    # Results are paginated so we'll read this many at a time
    offset_jump = 10
    params = {'start': last_update, 'limit': offset_jump}
    headers = {'apikey': api_key}

    items = []
    offset = 0
    while True:
        params['offset'] = offset
        try:
            response = requests.get(url, params=params, headers=headers, timeout=30)
        except requests.exceptions.ConnectionError as err:
            raise IngestApiError.apiConnectionError(exception=err, provider=provider)

        if not response.ok:
            if re.match('Error: No API Key provided', response.text):
                raise IngestApiError.apiAuthError(Exception(response.text), provider)
            elif response.status_code == 404:
                raise IngestApiError.apiNotFoundError(
                    Exception(response.reason), provider)
            else:
                raise IngestApiError.apiGeneralError(
                    Exception(response.reason), provider)

        # The total number of results is given to us in json, read the field
        # via a regex so we don't have to convert the whole thing to json
        total_match = re.search(r'"total": *([0-9]+)', response.text)
        if total_match is None:
            # re.search() returns None when nothing matches; report that as an
            # ingest error instead of failing on ``None.group()``
            raise IngestApiError.apiGeneralError(
                Exception(response.text), provider)

        num_results = int(total_match.group(1))
        if num_results > 0:
            items.append(response.text)

        if offset >= num_results:
            return items

        offset += offset_jump
def _update(self, provider, update):
    """Request new items for the configured id list and parse them.

    The ``minDateTime`` timestamp and ``transmitId`` found in the response are
    written to ``update['private']``; the values stored on the provider by a
    previous run are sent along with the request.

    :param dict provider: ingest provider, ``config`` must hold ``username``,
        ``password`` and ``idList``
    :param dict update: provider changes, receives the ``private`` values
    :return: a list holding the parser result
    :rtype: list
    :raises SuperdeskIngestError: if a required setting is missing
    :raises IngestApiError: if the request fails, the answer is not valid XML
        or the expected elements are missing
    """
    try:
        config = provider['config']
        user = config['username']
        password = config['password']
        id_list = config['idList']
    except KeyError:
        # the error has to be raised: only constructing it lets execution
        # continue with unbound variables
        raise SuperdeskIngestError.notConfiguredError(
            Exception('username, password and idList are needed'))

    # we remove spaces and empty values from id_list to do a clean list
    id_list = ','.join(
        [id_.strip() for id_ in id_list.split(',') if id_.strip()])

    params = {
        'idList': id_list,
        'idListType': 'products',
        'format': '5',
        'maxItems': '25',
        'sortOrder': 'chronological',
    }
    try:
        min_date_time = provider['private']['min_date_time']
        sequence_number = provider['private']['sequence_number']
    except KeyError:
        # nothing stored yet, request without a starting point
        pass
    else:
        params['minDateTime'] = min_date_time
        params['sequenceNumber'] = sequence_number

    try:
        r = requests.get(URL, auth=(user, password), params=params)
    except Exception:
        raise IngestApiError.apiRequestError(
            Exception('error while doing the request'))

    try:
        root_elt = etree.fromstring(r.content)
    except Exception:
        raise IngestApiError.apiRequestError(
            Exception('error while doing the request'))

    parser = self.get_feed_parser(provider)
    items = parser.parse(root_elt, provider)

    try:
        min_date_time = root_elt.xpath(
            '//iptc:timestamp[@role="minDateTime"]/text()',
            namespaces=NS)[0].strip()
        sequence_number = root_elt.xpath('//iptc:transmitId/text()',
                                         namespaces=NS)[0].strip()
    except IndexError:
        raise IngestApiError.apiRequestError(
            Exception('missing minDateTime or transmitId'))
    else:
        update.setdefault('private', {})
        update['private']['min_date_time'] = min_date_time
        update['private']['sequence_number'] = sequence_number

    return [items]
def _fetch_data(self, config, provider):
    """Fetch the latest feed data.

    :param dict config: RSS resource configuration
    :param provider: data provider instance, needed as an argument when
        raising ingest errors
    :return: fetched RSS data
    :rtype: str
    :raises IngestApiError: if fetching data fails for any reason
        (e.g. timeout, connection or authentication error, resource not
        found, etc.)
    """
    url = config["url"]

    if config.get("auth_required", False):
        auth = (config.get("username"), config.get("password"))
    else:
        auth = None

    try:
        # without a timeout an unresponsive server blocks the call forever
        response = requests.get(url, auth=auth, timeout=30)
    except requests.exceptions.Timeout as err:
        raise IngestApiError.apiTimeoutError(err, provider)
    except requests.exceptions.ConnectionError as err:
        raise IngestApiError.apiConnectionError(exception=err, provider=provider)
    except requests.exceptions.RequestException as err:
        raise IngestApiError.apiRequestError(err, provider)

    if response.ok:
        return response.content

    if response.status_code in (401, 403):
        raise IngestApiError.apiAuthError(Exception(response.reason), provider)
    elif response.status_code == 404:
        raise IngestApiError.apiNotFoundError(Exception(response.reason), provider)
    else:
        raise IngestApiError.apiGeneralError(Exception(response.reason), provider)
def _test(self, provider):
    """Send a minimal request to the configured endpoint.

    :param dict provider: ingest provider whose ``config`` holds ``url`` and
        ``api_key``
    :raises IngestApiError: if the connection fails or the response is not OK
    """
    config = provider.get('config', {})
    url, api_key = config['url'], config['api_key']

    try:
        response = requests.get(
            url,
            # limit the data to a single article and filter out all article
            # fields to save bandwidth
            params={'limit': 1, 'fields': 'id'},
            headers={'apikey': api_key},
            timeout=30,
        )
    except requests.exceptions.ConnectionError as err:
        raise IngestApiError.apiConnectionError(exception=err)

    if response.ok:
        return

    failure = Exception(response.reason)
    if response.status_code == 404:
        raise IngestApiError.apiNotFoundError(failure, provider)
    raise IngestApiError.apiGeneralError(failure, provider)
def _update(self, provider, update):
    """Fetch the queued news items, parse them and acknowledge each one.

    When a ``url`` is configured it is requested instead of the default one
    and no acknowledgement is sent.

    :param provider: ingest provider, handed to the feed parser
    :param update: not used by this implementation
    :return: a list holding the list of parsed items
    :rtype: list
    :raises SuperdeskIngestError: if credentials are missing or the configured
        URL is not an http link
    :raises IngestApiError: if the answer is not valid XML, reports an error
        code or lacks an expected element
    """
    config = self.config
    try:
        user, password = config["username"], config["password"]
    except KeyError:
        # the error has to be raised: only constructing it lets execution
        # continue with unbound credentials
        raise SuperdeskIngestError.notConfiguredError(
            Exception("username and password are needed"))

    url_override = (config.get("url") or "").strip()
    # the URL is optional, so only a non-empty value is validated
    if url_override and not url_override.startswith("http"):
        raise SuperdeskIngestError.notConfiguredError(
            Exception("if URL is set, it must be a valid http link"))

    params = {"user": user, "password": password, "maksAntal": 50}
    if not url_override:
        # items are acknowledged below only when no URL override is used
        params["waitAcknowledge"] = "true"

    r = self.get_url(url_override, params=params)

    try:
        root_elt = etree.fromstring(r.text)
    except Exception:
        raise IngestApiError.apiRequestError(
            Exception("error while parsing the request answer"))

    try:
        if root_elt.xpath("(//error/text())[1]")[0] != "0":
            err_msg = root_elt.xpath("(//errormsg/text())[1]")[0]
            raise IngestApiError.apiRequestError(
                Exception("error code returned by API: {msg}".format(
                    msg=err_msg)))
    except IndexError:
        raise IngestApiError.apiRequestError(
            Exception("Invalid XML, <error> element not found"))

    parser = self.get_feed_parser(provider)
    items = []
    for elt in root_elt.xpath("//RBNews"):
        item = parser.parse(elt, provider)
        items.append(item)
        if not url_override:
            try:
                queue_id = elt.xpath(".//ServiceQueueId/text()")[0]
            except IndexError:
                raise IngestApiError.apiRequestError(
                    Exception("missing ServiceQueueId element"))
            ack_params = {
                "user": user,
                "password": password,
                "servicequeueid": queue_id,
            }
            self.get_url(URL_ACK, params=ack_params)

    return [items]
def _generate_auth_token(self, provider):
    """
    Requests an Authentication Token from the URL configured in the Ingest Provider.

    :param provider: dict - Ingest provider details to which the current directory has been configured
    :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
    :return: text of the root element of the XML answer
    :rtype: str
    :raises: IngestApiError.apiGeneralError() if auth_url is missing in the Ingest Provider configuration
    :raises: IngestApiError.apiAuthError() if the response status is outside the 2xx range
    """
    session = requests.Session()
    session.mount('https://', SSLAdapter())

    config = provider.get('config', {})
    auth_url = config.get('auth_url', None)
    if not auth_url:
        missing_url = KeyError(
            ''' Ingest Provider {} is missing Authentication URL. Please check the configuration. '''.format(
                provider['name']))
        raise IngestApiError.apiGeneralError(provider=provider, exception=missing_url)

    credentials = {
        'username': config.get('username', ''),
        'password': config.get('password', ''),
    }
    response = session.get(auth_url, params=credentials, verify=False, timeout=30)

    if not 200 <= response.status_code < 300:
        raise IngestApiError.apiAuthError(provider=provider)

    # workaround for http mock lib
    return etree.fromstring(response.content).text
def _fetch_data(self, config, provider):
    """Fetch the latest feed data.

    :param dict config: RSS resource configuration
    :param provider: data provider instance, needed as an argument when
        raising ingest errors
    :return: fetched RSS data
    :rtype: str
    :raises IngestApiError: if fetching data fails for any reason
        (e.g. timeout, connection or authentication error, resource not
        found, etc.)
    """
    url = config['url']

    if config.get('auth_required', False):
        auth = (config.get('username'), config.get('password'))
    else:
        auth = None

    try:
        # without a timeout an unresponsive server blocks the call forever
        response = requests.get(url, auth=auth, timeout=30)
    except requests.exceptions.Timeout as err:
        raise IngestApiError.apiTimeoutError(err, provider)
    except requests.exceptions.ConnectionError as err:
        raise IngestApiError.apiConnectionError(exception=err, provider=provider)
    except requests.exceptions.RequestException as err:
        raise IngestApiError.apiRequestError(err, provider)

    if response.ok:
        return response.content

    if response.status_code in (401, 403):
        raise IngestApiError.apiAuthError(
            Exception(response.reason), provider)
    elif response.status_code == 404:
        raise IngestApiError.apiNotFoundError(
            Exception(response.reason), provider)
    else:
        raise IngestApiError.apiGeneralError(
            Exception(response.reason), provider)
class WufooFeedingService(FeedingService):
    """
    Feeding Service class which can read article(s) using Wufoo API
    """

    NAME = "wufoo"

    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description(),
    ]

    label = "Wufoo feed API"

    # configuration fields of the provider: account login and API key
    fields = [
        {
            "id": "wufoo_username",
            "type": "text",
            "label": "Login",
            "placeholder": "Wufoo login",
            "required": True
        },
        {
            "id": "wufoo_api_key",
            "type": "password",
            "label": "API key",
            "placeholder": "Wufoo API Key",
            "required": True,
        },
    ]

    def __init__(self):
        super().__init__()
        self.fields_cache = {}

    def _update(self, provider, update):
        """Hand the Wufoo connection settings to the feed parser.

        :param dict provider: ingest provider, ``config`` must hold
            ``wufoo_username`` and ``wufoo_api_key``
        :param update: provider changes, passed on to the parser
        :return: a list holding the parser result
        :rtype: list
        :raises IngestApiError: if obtaining the feed parser fails
        """
        user = provider["config"]["wufoo_username"]
        wufoo_data = {
            "url": WUFOO_URL.format(subdomain=user),
            "user": user,
            "api_key": provider["config"]["wufoo_api_key"],
            "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES,
            "update": update,
        }
        try:
            parser = self.get_feed_parser(provider, None)
        except requests.exceptions.Timeout as ex:
            raise IngestApiError.apiTimeoutError(ex, provider)
        except requests.exceptions.TooManyRedirects as ex:
            raise IngestApiError.apiRedirectError(ex, provider)
        except requests.exceptions.RequestException as ex:
            raise IngestApiError.apiRequestError(ex, provider)
        except Exception as error:
            traceback.print_exc()
            # use the argument like the handlers above; ``self.provider`` is
            # not set anywhere in this class
            raise IngestApiError.apiGeneralError(error, provider)
        items = parser.parse(wufoo_data, provider)
        return [items]
def _update(self, provider, update):
    """Download the configured URL and yield the parsed items as a list.

    :param dict provider: ingest provider, its ``config.url`` is requested
    :param update: not used by this implementation
    :return: generator yielding a single list of items
    :raises IngestApiError: if the request fails
    :raises LookupError: if the server answers with status 404
    """
    updated = utcnow()
    ttl_minutes = app.config['INGEST_EXPIRY_MINUTES']
    last_updated = provider.get('last_updated')
    oldest_allowed = updated - datetime.timedelta(minutes=ttl_minutes)
    if not last_updated or last_updated < oldest_allowed:
        last_updated = oldest_allowed

    self.provider = provider
    provider_config = provider.get('config')
    if not provider_config:
        # store the empty config on the provider as well
        provider_config = provider['config'] = {}

    self.URL = provider_config.get('url')
    payload = {}

    parser = self.get_feed_parser(provider)

    try:
        response = requests.get(self.URL, params=payload, timeout=15)
        # TODO: check if file has been updated since provider last_updated
        # although some provider do not include 'Last-Modified' in headers
        # so unsure how to do this
        logger.info('Http Headers: %s', response.headers)
    except requests.exceptions.RequestException as ex:
        if isinstance(ex, requests.exceptions.Timeout):
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        if isinstance(ex, requests.exceptions.TooManyRedirects):
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        # catastrophic error. bail.
        raise IngestApiError.apiRequestError(ex, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)

    if response.status_code == 404:
        raise LookupError('Not found %s' % payload)

    content = response.content
    logger.info('Ingesting: %s', str(content))

    # the input handed to the parser depends on the parser type
    if isinstance(parser, NTBEventXMLFeedParser):
        items = parser.parse(ET.fromstring(content), provider)
    elif isinstance(parser, IcsTwoFeedParser):
        items = parser.parse(Calendar.from_ical(content), provider)
    else:
        items = parser.parse(content)

    yield items if isinstance(items, list) else [items]
class WufooFeedingService(FeedingService):
    """
    Feeding Service class which can read article(s) using Wufoo API
    """

    NAME = 'wufoo'

    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description()
    ]

    label = 'Wufoo feed API'

    # configuration fields of the provider: account login and API key
    fields = [{
        'id': 'wufoo_username',
        'type': 'text',
        'label': 'Login',
        'placeholder': 'Wufoo login',
        'required': True
    }, {
        'id': 'wufoo_api_key',
        'type': 'password',
        'label': 'API key',
        'placeholder': 'Wufoo API Key',
        'required': True
    }]

    parser_restricted_values = ['wufoo']

    def __init__(self):
        # let the base class run its own initialisation as well
        super().__init__()
        self.fields_cache = {}

    def _update(self, provider, update):
        """Hand the Wufoo connection settings to the feed parser.

        :param dict provider: ingest provider, ``config`` must hold
            ``wufoo_username`` and ``wufoo_api_key``
        :param update: provider changes, passed on to the parser
        :return: a list holding the parser result
        :rtype: list
        :raises IngestApiError: if obtaining the feed parser fails
        """
        user = provider['config']['wufoo_username']
        wufoo_data = {
            "url": WUFOO_URL.format(subdomain=user),
            "user": user,
            "api_key": provider['config']['wufoo_api_key'],
            "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES,
            "update": update
        }
        try:
            parser = self.get_feed_parser(provider, None)
        except requests.exceptions.Timeout as ex:
            raise IngestApiError.apiTimeoutError(ex, provider)
        except requests.exceptions.TooManyRedirects as ex:
            raise IngestApiError.apiRedirectError(ex, provider)
        except requests.exceptions.RequestException as ex:
            raise IngestApiError.apiRequestError(ex, provider)
        except Exception as error:
            traceback.print_exc()
            # use the argument like the handlers above; ``self.provider`` is
            # not set anywhere in this class
            raise IngestApiError.apiGeneralError(error, provider)
        items = parser.parse(wufoo_data, provider)
        return [items]
def _fetch_data(self, config, provider):
    """Fetch all result pages published since the provider was last updated.

    The endpoint is requested repeatedly with an increasing ``offset`` until
    the offset reaches the ``total`` value reported in the response body.

    :param dict config: provider configuration, ``url`` and ``api_key`` are
        required
    :param provider: ingest provider; its ``last_updated`` value (epoch if
        missing) is sent as ``start`` and it is attached to raised errors
    :return: raw response bodies of the fetched pages
    :rtype: list
    :raises IngestApiError: on connection failure, on a non-OK response, or
        when the response body carries no numeric ``total`` field
    """
    url = config['url']
    api_key = config['api_key']
    last_update = provider.get('last_updated', utcfromtimestamp(0)).strftime('%Y-%m-%dT%H:%M:%S')

    # Results are paginated so we'll read this many at a time
    offset_jump = 10
    params = {'start': last_update, 'limit': offset_jump}
    headers = {'apikey': api_key}

    items = []
    offset = 0
    while True:
        params['offset'] = offset
        try:
            response = requests.get(url, params=params, headers=headers, timeout=30)
        except requests.exceptions.ConnectionError as err:
            raise IngestApiError.apiConnectionError(exception=err, provider=provider)

        if not response.ok:
            if re.match('Error: No API Key provided', response.text):
                raise IngestApiError.apiAuthError(Exception(response.text), provider)
            elif response.status_code == 404:
                raise IngestApiError.apiNotFoundError(Exception(response.reason), provider)
            else:
                raise IngestApiError.apiGeneralError(Exception(response.reason), provider)

        # The total number of results is given to us in json, read the field
        # via a regex so we don't have to convert the whole thing to json
        total_match = re.search(r'"total": *([0-9]+)', response.text)
        if total_match is None:
            # re.search() returns None when nothing matches; report that as an
            # ingest error instead of failing on ``None.group()``
            raise IngestApiError.apiGeneralError(Exception(response.text), provider)

        num_results = int(total_match.group(1))
        if num_results > 0:
            items.append(response.text)

        if offset >= num_results:
            return items

        offset += offset_jump
def _update(self, provider, update):
    """Request new items for the configured id list and parse them.

    The ``minDateTime`` timestamp and ``transmitId`` found in the response are
    written to ``update['private']``; the values stored on the provider by a
    previous run are sent along with the request.

    :param dict provider: ingest provider, ``config`` must hold non-blank
        ``username``, ``password`` and ``idList``
    :param dict update: provider changes, receives the ``private`` values
    :return: a list holding the parser result
    :rtype: list
    :raises SuperdeskIngestError: if a required setting is missing or blank
    :raises IngestApiError: if the request fails, the answer is not valid XML
        or the expected elements are missing
    """
    try:
        config = provider['config']
        user = config['username']
        password = config['password']
        id_list = config['idList']
        # before "products" was hardcoded as value for "idListType"
        id_list_type = config.get('idListType', 'products')
        # blank values are handled like missing ones
        if not (user.strip() and password.strip() and id_list.strip()):
            raise KeyError
    except KeyError:
        raise SuperdeskIngestError.notConfiguredError(Exception('username, password and idList are needed'))

    # we remove spaces and empty values from id_list to do a clean list
    stripped_ids = (part.strip() for part in id_list.split(','))
    id_list = ','.join(filter(None, stripped_ids))

    params = {
        'idList': id_list,
        'idListType': id_list_type,
        'format': '5',
        'maxItems': '25',
        'sortOrder': 'chronological',
    }

    try:
        stored = provider['private']
        resume_from = {
            'minDateTime': stored['min_date_time'],
            'sequenceNumber': stored['sequence_number'],
        }
    except KeyError:
        # nothing stored yet, request without a starting point
        pass
    else:
        params.update(resume_from)

    request_failed = 'error while doing the request'
    try:
        response = requests.get(URL, auth=(user, password), params=params)
    except Exception:
        raise IngestApiError.apiRequestError(Exception(request_failed))

    try:
        root_elt = etree.fromstring(response.content)
    except Exception:
        raise IngestApiError.apiRequestError(Exception(request_failed))

    items = self.get_feed_parser(provider).parse(root_elt, provider)

    try:
        timestamps = root_elt.xpath('//iptc:timestamp[@role="minDateTime"]/text()', namespaces=NS)
        min_date_time = timestamps[0].strip()
        transmit_ids = root_elt.xpath('//iptc:transmitId/text()', namespaces=NS)
        sequence_number = transmit_ids[0].strip()
    except IndexError:
        raise IngestApiError.apiRequestError(Exception('missing minDateTime or transmitId'))

    private = update.setdefault('private', {})
    private['min_date_time'] = min_date_time
    private['sequence_number'] = sequence_number

    return [items]
def _update(self, provider, update):
    """Retrieve and save the fuel prices of one market, then move on to the next.

    Each run handles a single entry of the ``market_definitions`` provider
    setting; the index of the entry to handle next is stored under
    ``private.market_index`` of the ingest provider.

    :param dict provider: ingest provider document
    :param update: not used by this implementation
    :return: always ``None``
    :raises IngestApiError: if getting the token, getting the prices or
        saving them fails
    """
    # The setting is text in which single quotes are swapped for double
    # quotes before it is decoded as JSON; a missing setting counts as an
    # empty list (the default must be a string, a list has no ``replace``).
    definitions = provider.get('config', {}).get('market_definitions') or '[]'
    markets = json.loads(definitions.replace('\'', '"'))
    if not markets:
        # nothing to retrieve, and indexing or ``% len(markets)`` would fail
        logger.warning('No market definitions configured for provider %s',
                       provider.get('name'))
        return None

    # Each update run will retrieve the data for a single "market"; the
    # modulo keeps a stored index valid after markets have been removed
    market_index = provider.get('private', {}).get('market_index', 0) % len(markets)
    market = markets[market_index]
    logger.info('Retrieving fuel data for the {} market'.format(
        market.get('market')))

    try:
        self.session_token = self._get_token(provider).get('id')
        prices = self._get_prices(provider, market)
        self._save(prices, market)
    except Exception as ex:
        raise IngestApiError.apiGeneralError(ex, self.provider)
    finally:
        # Save the next market to process, whether or not this run succeeded
        market_index = (market_index + 1) % len(markets)
        get_resource_service('ingest_providers').system_update(
            provider.get('_id'),
            {'private': {'market_index': market_index}},
            provider)

    return None
def _generate_auth_token(self, provider):
    """
    Generates Authentication Token as per the configuration in Ingest Provider.

    :param provider: dict - Ingest provider details to which the current directory has been configured
    :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
    :return: token details if successfully authenticated
    :rtype: str
    :raises: IngestApiError.apiGeneralError() if auth_url is missing in the Ingest Provider configuration
    :raises: IngestApiError.apiAuthError() if the response status is outside the 2xx range
    """
    session = requests.Session()
    session.mount('https://', SSLAdapter())

    auth_url = provider.get('config', {}).get('auth_url', None)
    if not auth_url:
        raise IngestApiError.apiGeneralError(provider=provider, exception=KeyError(
            ''' Ingest Provider {} is missing Authentication URL. Please check the configuration. '''.format(
                provider['name'])))

    payload = {
        'username': provider.get('config', {}).get('username', ''),
        'password': provider.get('config', {}).get('password', ''),
    }

    response = session.get(auth_url, params=payload, verify=False, timeout=30)

    # a rejected request must surface as an authentication error instead of
    # being parsed as if it carried a token
    if response.status_code < 200 or response.status_code >= 300:
        raise IngestApiError.apiAuthError(provider=provider)

    tree = etree.fromstring(response.content)  # workaround for http mock lib
    return tree.text
def _get_worksheet(self, provider):
    """Get worksheet from google spreadsheet

    The spreadsheet at ``config.url`` is opened with the service account
    given in ``config.service_account`` (JSON text) and the worksheet named
    ``config.worksheet_title`` is looked up.

    :param dict provider: ingest provider holding the settings above
    :return: worksheet
    :rtype: object
    :raises IngestSpreadsheetError: on permission, credential, quota or
        missing worksheet problems
    :raises IngestApiError: if the spreadsheet cannot be found or the API
        reports another error
    """
    scope = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive',
    ]
    config = provider.get('config', {})
    url = config.get('url', '')
    service_account = config.get('service_account', '')
    title = config.get('worksheet_title', '')

    try:
        service_account = json.loads(service_account)
        credentials = ServiceAccountCredentials.from_json_keyfile_dict(
            service_account, scope)
        gc = gspread.authorize(credentials)
        spreadsheet = gc.open_by_url(url)
        # only the first listed permission is inspected
        permission = spreadsheet.list_permissions()[0]
        if permission['role'] != 'writer':
            raise IngestSpreadsheetError.SpreadsheetPermissionError()
        worksheet = spreadsheet.worksheet(title)
        return worksheet
    except (json.decoder.JSONDecodeError, AttributeError, ValueError) as e:
        # both permission and credential raise Value error
        # NOTE(review): 15100 is presumably the code carried by the
        # permission error raised above - confirm against its definition.
        # ``e.args`` may be empty, so it is checked before indexing.
        if e.args and e.args[0] == 15100:
            raise IngestSpreadsheetError.SpreadsheetPermissionError()
        raise IngestSpreadsheetError.SpreadsheetCredentialsError()
    except gspread.exceptions.NoValidUrlKeyFound:
        raise IngestApiError.apiNotFoundError()
    except gspread.exceptions.WorksheetNotFound:
        raise IngestSpreadsheetError.WorksheetNotFoundError()
    except gspread.exceptions.APIError as e:
        error = e.response.json()['error']
        response_code = error['code']
        logger.error('Provider %s: %s', provider.get('name'), error['message'])
        if response_code == 403:
            raise IngestSpreadsheetError.SpreadsheetPermissionError()
        elif response_code == 429:
            raise IngestSpreadsheetError.SpreadsheetQuotaLimitError()
        else:
            raise IngestApiError.apiNotFoundError()
def _get_tree(self, endpoint, payload=None):
    """Get xml response for given API endpoint and payload.

    The authentication token is added to ``payload`` under the ``token`` key
    (a dict passed by the caller is modified in place). A request that times
    out is retried up to three times.

    :param endpoint: API endpoint, resolved with ``_get_absolute_url``
    :type endpoint: str
    :param payload: query parameters of the request
    :type payload: dict
    :return: root element of the parsed XML response
    :raises IngestApiError: if the request or the parsing fails
    :raises LookupError: if the server answers with status 404
    """
    if payload is None:
        payload = {}

    payload['token'] = self._get_auth_token(self.provider, update=True)
    url = self._get_absolute_url(endpoint)

    if not self.session:
        self.session = requests.Session()

    retries = 0
    while True:
        try:
            response = self.session.get(url, params=payload, timeout=(30, 15))
        except requests.exceptions.Timeout as ex:
            if retries < 3:
                # Logger.warn is a deprecated alias of Logger.warning
                logger.warning(
                    'Reuters API timeout retrying, retries {}'.format(
                        retries))
                retries += 1
                continue
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise LookupError(
                _('Not found {payload}').format(payload=payload))

        break

    try:
        return etree.fromstring(
            response.content)  # workaround for http mock lib
    except UnicodeEncodeError as error:
        traceback.print_exc()
        raise IngestApiError.apiUnicodeError(error, self.provider)
    except ParseError as error:
        traceback.print_exc()
        raise IngestApiError.apiParseError(error, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)
def _update(self, provider, update):
    """Fetch the queued news items, parse them and acknowledge each one.

    When a ``url`` is configured it is requested instead of the default one
    and no acknowledgement is sent.

    :param provider: ingest provider, handed to the feed parser
    :param update: not used by this implementation
    :return: a list holding the list of parsed items
    :rtype: list
    :raises SuperdeskIngestError: if credentials are missing or the configured
        URL is not an http link
    :raises IngestApiError: if the answer is not valid XML, reports an error
        code or lacks an expected element
    """
    config = self.config
    try:
        user, password = config['username'], config['password']
    except KeyError:
        # the error has to be raised: only constructing it lets execution
        # continue with unbound credentials
        raise SuperdeskIngestError.notConfiguredError(Exception('username and password are needed'))

    url_override = (config.get('url') or '').strip()
    # the URL is optional, so only a non-empty value is validated
    if url_override and not url_override.startswith('http'):
        raise SuperdeskIngestError.notConfiguredError(Exception('if URL is set, it must be a valid http link'))

    params = {'user': user, 'password': password, 'maksAntal': 50}
    if not url_override:
        # items are acknowledged below only when no URL override is used
        params['waitAcknowledge'] = 'true'

    r = self.get_url(url_override, params=params)

    try:
        root_elt = etree.fromstring(r.text)
    except Exception:
        raise IngestApiError.apiRequestError(Exception('error while parsing the request answer'))

    try:
        if root_elt.xpath('(//error/text())[1]')[0] != '0':
            err_msg = root_elt.xpath('(//errormsg/text())[1]')[0]
            raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg)))
    except IndexError:
        raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found'))

    parser = self.get_feed_parser(provider)
    items = []
    for elt in root_elt.xpath('//RBNews'):
        item = parser.parse(elt, provider)
        items.append(item)
        if not url_override:
            try:
                queue_id = elt.xpath('.//ServiceQueueId/text()')[0]
            except IndexError:
                raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element'))
            ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id}
            self.get_url(URL_ACK, params=ack_params)

    return [items]
def _generate_auth_token(self, provider):
    """
    Requests an Authentication Token from the URL configured in the Ingest Provider.

    :param provider: dict - Ingest provider details to which the current directory has been configured
    :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
    :return: text of the root element of the XML answer
    :rtype: str
    :raises: IngestApiError.apiGeneralError() if auth_url is missing in the Ingest Provider configuration
    :raises: IngestApiError.apiAuthError() if the request is rejected; the provider is closed first
    """
    session = requests.Session()
    session.mount("https://", SSLAdapter())

    config = provider.get("config", {})
    auth_url = config.get("auth_url", None)
    if not auth_url:
        missing_url = KeyError(
            """ Ingest Provider {} is missing Authentication URL. Please check the configuration. """.format(
                provider["name"]
            )
        )
        raise IngestApiError.apiGeneralError(provider=provider, exception=missing_url)

    credentials = {
        "username": config.get("username", ""),
        "password": config.get("password", ""),
    }
    response = session.get(auth_url, params=credentials, verify=False, timeout=30)

    if not 200 <= response.status_code < 300:
        # NOTE(review): raise_for_status() is documented to raise only for
        # 4xx/5xx answers, so other non-2xx statuses continue to the parsing
        # below - confirm this is intended
        try:
            response.raise_for_status()
        except Exception:
            err = IngestApiError.apiAuthError(provider=provider)
            self.close_provider(provider, err, force=True)
            raise err

    # workaround for http mock lib
    return etree.fromstring(response.content).text
def _fetch_data(self, config, provider):
    """Download the feed from the configured URL.

    When ``auth_required`` is set, the configured username and password are
    sent with the request and remembered in ``self.auth_info``.

    :param dict config: RSS resource configuration
    :param provider: data provider instance, needed as an argument when
        raising ingest errors
    :return: fetched RSS data
    :rtype: str
    :raises IngestApiError: if fetching data fails for any reason
        (e.g. authentication error, resource not found, etc.)
    """
    url = config['url']

    auth = None
    if config.get('auth_required', False):
        auth = (config.get('username'), config.get('password'))
        self.auth_info = {
            'username': config.get('username', ''),
            'password': config.get('password', '')
        }

    try:
        response = requests.get(url, auth=auth, timeout=30)
    except requests.exceptions.ConnectionError as err:
        raise IngestApiError.apiConnectionError(exception=err, provider=provider)
    except requests.exceptions.RequestException as err:
        raise IngestApiError.apiURLError(exception=err, provider=provider)

    if response.ok:
        return response.content

    # pick the ingest error matching the HTTP status
    status = response.status_code
    if status in (401, 403):
        make_error = IngestApiError.apiAuthError
    elif status == 404:
        make_error = IngestApiError.apiNotFoundError
    else:
        make_error = IngestApiError.apiGeneralError
    raise make_error(Exception(response.reason), provider)
def _request(self, url):
    """Download the given URL and return the response body.

    :param str url: address to request
    :return: content of the response
    :raises IngestApiError: if the request fails
    :raises LookupError: if the server answers with status 404
    """
    try:
        reply = requests.get(url, params={}, timeout=120)
    except requests.exceptions.RequestException as err:
        if isinstance(err, requests.exceptions.Timeout):
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(err, self.provider)
        if isinstance(err, requests.exceptions.TooManyRedirects):
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(err, self.provider)
        # catastrophic error. bail.
        raise IngestApiError.apiRequestError(err, self.provider)
    except Exception as err:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(err, self.provider)

    if reply.status_code == 404:
        raise LookupError('Not found')

    return reply.content
def _update(self, provider, update):
    """Hand the Wufoo connection settings to the feed parser.

    :param dict provider: ingest provider, ``config`` must hold
        ``wufoo_username`` and ``wufoo_api_key``
    :param update: provider changes, passed on to the parser
    :return: a list holding the parser result
    :rtype: list
    :raises IngestApiError: if obtaining the feed parser fails
    """
    user = provider['config']['wufoo_username']
    wufoo_data = {
        "url": WUFOO_URL.format(subdomain=user),
        "user": user,
        "api_key": provider['config']['wufoo_api_key'],
        "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES,
        "update": update,
    }
    try:
        parser = self.get_feed_parser(provider, None)
    except requests.exceptions.Timeout as ex:
        raise IngestApiError.apiTimeoutError(ex, provider)
    except requests.exceptions.TooManyRedirects as ex:
        raise IngestApiError.apiRedirectError(ex, provider)
    except requests.exceptions.RequestException as ex:
        raise IngestApiError.apiRequestError(ex, provider)
    except Exception as error:
        traceback.print_exc()
        # use the argument like the handlers above; ``self.provider`` is not
        # set anywhere in this method
        raise IngestApiError.apiGeneralError(error, provider)
    items = parser.parse(wufoo_data, provider)
    return [items]
def test_raise_apiNotFoundError(self):
    """apiNotFoundError carries code 4006, wraps the cause and logs one error."""
    cause = Exception("Testing apiNotFoundError")
    with assert_raises(IngestApiError) as error_context:
        raise IngestApiError.apiNotFoundError(cause, self.provider)

    raised = error_context.exception
    self.assertEqual(raised.code, 4006)
    self.assertEqual(raised.message, "API service not found(404) error")
    self.assertIsNotNone(raised.system_exception)
    self.assertEqual(raised.system_exception.args[0], "Testing apiNotFoundError")

    # exactly one error line is expected in the log
    logged = self.mock_logger_handler.messages['error']
    self.assertEqual(len(logged), 1)
    self.assertEqual(
        logged[0],
        "IngestApiError Error 4006 - API service not found(404) error: "
        "Testing apiNotFoundError on channel TestProvider")
def test_raise_apiRedirectError(self):
    """apiRedirectError carries code 4002, wraps the cause and logs one error."""
    cause = Exception("Testing apiRedirectError")
    with assert_raises(IngestApiError) as error_context:
        raise IngestApiError.apiRedirectError(cause, self.provider)

    raised = error_context.exception
    self.assertEqual(raised.code, 4002)
    self.assertEqual(raised.message, "API ingest has too many redirects")
    self.assertIsNotNone(raised.system_exception)
    self.assertEqual(raised.system_exception.args[0], "Testing apiRedirectError")

    # exactly one error line is expected in the log
    logged = self.mock_logger_handler.messages['error']
    self.assertEqual(len(logged), 1)
    self.assertEqual(
        logged[0],
        "IngestApiError Error 4002 - API ingest has too many redirects: "
        "Testing apiRedirectError on channel TestProvider")
def _get_tree(self, endpoint, payload=None):
    """Get xml response for given API endpoint and payload.

    The authentication token is added to ``payload`` under the ``token`` key
    (a dict passed by the caller is modified in place). A request that times
    out is retried up to three times.

    :param endpoint: API endpoint, resolved with ``_get_absolute_url``
    :type endpoint: str
    :param payload: query parameters of the request
    :type payload: dict
    :return: root element of the parsed XML response
    :raises IngestApiError: if the request or the parsing fails
    :raises LookupError: if the server answers with status 404
    """
    if payload is None:
        payload = {}

    payload['token'] = self._get_auth_token(self.provider, update=True)
    url = self._get_absolute_url(endpoint)

    if not self.session:
        self.session = requests.Session()

    retries = 0
    while True:
        try:
            response = self.session.get(url, params=payload, timeout=(30, 15))
        except requests.exceptions.Timeout as ex:
            if retries < 3:
                # Logger.warn is a deprecated alias of Logger.warning
                logger.warning('Reuters API timeout retrying, retries {}'.format(retries))
                retries += 1
                continue
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise LookupError('Not found %s' % payload)

        break

    try:
        return etree.fromstring(response.content)  # workaround for http mock lib
    except UnicodeEncodeError as error:
        traceback.print_exc()
        raise IngestApiError.apiUnicodeError(error, self.provider)
    except ParseError as error:
        traceback.print_exc()
        raise IngestApiError.apiParseError(error, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)
class NewsworthyFeedingService(FeedingService):
    """
    Feeding Service class which can retrieve articles from Newsworthy web service
    """

    NAME = 'newsworthy'
    ERRORS = [IngestApiError.apiRequestError().get_error_description(),
              SuperdeskIngestError.notConfiguredError().get_error_description()]

    label = 'Newsworthy'

    # configuration fields of the provider
    fields = [
        {
            'id': 'url',
            'type': 'text',
            'label': 'Use this URL for webhook',
            'default_value': '',
            'readonly': True,
        },
        {
            'id': 'username',
            'type': 'text',
            'label': 'Username',
            'required': True
        },
        {
            'id': 'password',
            'type': 'password',
            'label': 'Password',
            'required': True
        },
        {
            'id': 'secret',
            'type': 'password',
            'label': 'Shared Secret',
            'placeholder': 'Shared Secret',
            'required': False
        },
    ]

    def _update(self, provider, update):
        """Parse the data stored on the provider under ``newsworthy_data``.

        :param dict provider: ingest provider, expected to carry the received
            data under ``newsworthy_data``
        :param update: not used by this implementation
        :return: a list holding the parser result; ``[[]]`` when there is no
            data or the event is an unpublish event
        :rtype: list
        """
        try:
            data = provider['newsworthy_data']
        except (KeyError, IndexError):
            # a missing dict key raises KeyError; IndexError is kept so that
            # anything handled before is still handled
            return [[]]

        if data['hook']['event'] == EVENT_UNPUBLISHED:
            logger.info("ignoring unpublish event on following data:\n{data}".format(data=data))
            return [[]]

        # we have to write to a temporary file because feed parser expect a file path
        # FIXME: it would be better to use the data directly
        with NamedTemporaryFile('w') as f:
            json.dump(data['data'], f)
            # make sure the content is on disk before the parser opens the path
            f.flush()
            f.seek(0)
            parser = self.get_feed_parser(provider, f.name)
            items = parser.parse(f.name, provider)

        return [items]
def test_raise_apiParseError(self):
    """apiParseError carries code 4005, wraps the cause and logs one error."""
    with assert_raises(IngestApiError) as error_context:
        ex = Exception("Testing apiParseError")
        raise IngestApiError.apiParseError(ex, self.provider)

    exception = error_context.exception
    self.assertEqual(exception.code, 4005)
    self.assertEqual(exception.message, "API ingest xml parse error")
    self.assertIsNotNone(exception.system_exception)
    # assertEquals is a deprecated alias that newer Python versions removed
    self.assertEqual(exception.system_exception.args[0], "Testing apiParseError")

    self.assertEqual(len(self.mock_logger_handler.messages["error"]), 1)
    self.assertEqual(
        self.mock_logger_handler.messages["error"][0],
        "IngestApiError Error 4005 - API ingest xml parse error: "
        "Testing apiParseError on channel TestProvider",
    )
def test_raise_apiRequestError(self):
    # apiRequestError must expose code 4003, keep the wrapped exception and
    # produce exactly one error log entry naming the provider.
    with assert_raises(IngestApiError) as error_context:
        ex = Exception("Testing apiRequestError")
        raise IngestApiError.apiRequestError(ex, self.provider)

    exception = error_context.exception
    # assertEqual instead of assertTrue(a == b): a failure shows both values
    self.assertEqual(exception.code, 4003)
    self.assertEqual(exception.message, "API ingest has request error")
    self.assertIsNotNone(exception.system_exception)
    # assertEquals was a deprecated alias and is gone in Python 3.12
    self.assertEqual(exception.system_exception.args[0], "Testing apiRequestError")

    self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
    self.assertEqual(
        self.mock_logger_handler.messages['error'][0],
        "IngestApiError Error 4003 - API ingest has request error: "
        "Testing apiRequestError on channel TestProvider")
def test_raise_apiNotFoundError(self):
    # apiNotFoundError must expose code 4006, keep the wrapped exception and
    # produce exactly one error log entry naming the provider.
    with assert_raises(IngestApiError) as error_context:
        try:
            ex = Exception("Testing apiNotFoundError")
            raise ex
        except Exception:
            # the ingest error is created while the original one is being handled
            raise IngestApiError.apiNotFoundError(ex, self.provider)

    exception = error_context.exception
    # assertEqual instead of assertTrue(a == b): a failure shows both values
    self.assertEqual(exception.code, 4006)
    self.assertEqual(exception.message, "API service not found(404) error")
    self.assertIsNotNone(exception.system_exception)
    self.assertEqual(exception.system_exception.args[0], "Testing apiNotFoundError")

    self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
    self.assertEqual(self.mock_logger_handler.messages['error'][0],
                     "IngestApiError Error 4006 - API service not found(404) error: "
                     "Testing apiNotFoundError on channel TestProvider")
def test_raise_apiTimeoutError(self):
    # apiTimeoutError must expose code 4001, keep the wrapped exception and
    # produce exactly one error log entry naming the provider.
    with assert_raises(IngestApiError) as error_context:
        try:
            ex = Exception("Testing apiTimeoutError")
            raise ex
        except Exception:
            # the ingest error is created while the original one is being handled
            raise IngestApiError.apiTimeoutError(ex, self.provider)

    exception = error_context.exception
    # assertEqual instead of assertTrue(a == b): a failure shows both values
    self.assertEqual(exception.code, 4001)
    self.assertEqual(exception.message, "API ingest connection has timed out.")
    self.assertIsNotNone(exception.system_exception)
    self.assertEqual(exception.system_exception.args[0], "Testing apiTimeoutError")

    self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
    self.assertEqual(self.mock_logger_handler.messages['error'][0],
                     "IngestApiError Error 4001 - API ingest connection has timed out.: "
                     "Testing apiTimeoutError on channel TestProvider")
def test_raise_apiUnicodeError(self):
    # apiUnicodeError must expose code 4004, keep the wrapped exception and
    # produce exactly one error log entry naming the provider.
    with assert_raises(IngestApiError) as error_context:
        try:
            ex = Exception("Testing apiUnicodeError")
            raise ex
        except Exception:
            # the ingest error is created while the original one is being handled
            raise IngestApiError.apiUnicodeError(ex, self.provider)

    exception = error_context.exception
    # assertEqual instead of assertTrue(a == b): a failure shows both values
    self.assertEqual(exception.code, 4004)
    self.assertEqual(exception.message, "API ingest Unicode Encode Error")
    self.assertIsNotNone(exception.system_exception)
    self.assertEqual(exception.system_exception.args[0], "Testing apiUnicodeError")

    self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
    self.assertEqual(self.mock_logger_handler.messages['error'][0],
                     "IngestApiError Error 4004 - API ingest Unicode Encode Error: "
                     "Testing apiUnicodeError on channel TestProvider")
def _update(self, provider, update):
    """Fetch news items from the remote XML API and acknowledge them.

    Reads ``username``/``password`` (and an optional ``url`` override) from
    ``provider['config']``, requests up to 50 items, parses every ``RBNews``
    element and, when the default URL is used, acknowledges each item through
    ``URL_ACK`` using its ``ServiceQueueId``.

    :param provider: ingest provider holding the ``config`` dict
    :type provider: dict
    :param update: pending provider update (not used here)
    :type update: dict
    :return: a one-element list holding the list of parsed items
    :raises SuperdeskIngestError: when credentials are missing or the URL
        override is not an http link
    :raises IngestApiError: on request failure, unparsable or invalid XML, an
        error code returned by the API, or a missing ``ServiceQueueId``
    """
    try:
        config = provider['config']
        user = config['username']
        password = config['password']
    except KeyError:
        # the error must be raised: building it alone lets execution continue
        # with ``config``/``user``/``password`` unbound
        raise SuperdeskIngestError.notConfiguredError(Exception('username and password are needed'))

    url_override = (config.get('url') or '').strip()
    # only validate the override when one is actually set
    if url_override and not url_override.startswith('http'):
        raise SuperdeskIngestError.notConfiguredError(Exception('if URL is set, it must be a valid http link'))

    params = {'user': user, 'password': password, 'maksAntal': 50}
    if not url_override:
        # acknowledgement is only requested against the default URL
        params['waitAcknowledge'] = 'true'

    try:
        r = requests.get(url_override or URL, params=params)
    except Exception:
        raise IngestApiError.apiRequestError(Exception('error while doing the request'))

    try:
        root_elt = etree.fromstring(r.text)
    except Exception:
        raise IngestApiError.apiRequestError(Exception('error while parsing the request answer'))

    try:
        if root_elt.xpath('(//error/text())[1]')[0] != '0':
            err_msg = root_elt.xpath('(//errormsg/text())[1]')[0]
            raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg)))
    except IndexError:
        raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found'))

    parser = self.get_feed_parser(provider)
    items = []
    for elt in root_elt.xpath('//RBNews'):
        item = parser.parse(elt, provider)
        items.append(item)
        if not url_override:
            try:
                queue_id = elt.xpath('.//ServiceQueueId/text()')[0]
            except IndexError:
                raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element'))
            ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id}
            try:
                requests.get(URL_ACK, params=ack_params)
            except Exception:
                raise IngestApiError.apiRequestError(Exception('error while doing the request'))

    return [items]
def test_raise_apiGeneralError(self):
    # A general API error carries code 4000, the provider name, the wrapped
    # exception, and is logged exactly once.
    cause = Exception("Testing general API error")
    with assert_raises(IngestApiError) as error_context:
        raise IngestApiError.apiGeneralError(cause, self.provider)

    raised = error_context.exception
    self.assertEqual(raised.code, 4000)
    self.assertEqual(raised.message, "Unknown API ingest error")
    self.assertEqual(raised.provider_name, "TestProvider")

    wrapped = raised.system_exception
    self.assertIsNotNone(wrapped)
    self.assertEqual(wrapped.args[0], "Testing general API error")

    expected_log = (
        "IngestApiError Error 4000 - Unknown API ingest error: "
        "Testing general API error on channel TestProvider"
    )
    self.assertEqual(len(self.mock_logger_handler.messages["error"]), 1)
    self.assertEqual(self.mock_logger_handler.messages["error"][0], expected_log)
def test_raise_apiGeneralError(self):
    # Checks the error attributes first, then the single log line it emits.
    source_error = Exception("Testing general API error")
    with assert_raises(IngestApiError) as error_context:
        raise IngestApiError.apiGeneralError(source_error, self.provider)

    api_error = error_context.exception
    for actual, expected in (
            (api_error.code, 4000),
            (api_error.message, "Unknown API ingest error"),
            (api_error.provider_name, "TestProvider")):
        self.assertEqual(actual, expected)

    self.assertIsNotNone(api_error.system_exception)
    self.assertEqual(api_error.system_exception.args[0],
                     "Testing general API error")

    self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
    first_entry = self.mock_logger_handler.messages['error'][0]
    self.assertEqual(
        first_entry,
        "IngestApiError Error 4000 - Unknown API ingest error: "
        "Testing general API error on channel TestProvider")
def test_raise_apiParseError(self):
    # apiParseError must expose code 4005, keep the wrapped exception and
    # produce exactly one error log entry naming the provider.
    with assert_raises(IngestApiError) as error_context:
        try:
            ex = Exception("Testing apiParseError")
            raise ex
        except Exception:
            # the ingest error is created while the original one is being handled
            raise IngestApiError.apiParseError(ex, self.provider)

    exception = error_context.exception
    # assertEqual instead of assertTrue(a == b): a failure shows both values
    self.assertEqual(exception.code, 4005)
    self.assertEqual(exception.message, "API ingest xml parse error")
    self.assertIsNotNone(exception.system_exception)
    self.assertEqual(exception.system_exception.args[0], "Testing apiParseError")

    self.assertEqual(len(self.mock_logger_handler.messages["error"]), 1)
    self.assertEqual(
        self.mock_logger_handler.messages["error"][0],
        "IngestApiError Error 4005 - API ingest xml parse error: "
        "Testing apiParseError on channel TestProvider",
    )
def _get_tree(self, endpoint, payload=None):
    """
    Request an API endpoint and return the parsed xml answer.

    The auth token is written into the payload under ``token`` before the
    request is sent.

    :param endpoint: endpoint name, resolved with ``self._get_absolute_url``
    :type endpoint: str
    :param payload: query parameters sent with the request
    :type payload: dict
    :return: root element parsed from the response body
    :raises LookupError: when the API answers with HTTP 404
    :raises IngestApiError: on any request or parsing failure
    """
    query = {} if payload is None else payload
    query['token'] = self._get_auth_token(self.provider, update=True)
    target = self._get_absolute_url(endpoint)

    try:
        reply = requests.get(target, params=query, timeout=15)
    except requests.exceptions.Timeout as ex:
        # Maybe set up for a retry, or continue in a retry loop
        raise IngestApiError.apiTimeoutError(ex, self.provider)
    except requests.exceptions.TooManyRedirects as ex:
        # Tell the user their URL was bad and try a different one
        raise IngestApiError.apiRedirectError(ex, self.provider)
    except requests.exceptions.RequestException as ex:
        # catastrophic error. bail.
        raise IngestApiError.apiRequestError(ex, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)

    if reply.status_code == 404:
        raise LookupError('Not found %s' % query)

    try:
        # bytes are parsed directly: workaround for http mock lib
        root = etree.fromstring(reply.content)
    except UnicodeEncodeError as error:
        traceback.print_exc()
        raise IngestApiError.apiUnicodeError(error, self.provider)
    except ParseError as error:
        traceback.print_exc()
        raise IngestApiError.apiParseError(error, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)
    else:
        return root
def get_tree(self, endpoint, payload=None):
    """Get xml response for given API endpoint and payload.

    :param endpoint: endpoint name, resolved with ``self.get_url``
    :type endpoint: str
    :param payload: query parameters; the token is stored under ``token``
    :type payload: dict
    :return: root element parsed from the response body
    :raises IngestApiError: on timeout, redirect loop, request failure,
        HTTP 404, unicode/parse failure or any other unexpected error
    """
    if payload is None:
        payload = {}

    payload['token'] = self.get_token()
    url = self.get_url(endpoint)

    try:
        response = requests.get(url, params=payload, timeout=21.0)
    except requests.exceptions.Timeout as ex:
        # Maybe set up for a retry, or continue in a retry loop
        raise IngestApiError.apiTimeoutError(ex, self.provider)
    except requests.exceptions.TooManyRedirects as ex:
        # Tell the user their URL was bad and try a different one
        raise IngestApiError.apiRedirectError(ex, self.provider)
    except requests.exceptions.RequestException as ex:
        # catastrophic error. bail.
        raise IngestApiError.apiRequestError(ex, self.provider)
    except Exception as error:
        traceback.print_exc()
        # use the factory like every other branch, so the error is built
        # from (exception, provider) instead of the raw constructor arguments
        raise IngestApiError.apiGeneralError(error, self.provider)

    if response.status_code == 404:
        raise IngestApiError.apiNotFoundError(
            LookupError('Not found %s' % payload), self.provider)

    try:
        # workaround for httmock lib
        # return etree.fromstring(response.text.encode('utf-8'))
        return etree.fromstring(response.content)
    except UnicodeEncodeError as error:
        traceback.print_exc()
        raise IngestApiError.apiUnicodeError(error, self.provider)
    except ParseError as error:
        traceback.print_exc()
        raise IngestApiError.apiParseError(error, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)
def _get_tree(self, endpoint, payload=None):
    """
    Fetch the xml document served by an API endpoint.

    :param endpoint: endpoint name, resolved with ``self._get_absolute_url``
    :type endpoint: str
    :param payload: query parameters; the auth token is added under ``token``
    :type payload: dict
    :return: root element parsed from the response body
    :raises LookupError: when the API answers with HTTP 404
    :raises IngestApiError: on any request or parsing failure
    """
    if payload is None:
        payload = {}
    payload['token'] = self._get_auth_token(self.provider, update=True)
    url = self._get_absolute_url(endpoint)

    try:
        response = requests.get(url, params=payload, timeout=15)
    except requests.exceptions.RequestException as ex:
        # Timeout and TooManyRedirects are RequestException subclasses, so the
        # more specific kinds are told apart here, most specific first.
        if isinstance(ex, requests.exceptions.Timeout):
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        if isinstance(ex, requests.exceptions.TooManyRedirects):
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        # catastrophic error. bail.
        raise IngestApiError.apiRequestError(ex, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)

    not_found = response.status_code == 404
    if not_found:
        raise LookupError('Not found %s' % payload)

    try:
        return etree.fromstring(response.content)  # workaround for http mock lib
    except UnicodeEncodeError as error:
        traceback.print_exc()
        raise IngestApiError.apiUnicodeError(error, self.provider)
    except ParseError as error:
        traceback.print_exc()
        raise IngestApiError.apiParseError(error, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)
def get_url(self, url=None, **kwargs):
    """Run the parent HTTP GET and unwrap the JSON envelope of the answer.

    :param string url: url to use (None to use self.HTTP_URL)
    :param **kwargs: extra parameter for requests
    :return dict: the ``data`` member of the decoded response
    :raises IngestApiError: when the response flags ``hasError``
    """
    envelope = super().get_url(url=url, **kwargs).json()

    if not envelope['hasError']:
        return envelope['data']

    # error path: log the details reported by the service, then raise
    details = envelope['data']
    msg = "Error in GET: '{}'. ErrorCode: '{}'. Description: '{}'".format(
        url, details['errorCode'], details['description'])
    logger.error(msg)
    raise IngestApiError.apiGeneralError(Exception(msg), self.provider)
def _fetch_data(self):
    """Download every result page published since the last update.

    Pages are requested ten results at a time, starting at offset 0, until
    the offset reaches the ``total`` reported by the service.

    :return: raw response texts, one per page that reported a non-zero total
    :rtype: list
    :raises IngestApiError: when a response carries no numeric ``total`` field
    """
    url = self.config['url']
    api_key = self.config['api_key']

    last_updated = self.provider.get('last_updated')
    if last_updated is None:
        # never updated yet: ask for everything since the epoch
        last_updated = utcfromtimestamp(0)
    last_update = last_updated.strftime('%Y-%m-%dT%H:%M:%S')

    # Results are pagified so we'll read this many at a time
    offset_jump = 10
    params = {'start': last_update, 'limit': offset_jump}
    headers = {'apikey': api_key}

    # The total number of results are given to us in json, get them
    # via a regex to read the field so we don't have to convert the
    # whole thing to json pointlessly
    total_pattern = re.compile(r'"total": *([0-9]+)')

    items = []
    offset = 0
    while True:
        params['offset'] = offset
        response = self.get_url(url, params=params, headers=headers)

        # check the match itself: calling .group() on a failed search raises
        # AttributeError before any error handling could run
        match = total_pattern.search(response.text)
        if match is None:
            raise IngestApiError.apiGeneralError(Exception(response.text), self.provider)

        num_results = int(match.group(1))
        if num_results > 0:
            items.append(response.text)

        if offset >= num_results:
            return items
        offset += offset_jump
def _update(self, provider, update):
    """Retrieve and save the fuel prices of one market per run.

    The markets come from the ``market_definitions`` config value (a JSON
    list, single quotes accepted). The index of the market to process is kept
    in ``provider['private']['market_index']`` and advanced after every run,
    whether the run succeeded or not.

    :param provider: ingest provider details
    :type provider: dict
    :param update: pending provider update (not used here)
    :type update: dict
    :return: always ``None``; prices are stored through ``self._save``
    :raises IngestApiError: when fetching or saving the prices fails
    """
    # the fallback must be a string: the value is text-processed before parsing
    raw_definitions = provider.get('config', {}).get('market_definitions') or '[]'
    markets = json.loads(raw_definitions.replace('\'', '"'))
    if not markets:
        logger.warning('No fuel market definitions configured, nothing to retrieve')
        return None

    # Each update run will retrieve the data for a single "market".
    # Wrap the stored index so a shortened market list cannot leave it out of
    # range (that would fail before the index is ever advanced again).
    market_index = provider.get('private', {}).get('market_index', 0) % len(markets)
    market = markets[market_index]
    logger.info('Retrieving fuel data for the {} market'.format(market.get('market')))

    try:
        self.session_token = self._get_token(provider).get('id')
        prices = self._get_prices(provider, market)
        self._save(prices, market)
    except Exception as ex:
        raise IngestApiError.apiGeneralError(ex, self.provider)
    finally:
        # Save the next market to process
        market_index = (market_index + 1) % len(markets)
        get_resource_service('ingest_providers').system_update(
            provider.get('_id'), {'private': {'market_index': market_index}}, provider)

    return None
class EventHTTPFeedingService(HTTPFeedingServiceBase):
    """
    Feeding Service class which can read events using HTTP
    """

    NAME = 'event_http'
    label = 'Event HTTP feed'
    service = 'events'

    fields = [
        {
            'id': 'url',
            'type': 'text',
            'label': 'Feed URL',
            'placeholder': 'Feed URL',
            'required': True
        }
    ]

    # descriptions of every error this service may report
    ERRORS = [
        error_factory().get_error_description()
        for error_factory in (
            IngestApiError.apiTimeoutError,
            IngestApiError.apiRedirectError,
            IngestApiError.apiRequestError,
            IngestApiError.apiUnicodeError,
            IngestApiError.apiParseError,
            IngestApiError.apiGeneralError,
        )
    ]

    HTTP_AUTH = False

    def _update(self, provider, update):
        """
        Download the configured feed and hand it to the provider's parser.

        :param provider: Ingest Provider Details.
        :type provider: dict
        :param update: Any update that is required on provider.
        :type update: dict
        :return: yields a single list of events which can be saved.
        """
        response = self.get_url(self.config['url'])
        parser = self.get_feed_parser(provider)
        body = response.content

        logger.info('Ingesting events with {} parser'.format(parser.__class__.__name__))
        logger.info('Ingesting content: {} ...'.format(str(body)[:4000]))

        # parsers offering parse_http also receive the provider
        items = (
            parser.parse_http(body, provider)
            if hasattr(parser, 'parse_http')
            else parser.parse(body)
        )

        # a single parsed item is wrapped so a list is always yielded
        yield items if isinstance(items, list) else [items]
def test_raise_apiAuthError(self):
    # An authorization error carries code 4007, the provider name, the
    # wrapped exception, and is logged exactly once.
    auth_failure = Exception("Testing API authorization error")
    with assert_raises(IngestApiError) as error_context:
        raise IngestApiError.apiAuthError(auth_failure, self.provider)

    raised = error_context.exception
    self.assertEqual(raised.code, 4007)
    self.assertEqual(raised.message, "API authorization error")
    self.assertEqual(raised.provider_name, "TestProvider")

    wrapped = raised.system_exception
    self.assertIsNotNone(wrapped)
    self.assertEqual(wrapped.args[0], "Testing API authorization error")

    expected_log = ("IngestApiError Error 4007 - API authorization error: "
                    "Testing API authorization error on channel TestProvider")
    self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
    self.assertEqual(self.mock_logger_handler.messages['error'][0], expected_log)
def _update(self, provider, update):
    """Replace the stored traffic incidents with those served by the API.

    Downloads the feature collection from the configured ``api_url``, maps
    every feature to an incident document, then deletes all existing
    ``traffic_incidents`` and posts the new ones.

    :param provider: ingest provider; ``config`` supplies ``username``,
        ``password`` and ``api_url``
    :type provider: dict
    :param update: pending provider update (not used here)
    :type update: dict
    :raises IngestApiError: when the request fails or returns an error status
    """

    def convert_date(epoch):
        # only the first 10 digits are used, i.e. whole seconds
        # NOTE(review): presumably the feed sends millisecond epochs - confirm
        dt = local_to_utc(config.DEFAULT_TIMEZONE, datetime.fromtimestamp(int(str(epoch)[:10])))
        return dt

    provider_config = provider.get('config', {})
    username = provider_config.get('username')
    password = provider_config.get('password')
    url = provider_config.get('api_url')

    try:
        response = requests.get(url, auth=(username, password))
        response.raise_for_status()
    except Exception as ex:
        raise IngestApiError.apiGeneralError(ex, self.provider)

    data = json.loads(response.content.decode('UTF-8'))

    service = get_resource_service('traffic_incidents')
    incidents = []
    for feature in data.get('features', []):
        props = feature.get('properties', {})
        geometry = feature.get('geometry')
        if geometry:
            # drop the crs member; a feature without geometry or without crs
            # must not abort the whole import
            geometry.pop('crs', None)
        incidents.append({
            'guid': int(props.get('id')),
            'start_date': convert_date(props.get('startDate')),
            'end_date': convert_date(props.get('endDate')),
            'incident_type': props.get('type'),
            'incident_description': props.get('description'),
            'city': props.get('city'),
            'state': props.get('state'),
            'from_street_name': props.get('fromStreetName'),
            'from_cross_street_name': props.get('fromCrossStreetName'),
            'to_street_name': props.get('toStreetName'),
            'to_cross_street_name': props.get('toCrossStreetName'),
            'geometry': geometry
        })

    # full refresh: remove everything stored, then insert the current set
    service.delete(lookup={})
    service.post(incidents)
def test_raise_apiAuthError(self):
    # apiAuthError must expose code 4007 and the provider name, keep the
    # wrapped exception and produce exactly one error log entry.
    with assert_raises(IngestApiError) as error_context:
        try:
            ex = Exception("Testing API authorization error")
            raise ex
        except Exception:
            # a bare ``except:`` would also trap SystemExit/KeyboardInterrupt
            raise IngestApiError.apiAuthError(ex, self.provider)

    exception = error_context.exception
    self.assertEqual(exception.code, 4007)
    self.assertEqual(exception.message, "API authorization error")
    self.assertEqual(exception.provider_name, "TestProvider")

    self.assertIsNotNone(exception.system_exception)
    self.assertEqual(exception.system_exception.args[0], "Testing API authorization error")

    self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
    self.assertEqual(
        self.mock_logger_handler.messages['error'][0],
        "IngestApiError Error 4007 - API authorization error: "
        "Testing API authorization error on channel TestProvider")
def get_tree(self, endpoint, payload=None):
    """Get xml response for given API endpoint and payload.

    :param endpoint: endpoint name, resolved with ``self.get_url``
    :type endpoint: str
    :param payload: query parameters; the token is stored under ``token``
    :type payload: dict
    :return: root element parsed from the response body
    :raises IngestApiError: on timeout, redirect loop, request failure,
        HTTP 404, unicode/parse failure or any other unexpected error
    """
    if payload is None:
        payload = {}

    payload['token'] = self.get_token()
    url = self.get_url(endpoint)

    try:
        response = requests.get(url, params=payload, timeout=21.0)
    except requests.exceptions.Timeout as ex:
        # Maybe set up for a retry, or continue in a retry loop
        raise IngestApiError.apiTimeoutError(ex, self.provider)
    except requests.exceptions.TooManyRedirects as ex:
        # Tell the user their URL was bad and try a different one
        raise IngestApiError.apiRedirectError(ex, self.provider)
    except requests.exceptions.RequestException as ex:
        # catastrophic error. bail.
        raise IngestApiError.apiRequestError(ex, self.provider)
    except Exception as error:
        traceback.print_exc()
        # use the factory like every other branch, so the error is built
        # from (exception, provider) instead of the raw constructor arguments
        raise IngestApiError.apiGeneralError(error, self.provider)

    if response.status_code == 404:
        raise IngestApiError.apiNotFoundError(LookupError('Not found %s' % payload), self.provider)

    try:
        # workaround for httmock lib
        # return etree.fromstring(response.text.encode('utf-8'))
        return etree.fromstring(response.content)
    except UnicodeEncodeError as error:
        traceback.print_exc()
        raise IngestApiError.apiUnicodeError(error, self.provider)
    except ParseError as error:
        traceback.print_exc()
        raise IngestApiError.apiParseError(error, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)
class ReutersHTTPFeedingService(HTTPFeedingService):
    """
    Feeding Service class which can read article(s) using HTTP provided by Reuters.
    """

    NAME = "reuters_http"

    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiUnicodeError().get_error_description(),
        IngestApiError.apiParseError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description(),
    ]

    DATE_FORMAT = "%Y.%m.%d.%H.%M"

    label = "Reuters feed API"

    fields = [
        {
            "id": "url",
            "type": "text",
            "label": "Feed URL",
            "placeholder": "Feed URL",
            "required": True,
            "default": "http://rmb.reuters.com/rmd/rest/xml",
        },
        {
            "id": "auth_url",
            "type": "text",
            "label": "URL for Authentication",
            "placeholder": "authentication url",
            "required": True,
            "default": "https://commerce.reuters.com/rmd/rest/xml/login",
        },
        {"id": "username", "type": "text", "label": "Username", "placeholder": "Username", "required": True},
        {"id": "password", "type": "password", "label": "Password", "placeholder": "Password", "required": True},
    ]

    # HTTP session, created on first request and then reused
    session = None

    def _update(self, provider, update):
        """Yield the items of every new article on every subscribed channel.

        :param provider: ingest provider details
        :type provider: dict
        :param update: pending provider update (not used here)
        :type update: dict
        :return: generator of item lists, one list per retrieved article
        """
        updated = utcnow()

        # never look further back than the ingest expiry window
        last_updated = provider.get("last_updated")
        ttl_minutes = app.config["INGEST_EXPIRY_MINUTES"]
        if not last_updated or last_updated < updated - datetime.timedelta(minutes=ttl_minutes):
            last_updated = updated - datetime.timedelta(minutes=ttl_minutes)

        self.provider = provider
        provider_config = provider.get("config")
        if not provider_config:
            provider_config = {}
            provider["config"] = provider_config

        provider_config.setdefault("url", "http://rmb.reuters.com/rmd/rest/xml")
        provider_config.setdefault("auth_url", "https://commerce.reuters.com/rmd/rest/xml/login")
        self.URL = provider_config.get("url")

        for channel in self._get_channels():
            ids = self._get_article_ids(channel, last_updated, updated)
            for item_id in ids:
                try:
                    items = self.fetch_ingest(item_id)
                    if items:
                        yield items
                # if there was an exception processing the one of the bunch log it and continue
                except Exception as ex:
                    logger.warning("Reuters item {} has not been retrieved".format(item_id))
                    logger.exception(ex)

    def _get_channels(self):
        """Get subscribed channels."""
        channels = []
        tree = self._get_tree("channels")
        for channel in tree.findall("channelInformation"):
            channels.append(channel.find("alias").text)

        return channels

    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        The request is retried on timeout up to three times.

        :param endpoint: endpoint name, appended to the feed URL
        :type endpoint: str
        :param payload: query parameters; the auth token is stored under ``token``
        :type payload: dict
        :return: root element parsed from the response body
        :raises LookupError: when the API answers with HTTP 404
        :raises IngestApiError: on any other request or parsing failure
        """
        if payload is None:
            payload = {}
        payload["token"] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                # timeout is a (connect, read) pair in seconds
                response = self.session.get(url, params=payload, timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    logger.warning("Reuters API timeout retrying, retries {}".format(retries))
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider)
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider)
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider)
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(error, self.provider)

            if response.status_code == 404:
                raise LookupError(_("Not found {payload}").format(payload=payload))

            break

        try:
            return etree.fromstring(response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

    def _get_absolute_url(self, endpoint):
        """
        Get absolute URL for given endpoint.

        :param: endpoint
        :type endpoint: str
        """
        return "/".join([self.URL, endpoint])

    def _get_article_ids(self, channel, last_updated, updated):
        """
        Get article ids which should be upserted also save the poll token that is returned.

        :param channel: channel alias to query
        :param last_updated: start of the date range used without a poll token
        :param updated: end of the date range used without a poll token
        :return: set of article ids; empty on failure or when nothing is new
        """
        ids = set()
        payload = {"channel": channel, "fieldsRef": "id"}

        # check if the channel has a pollToken if not fall back to dateRange
        last_poll_token = self._get_poll_token(channel)
        if last_poll_token is not None:
            logger.info("Reuters requesting channel {} with poll token {}".format(channel, last_poll_token))
            payload["pollToken"] = last_poll_token
        else:
            payload["dateRange"] = "%s-%s" % (self._format_date(last_updated), self._format_date(updated))
            logger.info("Reuters requesting channel {} with dateRange {}".format(channel, payload["dateRange"]))

        tree = self._get_tree("items", payload)
        status_code = tree.find("status").get("code") if tree.tag == "results" else tree.get("code")
        # check the returned status
        if status_code != "10":
            logger.warning("Reuters channel request returned status code {}".format(status_code))
            # status code 30 indicates failure
            if status_code == "30":
                # invalid token
                logger.warning(
                    "Reuters error on channel {} code {} {}".format(
                        channel, tree.find("error").get("code"), tree.find("error").text
                    )
                )
                if tree.find("error").get("code") == "2100":
                    self._save_poll_token(channel, None)
                    logger.warning("Reuters channel invalid token reseting {}".format(status_code))
                return ids

        # extract the returned poll token if there is one
        poll_token = tree.find("pollToken")
        if poll_token is not None:
            # a new token indicated new content
            if poll_token.text != last_poll_token:
                logger.info("Reuters channel {} new token {}".format(channel, poll_token.text))
                self._save_poll_token(channel, poll_token.text)
            else:
                # the token has not changed, so nothing new
                logger.info("Reuters channel {} nothing new".format(channel))
                return ids
        else:
            logger.info("Reuters channel {} retrieved no token".format(channel))
            return ids

        for result in tree.findall("result"):
            item_id = result.find("id").text
            ids.add(item_id)
            logger.info("Reuters id : {}".format(item_id))

        return ids

    def _save_poll_token(self, channel, poll_token):
        """Save the poll token for the passed channel in the provider's ``tokens``.

        :param channel: channel alias the token belongs to
        :param poll_token: token to store (``None`` resets it)
        :return: None
        """
        # get the provider in case it has been updated by another channel
        ingest_provider_service = superdesk.get_resource_service("ingest_providers")
        provider = ingest_provider_service.find_one(req=None, _id=self.provider[superdesk.config.ID_FIELD])
        # tolerate a stored provider that has no tokens yet
        provider_token = provider.get("tokens") or {}
        provider_token.setdefault("poll_tokens", {})[channel] = poll_token
        upd_provider = {"tokens": provider_token}
        ingest_provider_service.system_update(self.provider[superdesk.config.ID_FIELD], upd_provider, self.provider)

    def _get_poll_token(self, channel):
        """Get the poll token from provider config if it is available.

        :param channel: channel alias
        :return: token, or ``None`` when no token is stored for the channel
        """
        if "tokens" in self.provider and "poll_tokens" in self.provider["tokens"]:
            return self.provider.get("tokens").get("poll_tokens").get(channel, None)
        return None

    def _format_date(self, date):
        """Format a datetime with ``DATE_FORMAT``."""
        return date.strftime(self.DATE_FORMAT)

    def fetch_ingest(self, id):
        """Parse the item with the given id together with everything it references.

        :param id: id of the item to fetch
        :return: list of parsed items, or ``[]`` when a referenced item is not found
        """
        items = self._parse_items(id)
        result_items = []
        while items:
            item = items.pop()
            self.localize_timestamps(item)
            try:
                items.extend(self._fetch_items_in_package(item))
                result_items.append(item)
            except LookupError as err:
                self.log_item_error(err, item, self.provider)
                return []

        return result_items

    def _parse_items(self, id):
        """
        Parse item message and return given items.
        """
        payload = {"id": id}
        tree = self._get_tree("item", payload)

        parser = self.get_feed_parser(self.provider, tree)
        items = parser.parse(tree, self.provider)

        return items

    def _fetch_items_in_package(self, item):
        """
        Fetch remote assets for given item.
        """
        items = []
        for group in item.get("groups", []):
            for ref in group.get("refs", []):
                if "residRef" in ref:
                    items.extend(self._parse_items(ref.get("residRef")))

        return items

    def prepare_href(self, href, mimetype=None):
        """Strip params, query and fragment from ``href`` and append the auth token."""
        scheme, netloc, path = urlparse(href)[:3]
        new_href = urlunparse((scheme, netloc, path, "", "", ""))
        return "%s?auth_token=%s" % (new_href, self._get_auth_token(self.provider, update=True))
import requests from superdesk.io.ingest_service import IngestService from superdesk.utc import utcnow from superdesk.etree import etree, ParseError from superdesk.io import register_provider from .newsml_2_0 import NewsMLTwoParser from .reuters_token import get_token from superdesk.errors import IngestApiError from flask import current_app as app PROVIDER = 'reuters' errors = [IngestApiError.apiTimeoutError().get_error_description(), IngestApiError.apiRedirectError().get_error_description(), IngestApiError.apiRequestError().get_error_description(), IngestApiError.apiUnicodeError().get_error_description(), IngestApiError.apiParseError().get_error_description(), IngestApiError.apiGeneralError().get_error_description()] class ReutersIngestService(IngestService): """Reuters ingest service.""" DATE_FORMAT = '%Y.%m.%d.%H.%M' URL = 'http://rmb.reuters.com/rmd/rest/xml' token = None def __init__(self):
from datetime import datetime from superdesk.errors import IngestApiError, ParserError from superdesk.io import register_provider from superdesk.io.ingest_service import IngestService from superdesk.utils import merge_dicts from urllib.parse import quote as urlquote, urlsplit, urlunsplit PROVIDER = "rss" utcfromtimestamp = datetime.utcfromtimestamp errors = [ IngestApiError.apiAuthError().get_error_description(), IngestApiError.apiNotFoundError().get_error_description(), IngestApiError.apiGeneralError().get_error_description(), ParserError.parseMessageError().get_error_description(), ] class RssIngestService(IngestService): """Ingest service for providing feeds received in RSS 2.0 format. (NOTE: it should also work with other syndicated feeds formats, too, since the underlying parser supports them, but for our needs RSS 2.0 is assumed) """ ItemField = namedtuple("ItemField", ["name", "name_in_data", "type"])