Exemple #1
0
    def _fetch_data(self, config, provider):
        """Fetch the paged results published since the provider's last update.

        Each request asks for ``offset_jump`` results starting at ``offset``.
        The raw text of every response is collected; the JSON is not decoded.

        :param dict config: provider configuration; ``url`` and ``api_key``
            are required
        :param provider: ingest provider; its ``last_updated`` value (epoch
            when the key is absent) is sent as the ``start`` parameter, and it
            is passed along when raising ingest errors
        :return: raw response bodies, one per fetched page
        :rtype: list
        :raises IngestApiError: on a connection error, on an unsuccessful
            response, or when no numeric ``total`` field is found
        """
        url = config['url']
        api_key = config['api_key']

        last_update = provider.get(
            'last_updated', utcfromtimestamp(0)).strftime('%Y-%m-%dT%H:%M:%S')

        # Results are paged so we'll read this many at a time
        offset_jump = 10

        params = {'start': last_update, 'limit': offset_jump}
        headers = {'apikey': api_key}

        items = []

        offset = 0
        while True:
            params['offset'] = offset

            try:
                response = requests.get(url,
                                        params=params,
                                        headers=headers,
                                        timeout=30)
            except requests.exceptions.ConnectionError as err:
                raise IngestApiError.apiConnectionError(exception=err)

            if not response.ok:
                if re.match('Error: No API Key provided', response.text):
                    raise IngestApiError.apiAuthError(Exception(response.text),
                                                      provider)
                if response.status_code == 404:
                    raise IngestApiError.apiNotFoundError(
                        Exception(response.reason), provider)
                raise IngestApiError.apiGeneralError(
                    Exception(response.reason), provider)

            # The total number of results is given to us in the json; read
            # the field with a regex so we don't have to decode the whole
            # body. A missing field or one without digits is reported as an
            # API error instead of crashing on a ``None`` match.
            total_match = re.search(r'"total": *([0-9]*)', response.text)
            if total_match is None or not total_match.group(1):
                raise IngestApiError.apiGeneralError(
                    Exception(response.text), provider)

            num_results = int(total_match.group(1))

            if num_results > 0:
                items.append(response.text)

            # NOTE(review): the loop stops only once ``offset`` itself has
            # reached ``total``, so one request past the last page is made
            # whenever total > 0 -- confirm whether that is intended.
            if offset >= num_results:
                return items

            offset += offset_jump
Exemple #2
0
    def _update(self, provider, update):
        """Request the feed, parse it and record where the next run resumes.

        :param dict provider: ingest provider; ``config`` must hold
            ``username``, ``password`` and ``idList``. When ``private`` holds
            ``min_date_time`` and ``sequence_number`` they are sent with the
            request.
        :param dict update: receives the new ``min_date_time`` and
            ``sequence_number`` under its ``private`` key
        :return: a one-element list wrapping the parser result
        :rtype: list
        :raises SuperdeskIngestError: if a required config key is missing
        :raises IngestApiError: if the request fails, the answer is not
            parsable XML, or minDateTime/transmitId are missing from it
        """
        try:
            config = provider['config']
            user = config['username']
            password = config['password']
            id_list = config['idList']
        except KeyError:
            # the error has to be raised: falling through would leave the
            # names above unbound and fail later with an unrelated error
            raise SuperdeskIngestError.notConfiguredError(
                Exception('username, password and idList are needed'))

        # we remove spaces and empty values from id_list to do a clean list
        id_list = ','.join(
            [id_.strip() for id_ in id_list.split(',') if id_.strip()])

        params = {
            'idList': id_list,
            'idListType': 'products',
            'format': '5',
            'maxItems': '25',
            'sortOrder': 'chronological'
        }
        try:
            min_date_time = provider['private']['min_date_time']
            sequence_number = provider['private']['sequence_number']
        except KeyError:
            # nothing stored yet: request without a resume position
            pass
        else:
            params['minDateTime'] = min_date_time
            params['sequenceNumber'] = sequence_number

        try:
            r = requests.get(URL, auth=(user, password), params=params)
        except Exception:
            raise IngestApiError.apiRequestError(
                Exception('error while doing the request'))

        try:
            root_elt = etree.fromstring(r.content)
        except Exception:
            raise IngestApiError.apiRequestError(
                Exception('error while doing the request'))

        parser = self.get_feed_parser(provider)
        items = parser.parse(root_elt, provider)

        try:
            min_date_time = root_elt.xpath(
                '//iptc:timestamp[@role="minDateTime"]/text()',
                namespaces=NS)[0].strip()
            sequence_number = root_elt.xpath('//iptc:transmitId/text()',
                                             namespaces=NS)[0].strip()
        except IndexError:
            raise IngestApiError.apiRequestError(
                Exception('missing minDateTime or transmitId'))
        else:
            # stored so the next run can send them back as request params
            update.setdefault('private', {})
            update['private']['min_date_time'] = min_date_time
            update['private']['sequence_number'] = sequence_number

        return [items]
Exemple #3
0
    def _fetch_data(self, config, provider):
        """Download the feed and return the response body.

        :param dict config: resource configuration; ``url`` is required,
            ``username``/``password`` are sent as credentials only when
            ``auth_required`` is truthy
        :param provider: data provider instance, passed along when raising
            ingest errors
        :return: the response content as returned by ``requests``

        :raises IngestApiError: if the response is not successful
            (auth error for 401/403, not found for 404, general otherwise)
        """
        url = config["url"]

        credentials = None
        if config.get("auth_required", False):
            credentials = (config.get("username"), config.get("password"))

        response = requests.get(url, auth=credentials)
        if response.ok:
            return response.content

        # map the HTTP status onto the matching ingest error
        status = response.status_code
        failure = Exception(response.reason)
        if status in (401, 403):
            raise IngestApiError.apiAuthError(failure, provider)
        if status == 404:
            raise IngestApiError.apiNotFoundError(failure, provider)
        raise IngestApiError.apiGeneralError(failure, provider)
Exemple #4
0
    def _test(self, provider):
        """Probe the configured endpoint with a minimal request.

        :param provider: ingest provider whose ``config`` holds ``url`` and
            ``api_key``
        :raises IngestApiError: on a connection error or an unsuccessful
            response (not found for 404, general otherwise)
        """
        config = provider.get('config', {})
        url = config['url']
        api_key = config['api_key']

        try:
            response = requests.get(
                url,
                # a single article with only its id keeps the answer small
                params={'limit': 1, 'fields': 'id'},
                headers={'apikey': api_key},
                timeout=30)
        except requests.exceptions.ConnectionError as err:
            raise IngestApiError.apiConnectionError(exception=err)

        if response.ok:
            return

        failure = Exception(response.reason)
        if response.status_code == 404:
            raise IngestApiError.apiNotFoundError(failure, provider)
        raise IngestApiError.apiGeneralError(failure, provider)
Exemple #5
0
    def _update(self, provider, update):
        """Fetch the XML feed, parse every ``RBNews`` element and acknowledge it.

        When no URL override is configured the request is sent with
        ``waitAcknowledge`` and each parsed element is acknowledged through
        ``URL_ACK`` using its ``ServiceQueueId``.

        :param provider: ingest provider, handed to the feed parser
        :param update: not used by this method
        :return: a one-element list wrapping the list of parsed items
        :rtype: list
        :raises SuperdeskIngestError: if credentials are missing or the URL
            override is set but is not an http link
        :raises IngestApiError: if the answer cannot be parsed, reports an
            error code, or lacks an expected element
        """
        config = self.config
        try:
            user, password = config["username"], config["password"]
        except KeyError:
            # must be raised, otherwise ``user``/``password`` stay unbound
            raise SuperdeskIngestError.notConfiguredError(
                Exception("username and password are needed"))

        url_override = (config.get("url") or "").strip()
        # only a URL that is actually set can be invalid; an empty value
        # means "no override"
        if url_override and not url_override.startswith("http"):
            raise SuperdeskIngestError.notConfiguredError(
                Exception("if URL is set, it must be a valid http link"))

        params = {"user": user, "password": password, "maksAntal": 50}
        if not url_override:
            params["waitAcknowledge"] = "true"

        # NOTE(review): an empty override is passed straight through --
        # presumably get_url falls back to a default endpoint; confirm.
        r = self.get_url(url_override, params=params)

        try:
            root_elt = etree.fromstring(r.text)
        except Exception:
            raise IngestApiError.apiRequestError(
                Exception("error while parsing the request answer"))

        try:
            if root_elt.xpath("(//error/text())[1]")[0] != "0":
                err_msg = root_elt.xpath("(//errormsg/text())[1]")[0]
                raise IngestApiError.apiRequestError(
                    Exception("error code returned by API: {msg}".format(
                        msg=err_msg)))
        except IndexError:
            raise IngestApiError.apiRequestError(
                Exception("Invalid XML, <error> element not found"))

        parser = self.get_feed_parser(provider)
        items = []
        for elt in root_elt.xpath("//RBNews"):
            item = parser.parse(elt, provider)
            items.append(item)
            if not url_override:
                try:
                    queue_id = elt.xpath(".//ServiceQueueId/text()")[0]
                except IndexError:
                    raise IngestApiError.apiRequestError(
                        Exception("missing ServiceQueueId element"))
                ack_params = {
                    "user": user,
                    "password": password,
                    "servicequeueid": queue_id
                }
                self.get_url(URL_ACK, params=ack_params)

        return [items]
    def _generate_auth_token(self, provider):
        """
        Request an authentication token from the provider's ``auth_url``.

        The configured ``username`` and ``password`` are sent as query
        parameters and the text of the root element of the XML answer is
        returned.

        :param provider: Ingest provider details
        :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
        :return: text of the root element of the response
        :rtype: str
        :raises: IngestApiError.apiGeneralError() if auth_url is missing in the Ingest Provider configuration
        :raises: IngestApiError.apiAuthError() if the response status is not 2xx
        """
        http = requests.Session()
        http.mount('https://', SSLAdapter())

        auth_url = provider.get('config', {}).get('auth_url', None)
        if not auth_url:
            message = '''
                                                     Ingest Provider {} is missing Authentication URL.
                                                     Please check the configuration.
                                                     '''.format(provider['name'])
            raise IngestApiError.apiGeneralError(provider=provider,
                                                 exception=KeyError(message))

        credentials = {
            'username': provider.get('config', {}).get('username', ''),
            'password': provider.get('config', {}).get('password', ''),
        }

        response = http.get(auth_url, params=credentials, verify=False, timeout=30)
        if not 200 <= response.status_code < 300:
            raise IngestApiError.apiAuthError(provider=provider)

        # parse from bytes: workaround for http mock lib
        return etree.fromstring(response.content).text
Exemple #7
0
    def _fetch_data(self, config, provider):
        """Download the feed and hand back the response body.

        :param dict config: resource configuration; ``url`` is required and
            ``username``/``password`` are used only when ``auth_required``
            is truthy
        :param provider: data provider instance, needed as an argument when
            raising ingest errors
        :return: the response content as returned by ``requests``

        :raises IngestApiError: if the response is not successful
            (auth error for 401/403, not found for 404, general otherwise)
        """
        url = config['url']

        needs_auth = config.get('auth_required', False)
        auth = (config.get('username'), config.get('password')) if needs_auth else None

        response = requests.get(url, auth=auth)
        if response.ok:
            return response.content

        reason = Exception(response.reason)
        code = response.status_code
        if code in (401, 403):
            raise IngestApiError.apiAuthError(reason, provider)
        if code == 404:
            raise IngestApiError.apiNotFoundError(reason, provider)
        raise IngestApiError.apiGeneralError(reason, provider)
Exemple #8
0
class WufooFeedingService(FeedingService):
    """
    Feeding Service class which can read article(s) using Wufoo API
    """

    NAME = "wufoo"

    # descriptions of the ingest errors ``_update`` can raise
    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description(),
    ]

    label = "Wufoo feed API"

    # configuration fields; the ids are the ``provider["config"]`` keys
    # read in ``_update``
    fields = [
        {
            "id": "wufoo_username",
            "type": "text",
            "label": "Login",
            "placeholder": "Wufoo login",
            "required": True
        },
        {
            "id": "wufoo_api_key",
            "type": "password",
            "label": "API key",
            "placeholder": "Wufoo API Key",
            "required": True,
        },
    ]

    def __init__(self):
        super().__init__()
        # not referenced elsewhere in this class; presumably filled by the
        # parser -- TODO confirm
        self.fields_cache = {}

    def _update(self, provider, update):
        """Build the Wufoo request description and let the parser ingest it.

        :param dict provider: ingest provider; ``config`` must hold
            ``wufoo_username`` and ``wufoo_api_key``
        :param update: passed through to the parser inside the request data
        :return: a one-element list wrapping the parser result
        :rtype: list
        :raises IngestApiError: if obtaining the feed parser fails
        """
        user = provider["config"]["wufoo_username"]
        wufoo_data = {
            "url": WUFOO_URL.format(subdomain=user),
            "user": user,
            "api_key": provider["config"]["wufoo_api_key"],
            "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES,
            "update": update,
        }
        # NOTE(review): only the parser lookup is guarded here, while
        # ``parser.parse`` below receives the url/api key and is presumably
        # the call that talks to the network -- confirm the intended scope.
        try:
            parser = self.get_feed_parser(provider, None)
        except requests.exceptions.Timeout as ex:
            raise IngestApiError.apiTimeoutError(ex, provider)
        except requests.exceptions.TooManyRedirects as ex:
            raise IngestApiError.apiRedirectError(ex, provider)
        except requests.exceptions.RequestException as ex:
            raise IngestApiError.apiRequestError(ex, provider)
        except Exception as error:
            traceback.print_exc()
            # use the local provider like the handlers above; this class
            # never assigns ``self.provider`` itself
            raise IngestApiError.apiGeneralError(error, provider)
        items = parser.parse(wufoo_data, provider)
        return [items]
Exemple #9
0
    def _update(self, provider, update):
        """Download the configured URL and yield the parsed items.

        This is a generator: it yields a single list of items. The way the
        downloaded content is handed to the parser depends on the parser
        type (XML tree, calendar object or raw content).

        :param dict provider: ingest provider; ``config['url']`` is fetched.
            An empty or missing ``config`` is replaced by ``{}`` on the
            provider.
        :param update: not used by this method
        :raises IngestApiError: if the HTTP request fails
        :raises LookupError: if the server answers with 404
        """
        self.provider = provider
        provider_config = provider.get('config')
        if not provider_config:
            provider_config = {}
            provider['config'] = provider_config

        self.URL = provider_config.get('url')
        payload = {}

        parser = self.get_feed_parser(provider)

        try:
            response = requests.get(self.URL, params=payload, timeout=15)
            # TODO: check if file has been updated since provider last_updated
            # although some providers do not include 'Last-Modified' in headers
            # so unsure how to do this
            logger.info('Http Headers: %s', response.headers)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise LookupError('Not found %s' % payload)

        # %s formatting renders the bytes lazily, only if the record is emitted
        logger.info('Ingesting: %s', response.content)

        if isinstance(parser, NTBEventXMLFeedParser):
            xml = ET.fromstring(response.content)
            items = parser.parse(xml, provider)
        elif isinstance(parser, IcsTwoFeedParser):
            cal = Calendar.from_ical(response.content)
            items = parser.parse(cal, provider)
        else:
            items = parser.parse(response.content)

        # always hand a list to the consumer
        if isinstance(items, list):
            yield items
        else:
            yield [items]
Exemple #10
0
class WufooFeedingService(FeedingService):
    """
    Feeding Service class which can read article(s) using Wufoo API
    """

    NAME = 'wufoo'

    # descriptions of the ingest errors ``_update`` can raise
    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description()
    ]

    label = 'Wufoo feed API'

    # configuration fields; the ids are the ``provider['config']`` keys
    # read in ``_update``
    fields = [{
        'id': 'wufoo_username',
        'type': 'text',
        'label': 'Login',
        'placeholder': 'Wufoo login',
        'required': True
    }, {
        'id': 'wufoo_api_key',
        'type': 'password',
        'label': 'API key',
        'placeholder': 'Wufoo API Key',
        'required': True
    }]

    parser_restricted_values = ['wufoo']

    def __init__(self):
        # let the base class initialise its own state as well
        super().__init__()
        # not referenced elsewhere in this class; presumably filled by the
        # parser -- TODO confirm
        self.fields_cache = {}

    def _update(self, provider, update):
        """Build the Wufoo request description and let the parser ingest it.

        :param dict provider: ingest provider; ``config`` must hold
            ``wufoo_username`` and ``wufoo_api_key``
        :param update: passed through to the parser inside the request data
        :return: a one-element list wrapping the parser result
        :rtype: list
        :raises IngestApiError: if obtaining the feed parser fails
        """
        user = provider['config']['wufoo_username']
        wufoo_data = {
            "url": WUFOO_URL.format(subdomain=user),
            "user": user,
            "api_key": provider['config']['wufoo_api_key'],
            "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES,
            "update": update
        }
        # NOTE(review): only the parser lookup is guarded here, while
        # ``parser.parse`` below receives the url/api key and is presumably
        # the call that talks to the network -- confirm the intended scope.
        try:
            parser = self.get_feed_parser(provider, None)
        except requests.exceptions.Timeout as ex:
            raise IngestApiError.apiTimeoutError(ex, provider)
        except requests.exceptions.TooManyRedirects as ex:
            raise IngestApiError.apiRedirectError(ex, provider)
        except requests.exceptions.RequestException as ex:
            raise IngestApiError.apiRequestError(ex, provider)
        except Exception as error:
            traceback.print_exc()
            # use the local provider like the handlers above; this class
            # never assigns ``self.provider`` itself
            raise IngestApiError.apiGeneralError(error, provider)
        items = parser.parse(wufoo_data, provider)
        return [items]
Exemple #11
0
    def _fetch_data(self, config, provider):
        """Fetch the paged results published since the provider's last update.

        Each request asks for ``offset_jump`` results starting at ``offset``.
        The raw text of every response is collected; the JSON is not decoded.

        :param dict config: provider configuration; ``url`` and ``api_key``
            are required
        :param provider: ingest provider; its ``last_updated`` value (epoch
            when the key is absent) is sent as the ``start`` parameter, and it
            is passed along when raising ingest errors
        :return: raw response bodies, one per fetched page
        :rtype: list
        :raises IngestApiError: on a connection error, on an unsuccessful
            response, or when no numeric ``total`` field is found
        """
        url = config['url']
        api_key = config['api_key']

        last_update = provider.get('last_updated', utcfromtimestamp(0)).strftime('%Y-%m-%dT%H:%M:%S')

        # Results are paged so we'll read this many at a time
        offset_jump = 10

        params = {'start': last_update, 'limit': offset_jump}
        headers = {'apikey': api_key}

        items = []

        offset = 0
        while True:
            params['offset'] = offset

            try:
                response = requests.get(url, params=params, headers=headers, timeout=30)
            except requests.exceptions.ConnectionError as err:
                raise IngestApiError.apiConnectionError(exception=err)

            if not response.ok:
                if re.match('Error: No API Key provided', response.text):
                    raise IngestApiError.apiAuthError(
                        Exception(response.text), provider)
                if response.status_code == 404:
                    raise IngestApiError.apiNotFoundError(
                        Exception(response.reason), provider)
                raise IngestApiError.apiGeneralError(
                    Exception(response.reason), provider)

            # The total number of results is given to us in the json; read
            # the field with a regex so we don't have to decode the whole
            # body. A missing field or one without digits is reported as an
            # API error instead of crashing on a ``None`` match.
            total_match = re.search(r'"total": *([0-9]*)', response.text)
            if total_match is None or not total_match.group(1):
                raise IngestApiError.apiGeneralError(
                    Exception(response.text), provider)

            num_results = int(total_match.group(1))

            if num_results > 0:
                items.append(response.text)

            # NOTE(review): the loop stops only once ``offset`` itself has
            # reached ``total``, so one request past the last page is made
            # whenever total > 0 -- confirm whether that is intended.
            if offset >= num_results:
                return items

            offset += offset_jump
Exemple #12
0
    def _update(self, provider, update):
        """Request the feed, parse it and record where the next run resumes.

        :param dict provider: ingest provider; ``config`` must hold non-blank
            ``username``, ``password`` and ``idList`` and may hold
            ``idListType``. ``min_date_time``/``sequence_number`` stored under
            ``private`` are sent with the request when both are present.
        :param dict update: receives the new ``min_date_time`` and
            ``sequence_number`` under its ``private`` key
        :return: a one-element list wrapping the parser result
        :rtype: list
        :raises SuperdeskIngestError: if a required config value is missing
            or blank
        :raises IngestApiError: if the request fails, the answer is not
            parsable XML, or minDateTime/transmitId are missing from it
        """
        try:
            config = provider['config']
            user, password, id_list = config['username'], config['password'], config['idList']
            # "products" used to be the hardcoded value for "idListType"
            id_list_type = config.get('idListType', 'products')
            # blank values are treated exactly like missing keys
            if not (user.strip() and password.strip() and id_list.strip()):
                raise KeyError
        except KeyError:
            raise SuperdeskIngestError.notConfiguredError(Exception('username, password and idList are needed'))

        # drop surrounding spaces and empty entries to get a clean list
        stripped_ids = (raw_id.strip() for raw_id in id_list.split(','))
        id_list = ','.join(clean_id for clean_id in stripped_ids if clean_id)

        params = {
            'idList': id_list,
            'idListType': id_list_type,
            'format': '5',
            'maxItems': '25',
            'sortOrder': 'chronological',
        }
        try:
            stored = provider['private']
            resume_time = stored['min_date_time']
            resume_sequence = stored['sequence_number']
        except KeyError:
            # nothing stored yet: request without a resume position
            pass
        else:
            params['minDateTime'] = resume_time
            params['sequenceNumber'] = resume_sequence

        try:
            r = requests.get(URL, auth=(user, password), params=params)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        try:
            root_elt = etree.fromstring(r.content)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        items = self.get_feed_parser(provider).parse(root_elt, provider)

        try:
            timestamps = root_elt.xpath('//iptc:timestamp[@role="minDateTime"]/text()', namespaces=NS)
            min_date_time = timestamps[0].strip()
            transmit_ids = root_elt.xpath('//iptc:transmitId/text()', namespaces=NS)
            sequence_number = transmit_ids[0].strip()
        except IndexError:
            raise IngestApiError.apiRequestError(Exception('missing minDateTime or transmitId'))

        # stored so the next run can send them back as request params
        private_state = update.setdefault('private', {})
        private_state['min_date_time'] = min_date_time
        private_state['sequence_number'] = sequence_number

        return [items]
Exemple #13
0
    def _update(self, provider, update):
        """Retrieve and save the prices for one market, then advance the index.

        Each run handles a single entry of the ``market_definitions`` config
        value (JSON text; single quotes are replaced by double quotes before
        decoding). The index of the next entry is saved on the provider
        whether or not the run succeeded.

        :param dict provider: ingest provider
        :param update: not used by this method
        :return: always ``None``
        :raises IngestApiError: if no market is configured, or if getting
            the token, the prices or saving them fails
        """
        # Each update run will retrieve the data for a single "market"
        market_index = provider.get('private', {}).get('market_index', 0)
        # default to an empty JSON list: a missing value has no ``replace``
        raw_definitions = provider.get('config', {}).get('market_definitions') or '[]'
        markets = json.loads(raw_definitions.replace('\'', '"'))
        if not markets:
            # nothing to index into (and nothing to take a modulo by below)
            raise IngestApiError.apiGeneralError(
                Exception('no market definitions configured'), provider)

        # a stored index may be stale if the configured list got shorter
        market_index %= len(markets)
        market = markets[market_index]
        logger.info('Retrieving fuel data for the %s market',
                    market.get('market'))

        try:
            self.session_token = self._get_token(provider).get('id')
            prices = self._get_prices(provider, market)
            self._save(prices, market)
        except Exception as ex:
            raise IngestApiError.apiGeneralError(ex, provider)
        finally:
            # Save the next market to process
            market_index = (market_index + 1) % len(markets)
            get_resource_service('ingest_providers').system_update(
                provider.get('_id'),
                {'private': {
                    'market_index': market_index
                }}, provider)

        return None
Exemple #14
0
    def _generate_auth_token(self, provider):
        """
        Generates Authentication Token as per the configuration in Ingest Provider.

        The configured ``username`` and ``password`` are sent as query
        parameters to ``auth_url``; the text of the root element of the XML
        answer is returned.

        :param provider: dict - Ingest provider details to which the current directory has been configured
        :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
        :return: token details if successfully authenticated
        :rtype: str
        :raises: IngestApiError.apiGeneralError() if auth_url is missing in the Ingest Provider configuration
        :raises: IngestApiError.apiAuthError() if the response status is not 2xx
        """
        session = requests.Session()
        session.mount('https://', SSLAdapter())

        auth_url = provider.get('config', {}).get('auth_url', None)
        if not auth_url:
            raise IngestApiError.apiGeneralError(provider=provider,
                                                 exception=KeyError('''
                                                     Ingest Provider {} is missing Authentication URL.
                                                     Please check the configuration.
                                                     '''.format(
                                                     provider['name'])))

        payload = {
            'username': provider.get('config', {}).get('username', ''),
            'password': provider.get('config', {}).get('password', ''),
        }

        response = session.get(auth_url,
                               params=payload,
                               verify=False,
                               timeout=30)
        # a failed request must not have its error body parsed as a token
        if not 200 <= response.status_code < 300:
            raise IngestApiError.apiAuthError(provider=provider)

        tree = etree.fromstring(
            response.content)  # workaround for http mock lib

        return tree.text
    def _get_worksheet(self, provider):
        """Open the configured spreadsheet and return the requested worksheet.

        Reads ``url``, ``service_account`` (JSON text) and ``worksheet_title``
        from the provider config. The first entry of the spreadsheet's
        permission list must have the ``writer`` role.

        :param provider: ingest provider
        :return: worksheet
        :rtype: object
        :raises IngestSpreadsheetError: on credential, permission, quota or
            missing-worksheet problems
        :raises IngestApiError: if the URL holds no spreadsheet key or the
            API reports any other error
        """
        config = provider.get('config', {})
        url = config.get('url', '')
        service_account = config.get('service_account', '')
        title = config.get('worksheet_title', '')
        scope = [
            'https://spreadsheets.google.com/feeds',
            'https://www.googleapis.com/auth/drive',
        ]

        try:
            key_data = json.loads(service_account)
            credentials = ServiceAccountCredentials.from_json_keyfile_dict(
                key_data, scope)
            client = gspread.authorize(credentials)
            spreadsheet = client.open_by_url(url)
            first_permission = spreadsheet.list_permissions()[0]
            if first_permission['role'] != 'writer':
                raise IngestSpreadsheetError.SpreadsheetPermissionError()
            return spreadsheet.worksheet(title)
        except (json.decoder.JSONDecodeError, AttributeError, ValueError) as err:
            # both permission and credential raise Value error
            if err.args[0] == 15100:
                raise IngestSpreadsheetError.SpreadsheetPermissionError()
            raise IngestSpreadsheetError.SpreadsheetCredentialsError()
        except gspread.exceptions.NoValidUrlKeyFound:
            raise IngestApiError.apiNotFoundError()
        except gspread.exceptions.WorksheetNotFound:
            raise IngestSpreadsheetError.WorksheetNotFoundError()
        except gspread.exceptions.APIError as err:
            error = err.response.json()['error']
            response_code = error['code']
            logger.error('Provider %s: %s', provider.get('name'),
                         error['message'])
            if response_code == 403:
                raise IngestSpreadsheetError.SpreadsheetPermissionError()
            if response_code == 429:
                raise IngestSpreadsheetError.SpreadsheetQuotaLimitError()
            raise IngestApiError.apiNotFoundError()
Exemple #16
0
    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        An auth token is added to the query parameters. A request that times
        out is retried up to three times before giving up.

        :param endpoint: API endpoint, resolved through ``_get_absolute_url``
        :type endpoint: str
        :param payload: query parameters for the request; not modified
        :type payload: dict
        :return: root element of the parsed response
        :raises IngestApiError: if the request or the XML parsing fails
        :raises LookupError: if the server answers with 404
        """

        # work on a copy so the caller's dict does not gain a ``token`` entry
        payload = {} if payload is None else dict(payload)

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                response = self.session.get(url,
                                            params=payload,
                                            timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    # ``Logger.warn`` no longer exists on Python 3.13+
                    logger.warning(
                        'Reuters API timeout retrying, retries %s', retries)
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider)
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider)
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider)
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(error, self.provider)

            if response.status_code == 404:
                raise LookupError(
                    _('Not found {payload}').format(payload=payload))

            break

        try:
            return etree.fromstring(
                response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)
Exemple #17
0
    def _update(self, provider, update):
        """Fetch the XML feed, parse every ``RBNews`` element and acknowledge it.

        When no URL override is configured the request is sent with
        ``waitAcknowledge`` and each parsed element is acknowledged through
        ``URL_ACK`` using its ``ServiceQueueId``.

        :param provider: ingest provider, handed to the feed parser
        :param update: not used by this method
        :return: a one-element list wrapping the list of parsed items
        :rtype: list
        :raises SuperdeskIngestError: if credentials are missing or the URL
            override is set but is not an http link
        :raises IngestApiError: if the answer cannot be parsed, reports an
            error code, or lacks an expected element
        """
        config = self.config
        try:
            user, password = config['username'], config['password']
        except KeyError:
            # must be raised, otherwise ``user``/``password`` stay unbound
            raise SuperdeskIngestError.notConfiguredError(Exception('username and password are needed'))

        url_override = (config.get('url') or '').strip()
        # only a URL that is actually set can be invalid; an empty value
        # means "no override"
        if url_override and not url_override.startswith('http'):
            raise SuperdeskIngestError.notConfiguredError(Exception('if URL is set, it must be a valid http link'))

        params = {'user': user, 'password': password, 'maksAntal': 50}
        if not url_override:
            params['waitAcknowledge'] = 'true'

        # NOTE(review): an empty override is passed straight through --
        # presumably get_url falls back to a default endpoint; confirm.
        r = self.get_url(url_override, params=params)

        try:
            root_elt = etree.fromstring(r.text)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while parsing the request answer'))

        try:
            if root_elt.xpath('(//error/text())[1]')[0] != '0':
                err_msg = root_elt.xpath('(//errormsg/text())[1]')[0]
                raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg)))
        except IndexError:
            raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found'))

        parser = self.get_feed_parser(provider)
        items = []
        for elt in root_elt.xpath('//RBNews'):
            item = parser.parse(elt, provider)
            items.append(item)
            if not url_override:
                try:
                    queue_id = elt.xpath('.//ServiceQueueId/text()')[0]
                except IndexError:
                    raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element'))
                ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id}
                self.get_url(URL_ACK, params=ack_params)

        return [items]
Exemple #18
0
    def _generate_auth_token(self, provider):
        """
        Request an authentication token from the provider's ``auth_url``.

        The configured ``username`` and ``password`` are sent as query
        parameters and the text of the root element of the XML answer is
        returned. When the status is outside 2xx and ``raise_for_status``
        raises, the provider is closed with an auth error before that error
        is raised.

        :param provider: Ingest provider details
        :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
        :return: text of the root element of the response
        :rtype: str
        :raises: IngestApiError.apiGeneralError() if auth_url is missing in the Ingest Provider configuration
        :raises: IngestApiError.apiAuthError() if the request is rejected
        """
        http = requests.Session()
        http.mount("https://", SSLAdapter())

        auth_url = provider.get("config", {}).get("auth_url", None)
        if not auth_url:
            message = """
                                                     Ingest Provider {} is missing Authentication URL.
                                                     Please check the configuration.
                                                     """.format(provider["name"])
            raise IngestApiError.apiGeneralError(provider=provider, exception=KeyError(message))

        credentials = {
            "username": provider.get("config", {}).get("username", ""),
            "password": provider.get("config", {}).get("password", ""),
        }

        response = http.get(auth_url, params=credentials, verify=False, timeout=30)
        if not 200 <= response.status_code < 300:
            try:
                response.raise_for_status()
            except Exception:
                auth_error = IngestApiError.apiAuthError(provider=provider)
                self.close_provider(provider, auth_error, force=True)
                raise auth_error

        # parse from bytes: workaround for http mock lib
        return etree.fromstring(response.content).text
Exemple #19
0
    def _fetch_data(self, config, provider):
        """Download the current content of the configured feed.

        When ``auth_required`` is set in the configuration, the request is
        sent with the configured username/password and the credentials are
        also remembered on ``self.auth_info``.

        :param dict config: RSS resource configuration
        :param provider: data provider instance, passed along when raising
            ingest errors
        :return: body of the HTTP response

        :raises IngestApiError: if fetching data fails for any reason
            (e.g. authentication error, resource not found, etc.)
        """
        feed_url = config['url']

        credentials = None
        if config.get('auth_required', False):
            credentials = (config.get('username'), config.get('password'))
            self.auth_info = {
                'username': config.get('username', ''),
                'password': config.get('password', '')
            }

        try:
            reply = requests.get(feed_url, auth=credentials, timeout=30)
        except requests.exceptions.ConnectionError as conn_err:
            raise IngestApiError.apiConnectionError(exception=conn_err, provider=provider)
        except requests.exceptions.RequestException as req_err:
            raise IngestApiError.apiURLError(exception=req_err, provider=provider)

        if reply.ok:
            return reply.content

        # Anything below is a failed request: map the status to an ingest error.
        failure = Exception(reply.reason)
        status = reply.status_code
        if status in (401, 403):
            raise IngestApiError.apiAuthError(failure, provider)
        if status == 404:
            raise IngestApiError.apiNotFoundError(failure, provider)
        raise IngestApiError.apiGeneralError(failure, provider)
    def _request(self, url):
        """Issue a GET request for ``url`` and hand back the response body.

        :param url: address to request
        :return: content of the HTTP response
        :raises IngestApiError: if the request times out, is redirected too
            often or fails for any other reason
        :raises LookupError: if the server answers with status 404
        """
        try:
            reply = requests.get(url, params={}, timeout=120)
        except requests.exceptions.Timeout as timeout_err:
            # the server did not answer in time
            raise IngestApiError.apiTimeoutError(timeout_err, self.provider)
        except requests.exceptions.TooManyRedirects as redirect_err:
            # the URL is most likely wrong
            raise IngestApiError.apiRedirectError(redirect_err, self.provider)
        except requests.exceptions.RequestException as request_err:
            # any other failure reported by requests
            raise IngestApiError.apiRequestError(request_err, self.provider)
        except Exception as unexpected:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(unexpected, self.provider)

        if reply.status_code != 404:
            return reply.content

        raise LookupError('Not found')
Exemple #21
0
    def _request(self, url):
        """Fetch ``url`` with an HTTP GET and return the body of the answer.

        :param url: address to request
        :return: content of the HTTP response
        :raises IngestApiError: on timeout, too many redirects or any other
            failure while performing the request
        :raises LookupError: when the answer has status code 404
        """
        try:
            http_response = requests.get(url, params={}, timeout=120)
        except requests.exceptions.Timeout as timed_out:
            # no answer within the allowed time
            raise IngestApiError.apiTimeoutError(timed_out, self.provider)
        except requests.exceptions.TooManyRedirects as too_many:
            # redirect loop, usually a sign of a bad URL
            raise IngestApiError.apiRedirectError(too_many, self.provider)
        except requests.exceptions.RequestException as failed:
            # remaining request-level failures
            raise IngestApiError.apiRequestError(failed, self.provider)
        except Exception as unknown:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(unknown, self.provider)

        found = http_response.status_code != 404
        if not found:
            raise LookupError('Not found')

        return http_response.content
Exemple #22
0
 def _update(self, provider, update):
     """Describe the Wufoo request and let the feed parser turn it into items.

     :param provider: ingest provider; ``provider['config']`` must contain
         ``wufoo_username`` and ``wufoo_api_key``
     :param update: handed to the parser under the ``update`` key
     :return: one-element list wrapping whatever the parser returns
     :raises IngestApiError: if obtaining the feed parser fails
     """
     user = provider['config']['wufoo_username']
     wufoo_data = {
         "url": WUFOO_URL.format(subdomain=user),
         "user": user,
         "api_key": provider['config']['wufoo_api_key'],
         "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES,
         "update": update}
     try:
         parser = self.get_feed_parser(provider, None)
     except requests.exceptions.Timeout as ex:
         raise IngestApiError.apiTimeoutError(ex, provider)
     except requests.exceptions.TooManyRedirects as ex:
         raise IngestApiError.apiRedirectError(ex, provider)
     except requests.exceptions.RequestException as ex:
         raise IngestApiError.apiRequestError(ex, provider)
     except Exception as error:
         traceback.print_exc()
         # report against the provider argument, like every other handler above
         raise IngestApiError.apiGeneralError(error, provider)
     items = parser.parse(wufoo_data, provider)
     return [items]
Exemple #23
0
 def _update(self, provider, update):
     """Describe the Wufoo request and let the feed parser turn it into items.

     :param provider: ingest provider; ``provider['config']`` must contain
         ``wufoo_username`` and ``wufoo_api_key``
     :param update: handed to the parser under the ``update`` key
     :return: one-element list wrapping whatever the parser returns
     :raises IngestApiError: if obtaining the feed parser fails
     """
     user = provider['config']['wufoo_username']
     wufoo_data = {
         "url": WUFOO_URL.format(subdomain=user),
         "user": user,
         "api_key": provider['config']['wufoo_api_key'],
         "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES,
         "update": update}
     try:
         parser = self.get_feed_parser(provider, None)
     except requests.exceptions.Timeout as ex:
         raise IngestApiError.apiTimeoutError(ex, provider)
     except requests.exceptions.TooManyRedirects as ex:
         raise IngestApiError.apiRedirectError(ex, provider)
     except requests.exceptions.RequestException as ex:
         raise IngestApiError.apiRequestError(ex, provider)
     except Exception as error:
         traceback.print_exc()
         # report against the provider argument, like every other handler above
         raise IngestApiError.apiGeneralError(error, provider)
     items = parser.parse(wufoo_data, provider)
     return [items]
 def test_raise_apiNotFoundError(self):
     """apiNotFoundError has code 4006, keeps the cause and logs one error."""
     with assert_raises(IngestApiError) as error_context:
         ex = Exception("Testing apiNotFoundError")
         raise IngestApiError.apiNotFoundError(ex, self.provider)
     exception = error_context.exception
     # assertEqual shows both values on failure, assertTrue(a == b) does not
     self.assertEqual(exception.code, 4006)
     self.assertEqual(exception.message, "API service not found(404) error")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing apiNotFoundError")
     self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
     self.assertEqual(self.mock_logger_handler.messages['error'][0],
                      "IngestApiError Error 4006 - API service not found(404) error: "
                      "Testing apiNotFoundError on channel TestProvider")
Exemple #25
0
    def _test(self, provider):
        """Check that the configured API endpoint answers for the configured key.

        :param provider: ingest provider; its ``config`` must contain ``url``
            and ``api_key``
        :raises IngestApiError: on connection failure, on a 404 answer or on
            any other non-OK answer
        """
        config = provider.get('config', {})
        url = config['url']
        api_key = config['api_key']

        # limit the data to a single article and filter out all article fields
        # to save bandwidth
        params = {'limit': 1, 'fields': 'id'}
        headers = {'apikey': api_key}

        try:
            response = requests.get(url, params=params, headers=headers, timeout=30)
        except requests.exceptions.ConnectionError as err:
            # hand over the provider as well, as the other errors raised below do
            raise IngestApiError.apiConnectionError(exception=err, provider=provider)

        if not response.ok:
            if response.status_code == 404:
                raise IngestApiError.apiNotFoundError(
                    Exception(response.reason), provider)
            else:
                raise IngestApiError.apiGeneralError(
                    Exception(response.reason), provider)
 def test_raise_apiRedirectError(self):
     """apiRedirectError has code 4002, keeps the cause and logs one error."""
     with assert_raises(IngestApiError) as error_context:
         ex = Exception("Testing apiRedirectError")
         raise IngestApiError.apiRedirectError(ex, self.provider)
     exception = error_context.exception
     # assertEqual shows both values on failure, assertTrue(a == b) does not
     self.assertEqual(exception.code, 4002)
     self.assertEqual(exception.message, "API ingest has too many redirects")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing apiRedirectError")
     self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
     self.assertEqual(self.mock_logger_handler.messages['error'][0],
                      "IngestApiError Error 4002 - API ingest has too many redirects: "
                      "Testing apiRedirectError on channel TestProvider")
    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        A fresh auth token is stored under ``payload['token']`` before the
        request is sent (a dict passed in by the caller is modified in place).
        A timed-out request is retried up to three times before giving up.

        :param: endpoint
        :type endpoint: str
        :param: payload - query parameters for the request
        :type payload: dict
        :return: root element of the parsed XML answer
        :raises IngestApiError: if the request fails or the answer cannot be parsed
        :raises LookupError: if the API answers with status 404
        """

        if payload is None:
            payload = {}

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        # The session is created lazily and kept on the instance for later calls.
        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                # timeout is a (connect, read) pair in seconds
                response = self.session.get(url, params=payload, timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    # Logger.warn is a deprecated alias of Logger.warning
                    logger.warning('Reuters API timeout retrying, retries {}'.format(retries))
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider)
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider)
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider)
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(error, self.provider)

            if response.status_code == 404:
                raise LookupError('Not found %s' % payload)

            break

        try:
            return etree.fromstring(response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)
class NewsworthyFeedingService(FeedingService):
    """
    Feeding Service class which can retrieve articles from Newsworthy web service
    """

    NAME = 'newsworthy'

    ERRORS = [IngestApiError.apiRequestError().get_error_description(),
              SuperdeskIngestError.notConfiguredError().get_error_description()]

    label = 'Newsworthy'

    fields = [
        {
            'id': 'url', 'type': 'text', 'label': 'Use this URL for webhook',
            'default_value': '',
            'readonly': True,
        },
        {
            'id': 'username', 'type': 'text', 'label': 'Username',
            'required': True
        },
        {
            'id': 'password', 'type': 'password', 'label': 'Password',
            'required': True
        },
        {
            'id': 'secret', 'type': 'password', 'label': 'Shared Secret',
            'placeholder': 'Shared Secret', 'required': False
        },
    ]

    def _update(self, provider, update):
        """Parse the webhook data stored on the provider under ``newsworthy_data``.

        :param provider: ingest provider, expected to carry the received
            webhook data under the ``newsworthy_data`` key
        :param update: not used by this service
        :return: list holding the parser result, or ``[[]]`` when there is no
            data or the event is an unpublish event
        """
        try:
            data = provider['newsworthy_data']
        except (KeyError, IndexError):
            # a missing key raises KeyError on a mapping; IndexError is still
            # accepted so that the previous behaviour is kept as well
            return [[]]
        if data['hook']['event'] == EVENT_UNPUBLISHED:
            logger.info("ignoring unpublish event on following data:\n{data}".format(data=data))
            return [[]]

        # we have to write to a temporary file because feed parser expect a file path
        # FIXME: it would be better to use the data directly
        with NamedTemporaryFile('w') as f:
            json.dump(data['data'], f)
            # rewinding also pushes the buffered JSON out to the file on disk
            f.seek(0)
            parser = self.get_feed_parser(provider, f.name)
            items = parser.parse(f.name, provider)

        return [items]
Exemple #29
0
 def test_raise_apiParseError(self):
     """apiParseError has code 4005, keeps the cause and logs one error."""
     with assert_raises(IngestApiError) as error_context:
         ex = Exception("Testing apiParseError")
         raise IngestApiError.apiParseError(ex, self.provider)
     exception = error_context.exception
     self.assertEqual(exception.code, 4005)
     self.assertEqual(exception.message, "API ingest xml parse error")
     self.assertIsNotNone(exception.system_exception)
     # assertEquals was a deprecated alias (removed in Python 3.12)
     self.assertEqual(exception.system_exception.args[0], "Testing apiParseError")
     self.assertEqual(len(self.mock_logger_handler.messages["error"]), 1)
     self.assertEqual(
         self.mock_logger_handler.messages["error"][0],
         "IngestApiError Error 4005 - API ingest xml parse error: " "Testing apiParseError on channel TestProvider",
     )
Exemple #30
0
 def test_raise_apiRequestError(self):
     """apiRequestError has code 4003, keeps the cause and logs one error."""
     with assert_raises(IngestApiError) as error_context:
         ex = Exception("Testing apiRequestError")
         raise IngestApiError.apiRequestError(ex, self.provider)
     exception = error_context.exception
     self.assertEqual(exception.code, 4003)
     self.assertEqual(exception.message, "API ingest has request error")
     self.assertIsNotNone(exception.system_exception)
     # assertEquals was a deprecated alias (removed in Python 3.12)
     self.assertEqual(exception.system_exception.args[0],
                      "Testing apiRequestError")
     self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
     self.assertEqual(
         self.mock_logger_handler.messages['error'][0],
         "IngestApiError Error 4003 - API ingest has request error: "
         "Testing apiRequestError on channel TestProvider")
 def test_raise_apiNotFoundError(self):
     """apiNotFoundError raised from an except block has code 4006 and logs one error."""
     with assert_raises(IngestApiError) as error_context:
         try:
             ex = Exception("Testing apiNotFoundError")
             raise ex
         except Exception:
             raise IngestApiError.apiNotFoundError(ex, self.provider)
     exception = error_context.exception
     # assertEqual shows both values on failure, assertTrue(a == b) does not
     self.assertEqual(exception.code, 4006)
     self.assertEqual(exception.message, "API service not found(404) error")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing apiNotFoundError")
     self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
     self.assertEqual(self.mock_logger_handler.messages['error'][0],
                      "IngestApiError Error 4006 - API service not found(404) error: "
                      "Testing apiNotFoundError on channel TestProvider")
 def test_raise_apiTimeoutError(self):
     """apiTimeoutError raised from an except block has code 4001 and logs one error."""
     with assert_raises(IngestApiError) as error_context:
         try:
             ex = Exception("Testing apiTimeoutError")
             raise ex
         except Exception:
             raise IngestApiError.apiTimeoutError(ex, self.provider)
     exception = error_context.exception
     # assertEqual shows both values on failure, assertTrue(a == b) does not
     self.assertEqual(exception.code, 4001)
     self.assertEqual(exception.message, "API ingest connection has timed out.")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing apiTimeoutError")
     self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
     self.assertEqual(self.mock_logger_handler.messages['error'][0],
                      "IngestApiError Error 4001 - API ingest connection has timed out.: "
                      "Testing apiTimeoutError on channel TestProvider")
 def test_raise_apiUnicodeError(self):
     """apiUnicodeError raised from an except block has code 4004 and logs one error."""
     with assert_raises(IngestApiError) as error_context:
         try:
             ex = Exception("Testing apiUnicodeError")
             raise ex
         except Exception:
             raise IngestApiError.apiUnicodeError(ex, self.provider)
     exception = error_context.exception
     # assertEqual shows both values on failure, assertTrue(a == b) does not
     self.assertEqual(exception.code, 4004)
     self.assertEqual(exception.message, "API ingest Unicode Encode Error")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing apiUnicodeError")
     self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
     self.assertEqual(self.mock_logger_handler.messages['error'][0],
                      "IngestApiError Error 4004 - API ingest Unicode Encode Error: "
                      "Testing apiUnicodeError on channel TestProvider")
Exemple #34
0
    def _update(self, provider, update):
        """Request pending ``RBNews`` elements from the XML service and parse them.

        When no URL override is configured, the default ``URL`` is requested
        with ``waitAcknowledge`` set and every parsed element is acknowledged
        through ``URL_ACK`` using its ``ServiceQueueId``.

        :param provider: ingest provider; ``provider['config']`` must contain
            ``username`` and ``password`` and may contain ``url``
        :param update: not used by this service
        :return: one-element list holding the list of parsed items
        :raises SuperdeskIngestError: if credentials are missing or the URL
            override is not an http link
        :raises IngestApiError: if a request fails, the answer is not valid
            XML, the API reports an error or an element lacks ``ServiceQueueId``
        """
        try:
            config = provider['config']
            user = config['username']
            password = config['password']
        except KeyError:
            # the error has to be raised, otherwise execution continues with
            # unbound credentials
            raise SuperdeskIngestError.notConfiguredError(Exception('username and password are needed'))

        url_override = (config.get('url') or '').strip()
        # only validate the override when one is actually set
        if url_override and not url_override.startswith('http'):
            raise SuperdeskIngestError.notConfiguredError(
                Exception('if URL is set, it must be a valid http link'))

        params = {'user': user, 'password': password, 'maksAntal': 50}
        if not url_override:
            params['waitAcknowledge'] = 'true'

        try:
            r = requests.get(url_override or URL, params=params)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        try:
            root_elt = etree.fromstring(r.text)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while parsing the request answer'))

        try:
            if root_elt.xpath('(//error/text())[1]')[0] != '0':
                err_msg = root_elt.xpath('(//errormsg/text())[1]')[0]
                raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg)))
        except IndexError:
            raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found'))

        parser = self.get_feed_parser(provider)
        items = []
        for elt in root_elt.xpath('//RBNews'):
            item = parser.parse(elt, provider)
            items.append(item)
            if not url_override:
                try:
                    queue_id = elt.xpath('.//ServiceQueueId/text()')[0]
                except IndexError:
                    raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element'))
                ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id}
                try:
                    requests.get(URL_ACK, params=ack_params)
                except Exception:
                    raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        return [items]
Exemple #35
0
    def test_raise_apiGeneralError(self):
        """Check code, message, provider name, cause and log line of apiGeneralError."""
        with assert_raises(IngestApiError) as raised:
            cause = Exception("Testing general API error")
            raise IngestApiError.apiGeneralError(cause, self.provider)

        api_error = raised.exception
        self.assertEqual(api_error.code, 4000)
        self.assertEqual(api_error.message, "Unknown API ingest error")
        self.assertEqual(api_error.provider_name, "TestProvider")

        # the original exception has to be kept on the ingest error
        self.assertIsNotNone(api_error.system_exception)
        self.assertEqual(api_error.system_exception.args[0], "Testing general API error")

        # exactly one error line is expected in the log
        logged_errors = self.mock_logger_handler.messages["error"]
        self.assertEqual(len(logged_errors), 1)
        self.assertEqual(
            logged_errors[0],
            "IngestApiError Error 4000 - Unknown API ingest error: "
            "Testing general API error on channel TestProvider",
        )
Exemple #36
0
    def test_raise_apiGeneralError(self):
        """Check code, message, provider name, cause and log line of apiGeneralError."""
        with assert_raises(IngestApiError) as caught:
            original = Exception("Testing general API error")
            raise IngestApiError.apiGeneralError(original, self.provider)

        ingest_error = caught.exception
        self.assertEqual(ingest_error.code, 4000)
        self.assertEqual(ingest_error.message, "Unknown API ingest error")
        self.assertEqual(ingest_error.provider_name, "TestProvider")

        # the original exception has to be kept on the ingest error
        self.assertIsNotNone(ingest_error.system_exception)
        self.assertEqual(ingest_error.system_exception.args[0],
                         "Testing general API error")

        # exactly one error line is expected in the log
        error_lines = self.mock_logger_handler.messages['error']
        self.assertEqual(len(error_lines), 1)
        self.assertEqual(
            error_lines[0],
            "IngestApiError Error 4000 - Unknown API ingest error: "
            "Testing general API error on channel TestProvider")
Exemple #37
0
 def test_raise_apiParseError(self):
     """apiParseError raised from an except block has code 4005 and logs one error."""
     with assert_raises(IngestApiError) as error_context:
         try:
             ex = Exception("Testing apiParseError")
             raise ex
         except Exception:
             raise IngestApiError.apiParseError(ex, self.provider)
     exception = error_context.exception
     # assertEqual shows both values on failure, assertTrue(a == b) does not
     self.assertEqual(exception.code, 4005)
     self.assertEqual(exception.message, "API ingest xml parse error")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0],
                      "Testing apiParseError")
     self.assertEqual(len(self.mock_logger_handler.messages["error"]), 1)
     self.assertEqual(
         self.mock_logger_handler.messages["error"][0],
         "IngestApiError Error 4005 - API ingest xml parse error: "
         "Testing apiParseError on channel TestProvider",
     )
Exemple #38
0
    def _get_tree(self, endpoint, payload=None):
        """
        Request an API endpoint and return its answer parsed as XML.

        A fresh auth token is stored under the ``token`` key of the payload
        before the request is sent.

        :param endpoint: endpoint to request, resolved to an absolute URL
        :param payload: query parameters for the request, optional
        :raises IngestApiError: if the request or the XML parsing fails
        :raises LookupError: if the API answers with status 404
        """
        query = {} if payload is None else payload
        query['token'] = self._get_auth_token(self.provider, update=True)
        target = self._get_absolute_url(endpoint)

        try:
            reply = requests.get(target, params=query, timeout=15)
        except requests.exceptions.Timeout as timeout_err:
            # no answer within the allowed time
            raise IngestApiError.apiTimeoutError(timeout_err, self.provider)
        except requests.exceptions.TooManyRedirects as redirect_err:
            # redirect loop, usually a sign of a bad URL
            raise IngestApiError.apiRedirectError(redirect_err, self.provider)
        except requests.exceptions.RequestException as request_err:
            # remaining request-level failures
            raise IngestApiError.apiRequestError(request_err, self.provider)
        except Exception as unexpected:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(unexpected, self.provider)

        not_found = reply.status_code == 404
        if not_found:
            raise LookupError('Not found %s' % query)

        try:
            # parse the raw content: workaround for http mock lib
            return etree.fromstring(reply.content)
        except UnicodeEncodeError as encode_err:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(encode_err, self.provider)
        except ParseError as parse_err:
            traceback.print_exc()
            raise IngestApiError.apiParseError(parse_err, self.provider)
        except Exception as unexpected:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(unexpected, self.provider)
Exemple #39
0
    def get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        The current token is stored under ``payload['token']`` before the
        request is sent.

        :param endpoint: endpoint to request, resolved through ``get_url``
        :param payload: query parameters for the request, optional
        :return: root element of the parsed XML answer
        :raises IngestApiError: if the request fails, the API answers with
            status 404 or the answer cannot be parsed
        """
        if payload is None:
            payload = {}
        payload['token'] = self.get_token()
        url = self.get_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=21.0)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            # build the error through the factory, like all handlers above
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise IngestApiError.apiNotFoundError(
                LookupError('Not found %s' % payload), self.provider)

        try:
            # workaround for httmock lib
            # return etree.fromstring(response.text.encode('utf-8'))
            return etree.fromstring(response.content)
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            # build the error through the factory, like all handlers above
            raise IngestApiError.apiGeneralError(error, self.provider)
Exemple #40
0
    def _get_tree(self, endpoint, payload=None):
        """
        Fetch the XML answer of an API endpoint and return it parsed.

        The payload receives a fresh auth token under its ``token`` key
        before the request is made.

        :param endpoint: endpoint to request, resolved to an absolute URL
        :param payload: query parameters for the request, optional
        :raises IngestApiError: if the request or the XML parsing fails
        :raises LookupError: if the API answers with status 404
        """
        request_params = payload if payload is not None else {}
        request_params['token'] = self._get_auth_token(self.provider, update=True)
        absolute_url = self._get_absolute_url(endpoint)

        try:
            http_response = requests.get(absolute_url, params=request_params, timeout=15)
        except requests.exceptions.Timeout as timed_out:
            # the server did not answer in time
            raise IngestApiError.apiTimeoutError(timed_out, self.provider)
        except requests.exceptions.TooManyRedirects as too_many:
            # the URL is most likely wrong
            raise IngestApiError.apiRedirectError(too_many, self.provider)
        except requests.exceptions.RequestException as failed:
            # any other failure reported by requests
            raise IngestApiError.apiRequestError(failed, self.provider)
        except Exception as unknown:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(unknown, self.provider)

        if http_response.status_code == 404:
            raise LookupError('Not found %s' % request_params)

        try:
            # parse the raw content: workaround for http mock lib
            xml_root = etree.fromstring(http_response.content)
        except UnicodeEncodeError as encode_failure:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(encode_failure, self.provider)
        except ParseError as parse_failure:
            traceback.print_exc()
            raise IngestApiError.apiParseError(parse_failure, self.provider)
        except Exception as unknown:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(unknown, self.provider)
        return xml_root
Exemple #41
0
    def get_url(self, url=None, **kwargs):
        """Perform the parent's HTTP GET and unwrap the JSON envelope of the answer.

        :param string url: url to use (None to use self.HTTP_URL)
        :param **kwargs: extra parameter for requests
        :return dict: value stored under ``data`` in the JSON answer
        :raises IngestApiError: if the answer has ``hasError`` set
        """
        envelope = super().get_url(url=url, **kwargs).json()

        if not envelope['hasError']:
            return envelope['data']

        # the API reported a failure: log it and surface it as an ingest error
        message = "Error in GET: '{}'. ErrorCode: '{}'. Description: '{}'".format(
            url,
            envelope['data']['errorCode'],
            envelope['data']['description']
        )
        logger.error(message)
        raise IngestApiError.apiGeneralError(Exception(message), self.provider)
Exemple #42
0
    def _fetch_data(self):
        """Collect the raw text of every result page since the last update.

        Pages of ``offset_jump`` results are requested until the offset
        reaches the ``total`` reported in the answer.

        :return: list with the text of each fetched page
        :raises IngestApiError: if an answer does not contain a numeric
            ``total`` field
        """
        url = self.config['url']
        api_key = self.config['api_key']

        last_update = self.provider.get(
            'last_updated', utcfromtimestamp(0)).strftime('%Y-%m-%dT%H:%M:%S')

        # Results are pagified so we'll read this many at a time
        offset_jump = 10

        params = {'start': last_update, 'limit': offset_jump}
        headers = {'apikey': api_key}

        # The total number of results are given to us in json, get them
        # via a regex to read the field so we don't have to convert the
        # whole thing to json pointlessly
        total_pattern = re.compile(r'"total": *([0-9]*)')

        items = []

        offset = 0
        while True:
            params['offset'] = offset

            response = self.get_url(url, params=params, headers=headers)

            # A missing field or a field without digits is reported as an
            # ingest error; chaining .group() on the searches used to fail
            # with AttributeError before that check could run.
            total_match = total_pattern.search(response.text)
            if total_match is None or not total_match.group(1):
                raise IngestApiError.apiGeneralError(Exception(response.text),
                                                     self.provider)

            num_results = int(total_match.group(1))

            if num_results > 0:
                items.append(response.text)

            if offset >= num_results:
                return items

            offset += offset_jump
    def _update(self, provider, update):
        """Retrieve and save the fuel prices of one configured market.

        Each run handles the market at ``provider['private']['market_index']``
        and then stores the index of the next market, wrapping around at the
        end of the list, so that successive runs cycle through all markets.

        :param provider: ingest provider; ``config.market_definitions`` is a
            JSON-like string (single quotes are accepted) listing the markets
        :param update: not used by this service
        :return: None
        :raises IngestApiError: if no market is configured or if retrieving
            or saving the prices fails
        """
        # Each update run will retrieve the data for a single "market"
        market_index = provider.get('private', {}).get('market_index', 0)
        # The fallback has to be a string because str.replace is applied to the value.
        raw_definitions = provider.get('config', {}).get('market_definitions') or '[]'
        markets = json.loads(raw_definitions.replace('\'', '"'))
        if not markets:
            raise IngestApiError.apiGeneralError(Exception('No market definitions configured'), provider)
        market = markets[market_index]
        logger.info('Retrieving fuel data for the {} market'.format(market.get('market')))

        try:
            self.session_token = self._get_token(provider).get('id')
            prices = self._get_prices(provider, market)
            self._save(prices, market)
        except Exception as ex:
            raise IngestApiError.apiGeneralError(ex, self.provider)
        finally:
            # Save the next market to process
            market_index = (market_index + 1) % len(markets)
            get_resource_service('ingest_providers').system_update(provider.get('_id'),
                                                                   {'private': {'market_index': market_index}},
                                                                   provider)

        return None
Exemple #44
0
class EventHTTPFeedingService(HTTPFeedingServiceBase):
    """
    Feeding service which retrieves events from a feed URL over HTTP.
    """

    NAME = 'event_http'
    label = 'Event HTTP feed'
    service = 'events'
    fields = [
        {
            'id': 'url',
            'type': 'text',
            'label': 'Feed URL',
            'placeholder': 'Feed URL',
            'required': True,
        }
    ]
    # Descriptions of the errors this service declares
    ERRORS = [
        error_factory().get_error_description()
        for error_factory in (
            IngestApiError.apiTimeoutError,
            IngestApiError.apiRedirectError,
            IngestApiError.apiRequestError,
            IngestApiError.apiUnicodeError,
            IngestApiError.apiParseError,
            IngestApiError.apiGeneralError,
        )
    ]
    HTTP_AUTH = False

    def _update(self, provider, update):
        """
        Fetch events from the configured feed URL and parse them.

        The parser's ``parse_http`` method is preferred when it exists,
        otherwise its plain ``parse`` method is used.

        :param provider: Ingest Provider Details.
        :type provider: dict
        :param update: Any update that is required on provider.
        :type update: dict
        :return: generator yielding a single list of parsed events.
        """

        response = self.get_url(self.config['url'])
        feed_parser = self.get_feed_parser(provider)
        content = response.content

        logger.info('Ingesting events with {} parser'.format(feed_parser.__class__.__name__))
        logger.info('Ingesting content: {} ...'.format(str(content)[:4000]))

        if hasattr(feed_parser, 'parse_http'):
            parsed = feed_parser.parse_http(content, provider)
        else:
            parsed = feed_parser.parse(content)

        # A parser may return a single item, always hand over a list
        yield parsed if isinstance(parsed, list) else [parsed]
    def test_raise_apiAuthError(self):
        """An API auth error carries code 4007, wraps its cause and is logged once."""
        cause_text = "Testing API authorization error"

        with assert_raises(IngestApiError) as error_context:
            raise IngestApiError.apiAuthError(Exception(cause_text), self.provider)

        raised = error_context.exception
        self.assertEqual(raised.code, 4007)
        self.assertEqual(raised.message, "API authorization error")
        self.assertEqual(raised.provider_name, "TestProvider")

        # The original exception must be kept on the raised error
        self.assertIsNotNone(raised.system_exception)
        self.assertEqual(raised.system_exception.args[0], cause_text)

        logged_errors = self.mock_logger_handler.messages['error']
        self.assertEqual(len(logged_errors), 1)
        self.assertEqual(
            logged_errors[0],
            "IngestApiError Error 4007 - API authorization error: "
            "Testing API authorization error on channel TestProvider")
Exemple #46
0
    def _update(self, provider, update):
        """Replace the stored traffic incidents with the current content of the feed.

        The feed at the provider's ``api_url`` is requested with HTTP basic
        authentication, every entry of its ``features`` list is converted to an
        incident and the ``traffic_incidents`` resource is emptied and refilled.

        :param provider: ingest provider details
        :type provider: dict
        :param update: any update that is required on provider (not used here)
        :type update: dict
        :raises IngestApiError: if the feed cannot be retrieved
        """

        def convert_date(epoch):
            # Only the first 10 digits are used - presumably the feed supplies
            # milliseconds and this truncates them to whole seconds (TODO confirm)
            return local_to_utc(config.DEFAULT_TIMEZONE, datetime.fromtimestamp(int(str(epoch)[:10])))

        provider_config = provider.get('config', {})
        username = provider_config.get('username')
        password = provider_config.get('password')
        url = provider_config.get('api_url')

        try:
            # Without a timeout a stalled server would block the ingest run forever
            response = requests.get(url, auth=(username, password), timeout=30)
            response.raise_for_status()
        except Exception as ex:
            raise IngestApiError.apiGeneralError(ex, self.provider)

        data = json.loads(response.content.decode('UTF-8'))

        service = get_resource_service('traffic_incidents')
        incidents = []
        for feature in data.get('features', []):
            props = feature.get('properties', {})
            geometry = feature.get('geometry')
            if geometry:
                # The 'crs' member is not stored; tolerate features which lack it
                geometry.pop('crs', None)

            incidents.append({
                'guid': int(props.get('id')),
                'start_date': convert_date(props.get('startDate')),
                'end_date': convert_date(props.get('endDate')),
                'incident_type': props.get('type'),
                'incident_description': props.get('description'),
                'city': props.get('city'),
                'state': props.get('state'),
                'from_street_name': props.get('fromStreetName'),
                'from_cross_street_name': props.get('fromCrossStreetName'),
                'to_street_name': props.get('toStreetName'),
                'to_cross_street_name': props.get('toCrossStreetName'),
                'geometry': geometry
            })

        # The feed is a full snapshot: drop everything stored and save the new set
        service.delete(lookup={})
        service.post(incidents)
    def test_raise_apiAuthError(self):
        """An API auth error raised while handling another exception is reported correctly.

        The error must carry code 4007, keep the original exception and be
        logged exactly once.
        """
        with assert_raises(IngestApiError) as error_context:
            try:
                ex = Exception("Testing API authorization error")
                raise ex
            except Exception:
                # Raised from within the handler on purpose, to mimic real usage
                raise IngestApiError.apiAuthError(ex, self.provider)

        exception = error_context.exception
        self.assertEqual(exception.code, 4007)
        self.assertEqual(exception.message, "API authorization error")
        self.assertEqual(exception.provider_name, "TestProvider")

        self.assertIsNotNone(exception.system_exception)
        self.assertEqual(exception.system_exception.args[0],
                         "Testing API authorization error")

        self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
        self.assertEqual(
            self.mock_logger_handler.messages['error'][0],
            "IngestApiError Error 4007 - API authorization error: "
            "Testing API authorization error on channel TestProvider")
Exemple #48
0
    def get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        :param endpoint: API endpoint, turned into a URL by ``self.get_url``
        :param payload: optional dict of query parameters; the token returned
            by ``self.get_token`` is added to it under the ``token`` key
        :return: root element of the parsed XML response
        :raises IngestApiError: on timeout, too many redirects, any other
            request failure, a 404 response or an unparsable response body
        """
        if payload is None:
            payload = {}
        payload['token'] = self.get_token()
        url = self.get_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=21.0)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            # Use the factory like every other branch, it takes (exception, provider)
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise IngestApiError.apiNotFoundError(LookupError('Not found %s' % payload), self.provider)

        try:
            # workaround for httmock lib
            # return etree.fromstring(response.text.encode('utf-8'))
            return etree.fromstring(response.content)
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            # Use the factory like every other branch, it takes (exception, provider)
            raise IngestApiError.apiGeneralError(error, self.provider)
Exemple #49
0
class ReutersHTTPFeedingService(HTTPFeedingService):
    """
    Feeding Service class which can read article(s) using HTTP provided by Reuters.
    """

    NAME = "reuters_http"

    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiUnicodeError().get_error_description(),
        IngestApiError.apiParseError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description(),
    ]

    # Format of the dates sent in the ``dateRange`` request parameter
    DATE_FORMAT = "%Y.%m.%d.%H.%M"

    label = "Reuters feed API"

    fields = [
        {
            "id": "url",
            "type": "text",
            "label": "Feed URL",
            "placeholder": "Feed URL",
            "required": True,
            "default": "http://rmb.reuters.com/rmd/rest/xml",
        },
        {
            "id": "auth_url",
            "type": "text",
            "label": "URL for Authentication",
            "placeholder": "authentication url",
            "required": True,
            "default": "https://commerce.reuters.com/rmd/rest/xml/login",
        },
        {"id": "username", "type": "text", "label": "Username", "placeholder": "Username", "required": True},
        {"id": "password", "type": "password", "label": "Password", "placeholder": "Password", "required": True},
    ]

    # ``requests`` session, created lazily by ``_get_tree`` and reused afterwards
    session = None

    def _update(self, provider, update):
        """Yield the items of every article found on the subscribed channels.

        The start of the requested period is ``last_updated`` of the provider,
        but never further back than ``INGEST_EXPIRY_MINUTES`` before now.
        A failure while fetching one article is logged and does not stop
        the processing of the remaining ones.

        :param provider: ingest provider details
        :type provider: dict
        :param update: any update that is required on provider (not used here)
        :type update: dict
        :return: generator yielding one list of items per article
        """
        updated = utcnow()

        last_updated = provider.get("last_updated")
        ttl_minutes = app.config["INGEST_EXPIRY_MINUTES"]
        oldest_allowed = updated - datetime.timedelta(minutes=ttl_minutes)
        if not last_updated or last_updated < oldest_allowed:
            last_updated = oldest_allowed

        self.provider = provider
        provider_config = provider.get("config")
        if not provider_config:
            provider_config = {}
            provider["config"] = provider_config

        provider_config.setdefault("url", "http://rmb.reuters.com/rmd/rest/xml")
        provider_config.setdefault("auth_url", "https://commerce.reuters.com/rmd/rest/xml/login")
        self.URL = provider_config.get("url")

        for channel in self._get_channels():
            article_ids = self._get_article_ids(channel, last_updated, updated)
            for article_id in article_ids:
                try:
                    items = self.fetch_ingest(article_id)
                    if items:
                        yield items
                # if there was an exception processing the one of the bunch log it and continue
                except Exception as ex:
                    logger.warning("Reuters item {} has not been retrieved".format(article_id))
                    logger.exception(ex)

    def _get_channels(self):
        """Get subscribed channels.

        :return: list with the alias of every ``channelInformation`` element
        """
        tree = self._get_tree("channels")
        return [channel.find("alias").text for channel in tree.findall("channelInformation")]

    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        A request which times out is retried up to three times.

        :param endpoint: API endpoint, appended to the configured feed URL
        :type endpoint: str
        :param payload: query parameters; the auth token is added to it
        :type payload: dict
        :return: root element of the parsed XML response
        :raises IngestApiError: if the request fails or the body can't be parsed
        :raises LookupError: if the API answers with status code 404
        """

        if payload is None:
            payload = {}

        payload["token"] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                # timeout is a (connect, read) tuple in seconds
                response = self.session.get(url, params=payload, timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    logger.warning("Reuters API timeout retrying, retries {}".format(retries))
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider)
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider)
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider)
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(error, self.provider)

            if response.status_code == 404:
                # ``fetch_ingest`` relies on this being a LookupError
                raise LookupError(_("Not found {payload}").format(payload=payload))

            break

        try:
            return etree.fromstring(response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

    def _get_absolute_url(self, endpoint):
        """
        Get absolute URL for given endpoint.

        :param endpoint: API endpoint, joined to the feed URL with a slash
        :type endpoint: str
        """
        return "/".join([self.URL, endpoint])

    def _get_article_ids(self, channel, last_updated, updated):
        """
        Get article ids which should be upserted also save the poll token that is returned.

        The stored poll token of the channel is sent when there is one,
        otherwise the request falls back to a date range.

        :param channel: alias of the channel to request
        :param last_updated: start of the date range used without a poll token
        :param updated: end of the date range used without a poll token
        :return: set of article ids; empty on failure, when no token is
            returned or when the returned token equals the stored one
        """
        ids = set()
        payload = {"channel": channel, "fieldsRef": "id"}

        # check if the channel has a pollToken if not fall back to dateRange
        last_poll_token = self._get_poll_token(channel)
        if last_poll_token is not None:
            logger.info("Reuters requesting channel {} with poll token {}".format(channel, last_poll_token))
            payload["pollToken"] = last_poll_token
        else:
            payload["dateRange"] = "%s-%s" % (self._format_date(last_updated), self._format_date(updated))
            logger.info("Reuters requesting channel {} with dateRange {}".format(channel, payload["dateRange"]))

        tree = self._get_tree("items", payload)
        status_code = tree.find("status").get("code") if tree.tag == "results" else tree.get("code")
        # check the returned status
        if status_code != "10":
            logger.warning("Reuters channel request returned status code {}".format(status_code))
            # status code 30 indicates failure
            if status_code == "30":
                # the error element is looked up once and may be missing
                error = tree.find("error")
                error_code = error.get("code") if error is not None else None
                error_text = error.text if error is not None else None
                logger.warning(
                    "Reuters error on channel {} code {} {}".format(channel, error_code, error_text)
                )
                # invalid token
                if error_code == "2100":
                    self._save_poll_token(channel, None)
                    logger.warning("Reuters channel invalid token reseting {}".format(status_code))
                return ids

        # extract the returned poll token if there is one
        poll_token = tree.find("pollToken")
        if poll_token is None:
            logger.info("Reuters channel {} retrieved no token".format(channel))
            return ids

        if poll_token.text == last_poll_token:
            # the token has not changed, so nothing new
            logger.info("Reuters channel {} nothing new".format(channel))
            return ids

        # a new token indicated new content
        logger.info("Reuters channel {} new token {}".format(channel, poll_token.text))
        self._save_poll_token(channel, poll_token.text)

        for result in tree.findall("result"):
            article_id = result.find("id").text
            ids.add(article_id)
            logger.info("Reuters id : {}".format(article_id))

        return ids

    def _save_poll_token(self, channel, poll_token):
        """Saves the poll token for the passed channel in the tokens section of the provider.

        :param channel: alias of the channel the token belongs to
        :param poll_token: token to store, ``None`` resets the stored one
        :return: None
        """
        # get the provider in case it has been updated by another channel
        ingest_provider_service = superdesk.get_resource_service("ingest_providers")
        provider = ingest_provider_service.find_one(req=None, _id=self.provider[superdesk.config.ID_FIELD])
        # the provider may not have any tokens stored yet
        provider_token = provider.get("tokens") or {}
        provider_token.setdefault("poll_tokens", {})[channel] = poll_token
        upd_provider = {"tokens": provider_token}
        ingest_provider_service.system_update(self.provider[superdesk.config.ID_FIELD], upd_provider, self.provider)

    def _get_poll_token(self, channel):
        """Get the poll token from the provider's tokens if it is available.

        :param channel: alias of the channel
        :return: token, or ``None`` when none is stored for the channel
        """
        tokens = self.provider.get("tokens") or {}
        poll_tokens = tokens.get("poll_tokens") or {}
        return poll_tokens.get(channel)

    def _format_date(self, date):
        """Format the date using ``DATE_FORMAT``."""
        return date.strftime(self.DATE_FORMAT)

    def fetch_ingest(self, id):
        """Fetch the article with the given id together with the items it references.

        :param id: id of the article to fetch
        :return: list of items, empty if any of them could not be found
        """
        items = self._parse_items(id)
        result_items = []
        while items:
            item = items.pop()
            self.localize_timestamps(item)
            try:
                items.extend(self._fetch_items_in_package(item))
                result_items.append(item)
            except LookupError as err:
                # a missing referenced item discards the whole article
                self.log_item_error(err, item, self.provider)
                return []

        return result_items

    def _parse_items(self, id):
        """
        Parse item message and return given items.

        :param id: id of the item to request from the ``item`` endpoint
        :return: whatever the feed parser returns for the response
        """

        payload = {"id": id}
        tree = self._get_tree("item", payload)

        parser = self.get_feed_parser(self.provider, tree)
        items = parser.parse(tree, self.provider)

        return items

    def _fetch_items_in_package(self, item):
        """
        Fetch the items referenced via ``residRef`` in the groups of the given item.

        :param item: item whose ``groups`` are inspected
        :return: list of the parsed referenced items
        """
        items = []
        for group in item.get("groups", []):
            for ref in group.get("refs", []):
                if "residRef" in ref:
                    items.extend(self._parse_items(ref.get("residRef")))

        return items

    def prepare_href(self, href, mimetype=None):
        """Strip params, query and fragment from the href and append the auth token.

        :param href: URL of the remote asset
        :param mimetype: not used
        :return: URL with the auth token as its only query parameter
        """
        (scheme, netloc, path, params, query, fragment) = urlparse(href)
        new_href = urlunparse((scheme, netloc, path, "", "", ""))
        return "%s?auth_token=%s" % (new_href, self._get_auth_token(self.provider, update=True))
Exemple #50
0
import requests

from superdesk.io.ingest_service import IngestService

from superdesk.utc import utcnow
from superdesk.etree import etree, ParseError
from superdesk.io import register_provider
from .newsml_2_0 import NewsMLTwoParser
from .reuters_token import get_token
from superdesk.errors import IngestApiError
from flask import current_app as app


# Name of this ingest provider
PROVIDER = 'reuters'

# Descriptions of the API errors declared for this provider
errors = [
    error_factory().get_error_description()
    for error_factory in (
        IngestApiError.apiTimeoutError,
        IngestApiError.apiRedirectError,
        IngestApiError.apiRequestError,
        IngestApiError.apiUnicodeError,
        IngestApiError.apiParseError,
        IngestApiError.apiGeneralError,
    )
]


class ReutersIngestService(IngestService):
    """Reuters ingest service."""

    DATE_FORMAT = '%Y.%m.%d.%H.%M'
    URL = 'http://rmb.reuters.com/rmd/rest/xml'
    token = None

    def __init__(self):
Exemple #51
0
from datetime import datetime

from superdesk.errors import IngestApiError, ParserError
from superdesk.io import register_provider
from superdesk.io.ingest_service import IngestService
from superdesk.utils import merge_dicts

from urllib.parse import quote as urlquote, urlsplit, urlunsplit


# Name of this ingest provider
PROVIDER = "rss"

# Module-level shortcut for ``datetime.utcfromtimestamp``
utcfromtimestamp = datetime.utcfromtimestamp

# Descriptions of the API and parser errors declared for this provider
errors = [
    error_factory().get_error_description()
    for error_factory in (
        IngestApiError.apiAuthError,
        IngestApiError.apiNotFoundError,
        IngestApiError.apiGeneralError,
        ParserError.parseMessageError,
    )
]


class RssIngestService(IngestService):
    """Ingest service for providing feeds received in RSS 2.0 format.

    (NOTE: it should also work with other syndicated feeds formats, too, since
    the underlying parser supports them, but for our needs RSS 2.0 is assumed)
    """

    ItemField = namedtuple("ItemField", ["name", "name_in_data", "type"])