Exemple #1
0
    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        The provider auth token is added to ``payload`` in place before the
        request is sent. A request that times out is retried up to three
        times before the timeout is reported.

        :param endpoint: API endpoint, resolved via ``_get_absolute_url``
        :type endpoint: str
        :param payload: query parameters for the request; a new dict is used
            when omitted
        :type payload: dict
        :return: result of ``etree.fromstring`` applied to the response body
        :raises IngestApiError: if the request or the parsing of the response
            fails
        :raises LookupError: if the API responds with HTTP 404
        """

        if payload is None:
            payload = {}

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                # requests reads a tuple timeout as (connect, read) seconds
                response = self.session.get(url,
                                            params=payload,
                                            timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    logger.warning(
                        'Reuters API timeout retrying, retries {}'.format(
                            retries))
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider) from ex
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider) from ex
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider) from ex
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(
                    error, self.provider) from error

            if response.status_code == 404:
                # keep the auth token out of the error message, which may
                # end up in logs
                reported = {key: value for key, value in payload.items()
                            if key != 'token'}
                raise LookupError(
                    _('Not found {payload}').format(payload=reported))

            break

        try:
            return etree.fromstring(
                response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider) from error
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider) from error
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider) from error
    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        The provider auth token is added to ``payload`` in place before the
        request is sent. A request that times out is retried up to three
        times before the timeout is reported.

        :param endpoint: API endpoint, resolved via ``_get_absolute_url``
        :type endpoint: str
        :param payload: query parameters for the request; a new dict is used
            when omitted
        :type payload: dict
        :return: result of ``etree.fromstring`` applied to the response body
        :raises IngestApiError: if the request or the parsing of the response
            fails
        :raises LookupError: if the API responds with HTTP 404
        """

        if payload is None:
            payload = {}

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                # requests reads a tuple timeout as (connect, read) seconds
                response = self.session.get(url, params=payload, timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    logger.warning('Reuters API timeout retrying, retries {}'.format(retries))
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider) from ex
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider) from ex
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider) from ex
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(error, self.provider) from error

            if response.status_code == 404:
                # keep the auth token out of the error message, which may end up in logs
                reported = {key: value for key, value in payload.items() if key != 'token'}
                raise LookupError('Not found %s' % reported)

            break

        try:
            return etree.fromstring(response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider) from error
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider) from error
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider) from error
 def test_raise_apiParseError(self):
     # Raising apiParseError must expose code 4005 and its message, keep the
     # wrapped exception, and log exactly one error line.
     with assert_raises(IngestApiError) as error_context:
         ex = Exception("Testing apiParseError")
         raise IngestApiError.apiParseError(ex, self.provider)
     exception = error_context.exception
     # assertEqual reports both values on failure, unlike assertTrue(a == b)
     self.assertEqual(exception.code, 4005)
     self.assertEqual(exception.message, "API ingest xml parse error")
     self.assertIsNotNone(exception.system_exception)
     self.assertEqual(exception.system_exception.args[0], "Testing apiParseError")
     self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
     self.assertEqual(self.mock_logger_handler.messages['error'][0],
                      "IngestApiError Error 4005 - API ingest xml parse error: "
                      "Testing apiParseError on channel TestProvider")
Exemple #4
0
 def test_raise_apiParseError(self):
     # Raising apiParseError must expose code 4005 and its message, keep the
     # wrapped exception, and log exactly one error line.
     with assert_raises(IngestApiError) as error_context:
         ex = Exception("Testing apiParseError")
         raise IngestApiError.apiParseError(ex, self.provider)
     exception = error_context.exception
     # assertEqual reports both values on failure, unlike assertTrue(a == b)
     self.assertEqual(exception.code, 4005)
     self.assertEqual(exception.message, "API ingest xml parse error")
     self.assertIsNotNone(exception.system_exception)
     # assertEquals is a deprecated alias of assertEqual
     self.assertEqual(exception.system_exception.args[0],
                      "Testing apiParseError")
     self.assertEqual(len(self.mock_logger_handler.messages['error']), 1)
     self.assertEqual(
         self.mock_logger_handler.messages['error'][0],
         "IngestApiError Error 4005 - API ingest xml parse error: "
         "Testing apiParseError on channel TestProvider")
Exemple #5
0
class EventHTTPFeedingService(HTTPFeedingServiceBase):
    """
    Feeding service that reads events from a configured URL over HTTP.
    """

    NAME = 'event_http'
    label = 'Event HTTP feed'
    service = 'events'

    # single configuration field: the URL the feed is fetched from
    fields = [
        {
            'id': 'url',
            'type': 'text',
            'label': 'Feed URL',
            'placeholder': 'Feed URL',
            'required': True,
        },
    ]

    # descriptions of the API errors this service declares
    ERRORS = [
        make_error().get_error_description()
        for make_error in (
            IngestApiError.apiTimeoutError,
            IngestApiError.apiRedirectError,
            IngestApiError.apiRequestError,
            IngestApiError.apiUnicodeError,
            IngestApiError.apiParseError,
            IngestApiError.apiGeneralError,
        )
    ]

    HTTP_AUTH = False

    def _update(self, provider, update):
        """
        Fetch the feed and yield the parsed events as a single list.

        :param provider: Ingest Provider Details.
        :type provider: dict
        :param update: Any update that is required on provider.
        :type update: dict
        :return: generator yielding one list of events which can be saved.
        """

        response = self.get_url(self.config['url'])
        parser = self.get_feed_parser(provider)

        parser_name = parser.__class__.__name__
        logger.info('Ingesting events with {} parser'.format(parser_name))
        # only the first 4000 characters of the content are logged
        content_preview = str(response.content)[:4000]
        logger.info('Ingesting content: {} ...'.format(content_preview))

        # parsers exposing ``parse_http`` also receive the provider
        items = (
            parser.parse_http(response.content, provider)
            if hasattr(parser, 'parse_http')
            else parser.parse(response.content)
        )

        # a single parsed item is wrapped so that a list is always yielded
        yield items if isinstance(items, list) else [items]
Exemple #6
0
    def _get_tree(self, endpoint, payload=None):
        """
        Get xml response for given API endpoint and payload.

        The provider auth token is added to ``payload`` in place before the
        request is sent.

        :param endpoint: API endpoint, resolved via ``_get_absolute_url``
        :type endpoint: str
        :param payload: query parameters for the request; a new dict is used
            when omitted
        :type payload: dict
        :return: result of ``etree.fromstring`` applied to the response body
        :raises IngestApiError: if the request or the parsing of the response
            fails
        :raises LookupError: if the API responds with HTTP 404
        """

        if payload is None:
            payload = {}

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=15)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider) from ex
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider) from ex
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider) from ex
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider) from error

        if response.status_code == 404:
            # keep the auth token out of the error message, which may end up
            # in logs
            reported = {key: value for key, value in payload.items()
                        if key != 'token'}
            raise LookupError('Not found %s' % reported)

        try:
            return etree.fromstring(
                response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider) from error
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider) from error
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider) from error
Exemple #7
0
    def _get_tree(self, endpoint, payload=None):
        """
        Get xml response for given API endpoint and payload.

        The provider auth token is added to ``payload`` in place before the
        request is sent.

        :param endpoint: API endpoint, resolved via ``_get_absolute_url``
        :type endpoint: str
        :param payload: query parameters for the request; a new dict is used
            when omitted
        :type payload: dict
        :return: result of ``etree.fromstring`` applied to the response body
        :raises IngestApiError: if the request or the parsing of the response
            fails
        :raises LookupError: if the API responds with HTTP 404
        """

        if payload is None:
            payload = {}

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=15)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider) from ex
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider) from ex
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider) from ex
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider) from error

        if response.status_code == 404:
            # keep the auth token out of the error message, which may end up in logs
            reported = {key: value for key, value in payload.items() if key != 'token'}
            raise LookupError('Not found %s' % reported)

        try:
            return etree.fromstring(response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider) from error
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider) from error
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider) from error
Exemple #8
0
    def get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        The token returned by ``get_token`` is added to ``payload`` in place
        before the request is sent.

        :param endpoint: API endpoint, resolved via ``get_url``
        :param payload: query parameters for the request; a new dict is used
            when omitted
        :type payload: dict
        :return: result of ``etree.fromstring`` applied to the response body
        :raises IngestApiError: if the request fails, the API responds with
            HTTP 404, or the response cannot be parsed
        """
        if payload is None:
            payload = {}
        payload['token'] = self.get_token()
        url = self.get_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=21.0)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider) from ex
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider) from ex
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider) from ex
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError(error, self.provider) from error

        if response.status_code == 404:
            # keep the token out of the error message, which may end up in
            # logs
            reported = {key: value for key, value in payload.items()
                        if key != 'token'}
            raise IngestApiError.apiNotFoundError(
                LookupError('Not found %s' % reported), self.provider)

        try:
            # workaround for httmock lib: parse the raw bytes instead of
            # re-encoding response.text as utf-8
            return etree.fromstring(response.content)
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider) from error
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider) from error
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError(error, self.provider) from error
    def get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        The token returned by ``get_token`` is added to ``payload`` in place
        before the request is sent.

        :param endpoint: API endpoint, resolved via ``get_url``
        :param payload: query parameters for the request; a new dict is used
            when omitted
        :type payload: dict
        :return: result of ``etree.fromstring`` applied to the response body
        :raises IngestApiError: if the request fails, the API responds with
            HTTP 404, or the response cannot be parsed
        """
        if payload is None:
            payload = {}
        payload['token'] = self.get_token()
        url = self.get_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=21.0)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider) from ex
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider) from ex
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider) from ex
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError(error, self.provider) from error

        if response.status_code == 404:
            # keep the token out of the error message, which may end up in logs
            reported = {key: value for key, value in payload.items() if key != 'token'}
            raise IngestApiError.apiNotFoundError(LookupError('Not found %s' % reported), self.provider)

        try:
            # workaround for httmock lib: parse the raw bytes instead of
            # re-encoding response.text as utf-8
            return etree.fromstring(response.content)
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider) from error
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider) from error
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError(error, self.provider) from error
Exemple #10
0
class ReutersHTTPFeedingService(HTTPFeedingService):
    """
    Feeding Service class which can read article(s) using HTTP provided by Reuters.
    """

    NAME = 'reuters_http'

    ERRORS = [IngestApiError.apiTimeoutError().get_error_description(),
              IngestApiError.apiRedirectError().get_error_description(),
              IngestApiError.apiRequestError().get_error_description(),
              IngestApiError.apiUnicodeError().get_error_description(),
              IngestApiError.apiParseError().get_error_description(),
              IngestApiError.apiGeneralError().get_error_description()]

    # strftime pattern used for the ``dateRange`` request parameter
    DATE_FORMAT = '%Y.%m.%d.%H.%M'

    def _update(self, provider):
        """
        Yield the items of every article updated since the last run.

        The start of the requested range is ``provider['last_updated']``, but
        never older than ``INGEST_EXPIRY_MINUTES`` before now. Missing ``url``
        and ``auth_url`` entries in the provider config are filled with the
        default Reuters endpoints.

        :param provider: Ingest Provider Details.
        :type provider: dict
        :return: generator yielding one non-empty list of items per article id
        """
        updated = utcnow()

        ttl_minutes = app.config['INGEST_EXPIRY_MINUTES']
        oldest_allowed = updated - datetime.timedelta(minutes=ttl_minutes)
        last_updated = provider.get('last_updated')
        if not last_updated or last_updated < oldest_allowed:
            last_updated = oldest_allowed

        self.provider = provider
        provider_config = provider.get('config')
        if not provider_config:
            provider_config = {}
            provider['config'] = provider_config

        provider_config.setdefault('url', 'http://rmb.reuters.com/rmd/rest/xml')
        provider_config.setdefault('auth_url', 'https://commerce.reuters.com/rmd/rest/xml/login')

        self.URL = provider_config.get('url')

        for channel in self._get_channels():
            for guid in self._get_article_ids(channel, last_updated, updated):
                items = self.fetch_ingest(guid)
                if items:
                    yield items

    def _get_channels(self):
        """Get subscribed channels.

        :return: text of the ``alias`` element of every ``channelInformation``
            element in the ``channels`` response
        :rtype: list
        """
        tree = self._get_tree('channels')
        return [channel.find('alias').text for channel in tree.findall('channelInformation')]

    def _get_tree(self, endpoint, payload=None):
        """
        Get xml response for given API endpoint and payload.

        The provider auth token is added to ``payload`` in place before the
        request is sent.

        :param endpoint: API endpoint, resolved via ``_get_absolute_url``
        :type endpoint: str
        :param payload: query parameters for the request; a new dict is used
            when omitted
        :type payload: dict
        :return: result of ``etree.fromstring`` applied to the response body
        :raises IngestApiError: if the request or the parsing of the response
            fails
        :raises LookupError: if the API responds with HTTP 404
        """

        if payload is None:
            payload = {}

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=15)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider) from ex
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider) from ex
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider) from ex
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider) from error

        if response.status_code == 404:
            # keep the auth token out of the error message; fetch_ingest logs it
            reported = {key: value for key, value in payload.items() if key != 'token'}
            raise LookupError('Not found %s' % reported)

        try:
            return etree.fromstring(response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider) from error
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider) from error
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider) from error

    def _get_absolute_url(self, endpoint):
        """
        Get absolute URL for given endpoint.

        :param endpoint: path appended to ``self.URL`` with a ``/`` separator
        :type endpoint: str
        :rtype: str
        """
        return '/'.join([self.URL, endpoint])

    def _get_article_ids(self, channel, last_updated, updated):
        """
        Get article ids which should be upserted.

        :param channel: channel alias to query
        :param last_updated: start of the requested date range
        :param updated: end of the requested date range
        :return: text of the ``guid`` element of every ``result`` element in
            the ``items`` response
        :rtype: set
        """
        payload = {'channel': channel, 'fieldsRef': 'id',
                   'dateRange': "%s-%s" % (self._format_date(last_updated), self._format_date(updated))}

        logger.info('Reuters requesting Date Range |{}| for channel {}'.format(payload['dateRange'], channel))
        tree = self._get_tree('items', payload)
        return {result.find('guid').text for result in tree.findall('result')}

    def _format_date(self, date):
        """Format ``date`` with ``DATE_FORMAT``."""
        return date.strftime(self.DATE_FORMAT)

    def fetch_ingest(self, guid):
        """
        Fetch the items for ``guid`` together with the items their packages reference.

        Items are taken from a work list one at a time: each gets timestamps
        added and the items referenced by its groups are appended to the work
        list. If a lookup fails, the error is logged and nothing is returned
        for this guid.

        :param guid: id of the article to fetch
        :return: the processed items, or an empty list after a ``LookupError``
        :rtype: list
        """
        items = self._parse_items(guid)
        result_items = []
        while items:
            item = items.pop()
            self.add_timestamps(item)
            try:
                items.extend(self._fetch_items_in_package(item))
                result_items.append(item)
            except LookupError as err:
                self.log_item_error(err, item, self.provider)
                return []

        return result_items

    def _parse_items(self, guid):
        """
        Parse item message and return given items.

        :param guid: id sent as the ``id`` parameter of the ``item`` request
        :return: whatever the feed parser returns for the response tree
        """

        payload = {'id': guid}
        tree = self._get_tree('item', payload)

        parser = self.get_feed_parser(self.provider, tree)
        items = parser.parse(tree, self.provider)

        return items

    def _fetch_items_in_package(self, item):
        """
        Fetch remote assets for given item.

        :param item: item whose ``groups`` / ``refs`` are scanned for
            ``residRef`` entries
        :type item: dict
        :return: parsed items of every referenced ``residRef``
        :rtype: list
        """
        items = []
        for group in item.get('groups', []):
            for ref in group.get('refs', []):
                if 'residRef' in ref:
                    items.extend(self._parse_items(ref.get('residRef')))

        return items
class HTTPFeedingService(FeedingService, metaclass=ABCMeta):
    """
    Feeding Service class which can read article(s) using HTTP.
    """

    ERRORS = [IngestApiError.apiTimeoutError().get_error_description(),
              IngestApiError.apiRedirectError().get_error_description(),
              IngestApiError.apiRequestError().get_error_description(),
              IngestApiError.apiUnicodeError().get_error_description(),
              IngestApiError.apiParseError().get_error_description(),
              IngestApiError.apiGeneralError().get_error_description()]

    label = 'HTTP'

    def __init__(self):
        super().__init__()
        self.token = None

    def _generate_token_and_update_provider(self, provider):
        """
        Generates Authentication Token and updates the given provider with the authentication token.

        The token and its creation time are saved on the ``ingest_providers``
        resource and also stored in ``provider['tokens']``.

        :param provider: dict - Ingest provider details to which the current directory has been configured
        :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
        :return: Authentication Token
        :rtype: str
        """
        token = {'auth_token': self._generate_auth_token(provider), 'created': utcnow()}
        get_resource_service('ingest_providers').system_update(provider[config.ID_FIELD], updates={'tokens': token},
                                                               original=provider)
        provider['tokens'] = token
        return token['auth_token']

    def _generate_auth_token(self, provider):
        """
        Generates Authentication Token as per the configuration in Ingest Provider.

        :param provider: dict - Ingest provider details to which the current directory has been configured
        :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
        :return: token details if successfully authenticated
        :rtype: str
        :raises: IngestApiError.apiGeneralError() if auth_url is missing in the Ingest Provider configuration
        :raises: IngestApiError.apiAuthError() if the authentication request is rejected; the provider is
                 closed before the error is raised
        """
        auth_url = provider.get('config', {}).get('auth_url', None)
        if not auth_url:
            raise IngestApiError.apiGeneralError(provider=provider,
                                                 exception=KeyError(
                                                     '''
                                                     Ingest Provider {} is missing Authentication URL.
                                                     Please check the configuration.
                                                     '''.format(provider['name']))
                                                 )

        payload = {
            'username': provider.get('config', {}).get('username', ''),
            'password': provider.get('config', {}).get('password', ''),
        }

        # the session is only needed for this one request, so close it afterwards
        with requests.Session() as session:
            session.mount('https://', SSLAdapter())
            # NOTE(review): verify=False disables TLS certificate verification
            # for the authentication request - confirm this is intentional
            response = session.get(auth_url, params=payload, verify=False, timeout=30)

        if response.status_code < 200 or response.status_code >= 300:
            try:
                response.raise_for_status()
            except Exception:
                err = IngestApiError.apiAuthError(provider=provider)
                self.close_provider(provider, err, force=True)
                raise err

        tree = etree.fromstring(response.content)  # workaround for http mock lib
        return tree.text

    def _is_valid_token(self, token):
        """Check if the given token is still valid.

        Most of authentication tokens issued by Ingest Providers are valid for 12 hours.

        :param token: Token information
        :type token: dict
        :return: True if valid, False otherwise
        :rtype: bool
        """
        ttl = timedelta(hours=12)
        created = arrow.get(token.get('created')).datetime

        # bool() so that the documented boolean is returned instead of the token string itself
        return bool(created + ttl >= utcnow() and token.get('auth_token'))

    def _get_auth_token(self, provider, update=False):
        """
        Gets authentication token for given provider instance and save it in db based on the given update flag.

        :param provider: dict - Ingest provider details to which the current directory has been configured
        :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
        :param update: a flag which dictates whether to save the authentication token in Ingest Provider record or not.
                       Saves if the value is True, defaults to False.
        :type update: bool
        :return: Authentication Token; an empty string when there is no valid stored token and update is False
        :rtype: str
        """
        token = provider.get('tokens')

        if token and self._is_valid_token(token):
            return token.get('auth_token')

        return self._generate_token_and_update_provider(provider) if update else ''
Exemple #12
0
class EventHTTPFeedingService(HTTPFeedingService):
    """
    Feeding Service class which can read events using HTTP
    """

    NAME = 'event_http'
    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiUnicodeError().get_error_description(),
        IngestApiError.apiParseError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description()
    ]

    label = 'Event HTTP feed'

    # Defines the collection service to be used with this ingest feeding service.
    service = 'events'

    fields = [{
        'id': 'url',
        'type': 'text',
        'label': 'Feed URL',
        'placeholder': 'Feed URL',
        'required': True
    }]

    def _update(self, provider, update):
        """
        Fetch the configured feed URL and yield the parsed events as a single list.

        An empty ``config`` dict is stored on the provider when it has none.
        The way the response body is handed to the parser depends on the
        parser type: NTB event XML is parsed into an element tree first,
        ICS is parsed into a calendar first, anything else receives the raw
        content.

        :param provider: Ingest Provider Details.
        :type provider: dict
        :param update: Any update that is required on provider.
        :type update: dict
        :return: generator yielding one list of events
        :raises IngestApiError: if the request fails
        :raises LookupError: if the feed URL responds with HTTP 404
        """
        self.provider = provider
        provider_config = provider.get('config')
        if not provider_config:
            provider_config = {}
            provider['config'] = provider_config

        self.URL = provider_config.get('url')
        payload = {}

        parser = self.get_feed_parser(provider)

        try:
            response = requests.get(self.URL, params=payload, timeout=15)
            # TODO: check if file has been updated since provider last_updated
            # although some provider do not include 'Last-Modified' in headers
            # so unsure how to do this
            logger.info('Http Headers: %s', response.headers)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider) from ex
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider) from ex
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider) from ex
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider) from error

        if response.status_code == 404:
            raise LookupError('Not found %s' % payload)

        logger.info('Ingesting: %s', str(response.content))

        if isinstance(parser, NTBEventXMLFeedParser):
            xml = ET.fromstring(response.content)
            items = parser.parse(xml, provider)
        elif isinstance(parser, IcsTwoFeedParser):
            cal = Calendar.from_ical(response.content)
            items = parser.parse(cal, provider)
        else:
            items = parser.parse(response.content)

        # a single parsed item is wrapped so that a list is always yielded
        if isinstance(items, list):
            yield items
        else:
            yield [items]
Exemple #13
0
class AAPSportsHTTPFeedingService(HTTPFeedingService):
    """
    Feeding service that walks the AAP sports fixtures feed over HTTP.
    """

    label = 'AAP Sports Results Feed'
    NAME = 'aap_sports_http'
    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiUnicodeError().get_error_description(),
        IngestApiError.apiParseError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description()
    ]

    # Defines the collection service to be used with this ingest feeding service.
    service = 'events'

    fields = [
        {
            'id': 'login_url',
            'type': 'text',
            'label': 'Login Url',
            'placeholder': 'Login Url',
            'required': True,
            'errors': {
                4006: 'Server not found.',
                4000: 'Unexpected server response'
            }
        },
        {
            'id': 'fixtures_url',
            'type': 'text',
            'label': 'Fixtures Url',
            'placeholder': 'Fixtures Url',
            'required': True
        },
        {
            'id': 'username',
            'type': 'text',
            'label': 'Username',
            'placeholder': 'Username',
            'required': True
        },
        {
            'id': 'password',
            'type': 'password',
            'label': 'Password',
            'placeholder': 'Password',
            'required': True,
            'errors': {
                4007: 'Authentication error.'
            }
        },
        {
            'id': 'sports',
            'type': 'text',
            'label': 'Sports',
            'placeholder': 'Comma separate list of sports ids',
            'required': True,
            'default': '1,2,3,4,10'
        },
    ]

    def _update(self, provider, update):
        """
        Log in, then yield the parsed fixtures of every configured sport.

        The fixtures URL template is requested at increasing depth: all
        sports, one sport, one competition, one season. Only seasons whose id
        contains the two-digit current or next year are parsed.

        :param provider: Ingest Provider Details.
        :type provider: dict
        :param update: Any update that is required on provider.
        :type update: dict
        :return: generator yielding one non-empty list of items per season
        """
        self.provider = provider
        parser = self.get_feed_parser(provider)

        # get the current year, it is used to filter fixtures for this year and next
        year = int(utcnow().year) % 100
        wanted_years = (str(year), str(year + 1))

        config = provider.get('config', {})
        content = self._request(
            config.get('login_url').format(config.get('username'),
                                           config.get('password')))
        # get the configured sports, tolerating spaces around the commas
        configured_sports = {
            value.strip() for value in config.get('sports').split(',')
        }
        fixtures_url = config.get('fixtures_url')

        xml = ET.fromstring(content)
        status = xml.attrib['Status_Code']
        if status != 'OK':
            # nothing can be fetched without a session; make the reason visible
            logger.warning(
                'AAP Sports login returned status {}'.format(status))
            return
        session = xml.attrib['Status_Session']

        content = self._request(fixtures_url.format(session, '', '', ''))
        xml = ET.fromstring(content)
        for sport in xml.findall('.//Sports/Sport'):
            sport_id = sport.attrib['SportID']
            if sport_id not in configured_sports:
                continue
            sport_name = sport.attrib['SportName']
            content = self._request(
                fixtures_url.format(session, sport_id, '', ''))
            sport_xml = ET.fromstring(content)
            for competition in sport_xml.findall('.//Competition'):
                comp_id = competition.attrib.get('Comp_ID')
                comp_name = competition.attrib.get('Comp_Name')
                content = self._request(
                    fixtures_url.format(session, sport_id, comp_id, ''))
                comp_xml = ET.fromstring(content)
                for season in comp_xml.findall('.//Season'):
                    season_id = season.attrib.get('SeasonID')
                    # a season without an id cannot be matched or requested
                    if not season_id:
                        continue
                    if not any(wanted in season_id for wanted in wanted_years):
                        continue
                    content = self._request(
                        fixtures_url.format(
                            session, sport_id, comp_id, season_id))
                    fixture_xml = ET.fromstring(content)
                    logger.info('Parsing {}/{} {}/{}'.format(
                        sport_id, sport_name, comp_id, comp_name))
                    items = parser.parse(
                        {
                            'fixture_xml': fixture_xml,
                            'sport_id': sport_id,
                            'sport_name': sport_name,
                            'comp_name': comp_name,
                            'comp_id': comp_id
                        }, provider)
                    if items:
                        yield items

    def _request(self, url):
        """
        GET ``url`` and return the raw response body.

        :param url: fully formatted URL to request
        :type url: str
        :return: ``response.content`` of the request
        :raises IngestApiError: if the request fails
        :raises LookupError: if the server responds with HTTP 404
        """
        try:
            response = requests.get(url, params={}, timeout=120)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider) from ex
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider) from ex
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider) from ex
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider) from error

        if response.status_code == 404:
            raise LookupError('Not found')

        return response.content
Exemple #14
0
from superdesk.utc import utcnow
from superdesk.etree import etree, ParseError
from superdesk.io import register_provider
from .newsml_2_0 import NewsMLTwoParser
from .reuters_token import get_token
from superdesk.errors import IngestApiError
from flask import current_app as app


# Provider name constant.
# NOTE(review): presumably the key passed to register_provider — confirm.
PROVIDER = 'reuters'
# Error descriptions, one per IngestApiError kind listed below, each obtained
# from an argument-less factory call.
errors = [IngestApiError.apiTimeoutError().get_error_description(),
          IngestApiError.apiRedirectError().get_error_description(),
          IngestApiError.apiRequestError().get_error_description(),
          IngestApiError.apiUnicodeError().get_error_description(),
          IngestApiError.apiParseError().get_error_description(),
          IngestApiError.apiGeneralError().get_error_description()]


class ReutersIngestService(IngestService):
    """Reuters ingest service."""

    # strftime pattern: year.month.day.hour.minute
    DATE_FORMAT = '%Y.%m.%d.%H.%M'
    # NOTE(review): presumably the base address of the Reuters API — confirm.
    URL = 'http://rmb.reuters.com/rmd/rest/xml'
    # class-level default: no token is held initially
    token = None

    def __init__(self):
        # parser instance kept on the service
        self.parser = NewsMLTwoParser()

    def get_token(self):
        """Get reuters token once for an update run."""
        # NOTE(review): the excerpt ends here; no method body is visible.
Exemple #15
0
class ReutersHTTPFeedingService(HTTPFeedingService):
    """
    Feeding Service class which can read article(s) using HTTP provided by Reuters.

    For every subscribed channel the service requests the ids of new items
    (by stored poll token, or by date range when no token is stored) and then
    fetches and parses every item together with the items it references.
    """

    NAME = 'reuters_http'

    # descriptions of the API errors raised by this service
    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiUnicodeError().get_error_description(),
        IngestApiError.apiParseError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description()
    ]

    # strftime pattern used to build the ``dateRange`` request parameter
    DATE_FORMAT = '%Y.%m.%d.%H.%M'

    def _update(self, provider):
        """
        Poll every subscribed channel and yield the items of each new article.

        :param provider: ingest provider document; missing ``url`` and
            ``auth_url`` entries of its ``config`` are filled with defaults
        :type provider: dict
        :return: generator yielding one non-empty list of items per article id
        """
        updated = utcnow()

        # never look back further than the ingest expiry window
        oldest_allowed = updated - datetime.timedelta(
            minutes=app.config['INGEST_EXPIRY_MINUTES'])
        last_updated = provider.get('last_updated')
        if not last_updated or last_updated < oldest_allowed:
            last_updated = oldest_allowed

        self.provider = provider
        provider_config = provider.get('config')
        if not provider_config:
            provider_config = {}
            provider['config'] = provider_config

        provider_config.setdefault(
            'url', 'http://rmb.reuters.com/rmd/rest/xml')
        provider_config.setdefault(
            'auth_url', 'https://commerce.reuters.com/rmd/rest/xml/login')

        self.URL = provider_config.get('url')

        for channel in self._get_channels():
            ids = self._get_article_ids(channel, last_updated, updated)
            for item_id in ids:
                try:
                    items = self.fetch_ingest(item_id)
                    if items:
                        yield items
                # if there was an exception processing one of the bunch,
                # log it and continue with the next id
                except Exception as ex:
                    logger.warning(
                        'Reuters item {} has not been retrieved'.format(
                            item_id))
                    logger.exception(ex)

    def _get_channels(self):
        """
        Get subscribed channels.

        :return: text of the ``alias`` element of every ``channelInformation``
            element in the ``channels`` response
        :rtype: list
        """
        tree = self._get_tree('channels')
        return [channel.find('alias').text
                for channel in tree.findall('channelInformation')]

    def _get_tree(self, endpoint, payload=None):
        """
        Get xml response for given API endpoint and payload.

        :param endpoint: endpoint name, appended to the service URL
        :type endpoint: str
        :param payload: query parameters; the auth token is added to a copy,
            the dict passed in is left untouched
        :type payload: dict
        :return: root element of the parsed response
        :raises IngestApiError: when the request times out, redirects too
            often or fails otherwise, or when the response cannot be parsed
        :raises LookupError: when the response status code is 404
        """
        # work on a copy so the caller's dict does not silently gain the token
        params = dict(payload) if payload else {}
        params['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        try:
            response = requests.get(url, params=params, timeout=15)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise LookupError('Not found %s' % params)

        try:
            return etree.fromstring(
                response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

    def _get_absolute_url(self, endpoint):
        """
        Get absolute URL for given endpoint.

        :param endpoint: endpoint name, joined to ``self.URL`` with a slash
        :type endpoint: str
        :rtype: str
        """
        return '/'.join([self.URL, endpoint])

    def _get_article_ids(self, channel, last_updated, updated):
        """
        Get article ids which should be upserted, also save the poll token that is returned.

        :param channel: channel alias to query
        :param last_updated: start of the date range, used only when no poll
            token is stored for the channel
        :param updated: end of that date range
        :return: ids found in the response; empty when the API reported
            status 30, when no poll token was returned, or when the returned
            token equals the stored one
        :rtype: set
        """
        ids = set()
        payload = {'channel': channel, 'fieldsRef': 'id'}

        # check if the channel has a pollToken if not fall back to dateRange
        last_poll_token = self._get_poll_token(channel)
        if last_poll_token is not None:
            logger.info(
                "Reuters requesting channel {} with poll token {}".format(
                    channel, last_poll_token))
            payload['pollToken'] = last_poll_token
        else:
            payload['dateRange'] = "%s-%s" % (self._format_date(last_updated),
                                              self._format_date(updated))
            logger.info(
                "Reuters requesting channel {} with dateRange {}".format(
                    channel, payload['dateRange']))

        tree = self._get_tree('items', payload)

        # the status sits on a child element of a ``results`` document,
        # otherwise on the root element itself
        status_node = tree.find('status') if tree.tag == 'results' else tree
        status_code = status_node.get(
            'code') if status_node is not None else None

        # check the returned status
        if status_code != '10':
            logger.warning(
                "Reuters channel request returned status code {}".format(
                    status_code))
            # status code 30 indicates failure
            if status_code == '30':
                # look the error element up once and tolerate its absence
                error = tree.find('error')
                error_code = error.get('code') if error is not None else None
                error_text = error.text if error is not None else None
                logger.warning("Reuters error on channel {} code {} {}".format(
                    channel, error_code, error_text))
                if error_code == '2100':
                    # invalid token: forget it so the next run falls back
                    # to a dateRange request
                    self._save_poll_token(channel, None)
                    logger.warning(
                        "Reuters channel invalid token reseting {}".format(
                            status_code))
                return ids

        # extract the returned poll token if there is one
        poll_token = tree.find('pollToken')
        if poll_token is None:
            logger.info(
                "Reuters channel {} retrieved no token".format(channel))
            return ids

        if poll_token.text == last_poll_token:
            # the token has not changed, so nothing new
            logger.info("Reuters channel {} nothing new".format(channel))
            return ids

        # a new token indicates new content
        logger.info("Reuters channel {} new token {}".format(
            channel, poll_token.text))
        self._save_poll_token(channel, poll_token.text)

        for result in tree.findall('result'):
            item_id = result.find('id').text
            ids.add(item_id)
            logger.info("Reuters id : {}".format(item_id))

        return ids

    def _save_poll_token(self, channel, poll_token):
        """
        Saves the poll token for the passed channel in the ``tokens`` section
        of the ingest provider.

        :param channel: channel alias the token belongs to
        :param poll_token: token to store, ``None`` to reset it
        :return: None
        """
        # get the provider in case it has been updated by another channel
        ingest_provider_service = superdesk.get_resource_service(
            'ingest_providers')
        provider_id = self.provider[superdesk.config.ID_FIELD]
        provider = ingest_provider_service.find_one(req=None, _id=provider_id)

        # tolerate a provider without (or with an empty) tokens section
        provider_token = provider.get('tokens') or {}
        poll_tokens = provider_token.get('poll_tokens') or {}
        poll_tokens[channel] = poll_token
        provider_token['poll_tokens'] = poll_tokens

        ingest_provider_service.system_update(
            provider_id, {'tokens': provider_token}, self.provider)

    def _get_poll_token(self, channel):
        """
        Get the poll token from the provider's ``tokens`` section if it is available.

        :param channel: channel alias
        :return: stored token, or ``None`` when there is none
        """
        tokens = self.provider.get('tokens') or {}
        return (tokens.get('poll_tokens') or {}).get(channel)

    def _format_date(self, date):
        """Format ``date`` with ``DATE_FORMAT``."""
        return date.strftime(self.DATE_FORMAT)

    def fetch_ingest(self, id):
        """
        Fetch the item with the given id and every item referenced from it.

        :param id: id of the item to fetch
        :return: all fetched items, or an empty list when fetching a
            referenced item raised ``LookupError`` (the error is logged
            through ``log_item_error``)
        :rtype: list
        """
        items = self._parse_items(id)
        result_items = []
        while items:
            item = items.pop()
            self.add_timestamps(item)
            try:
                # referenced items are queued so their references get
                # followed as well
                items.extend(self._fetch_items_in_package(item))
                result_items.append(item)
            except LookupError as err:
                self.log_item_error(err, item, self.provider)
                return []

        return result_items

    def _parse_items(self, id):
        """
        Parse item message and return given items.

        :param id: id sent as the ``id`` parameter of the ``item`` endpoint
        :return: whatever the feed parser returns for the response tree
        """
        tree = self._get_tree('item', {'id': id})
        parser = self.get_feed_parser(self.provider, tree)
        return parser.parse(tree, self.provider)

    def _fetch_items_in_package(self, item):
        """
        Fetch remote assets for given item.

        :param item: item whose ``groups``/``refs`` are scanned
        :type item: dict
        :return: items parsed for every ref carrying a ``residRef``
        :rtype: list
        """
        items = []
        for group in item.get('groups', []):
            for ref in group.get('refs', []):
                if 'residRef' in ref:
                    items.extend(self._parse_items(ref.get('residRef')))

        return items

    def prepare_href(self, href, mimetype=None):
        """
        Strip params, query and fragment from ``href`` and append the auth token.

        :param href: URL to prepare
        :type href: str
        :param mimetype: not used by this implementation
        :return: URL of the form ``<scheme://netloc/path>?auth_token=<token>``
        :rtype: str
        """
        scheme, netloc, path = urlparse(href)[:3]
        new_href = urlunparse((scheme, netloc, path, '', '', ''))
        return '%s?auth_token=%s' % (
            new_href, self._get_auth_token(self.provider, update=True))
Exemple #16
0
class ReutersHTTPFeedingService(HTTPFeedingService):
    """
    Feeding Service class which can read article(s) using HTTP provided by Reuters.

    For every subscribed channel the service requests the ids of new items
    (by stored poll token, or by date range when no token is stored) and then
    fetches and parses every item together with the items it references.
    """

    NAME = "reuters_http"

    # descriptions of the API errors raised by this service
    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiUnicodeError().get_error_description(),
        IngestApiError.apiParseError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description(),
    ]

    # strftime pattern used to build the ``dateRange`` request parameter
    DATE_FORMAT = "%Y.%m.%d.%H.%M"

    label = "Reuters feed API"

    # provider configuration fields; the defaults match the fallbacks applied in ``_update``
    fields = [
        {
            "id": "url",
            "type": "text",
            "label": "Feed URL",
            "placeholder": "Feed URL",
            "required": True,
            "default": "http://rmb.reuters.com/rmd/rest/xml",
        },
        {
            "id": "auth_url",
            "type": "text",
            "label": "URL for Authentication",
            "placeholder": "authentication url",
            "required": True,
            "default": "https://commerce.reuters.com/rmd/rest/xml/login",
        },
        {"id": "username", "type": "text", "label": "Username", "placeholder": "Username", "required": True},
        {"id": "password", "type": "password", "label": "Password", "placeholder": "Password", "required": True},
    ]

    # HTTP session, created on first use in ``_get_tree`` and reused afterwards
    session = None

    def _update(self, provider, update):
        """
        Poll every subscribed channel and yield the items of each new article.

        :param provider: ingest provider document; missing ``url`` and ``auth_url``
            entries of its ``config`` are filled with defaults
        :type provider: dict
        :param update: not used by this implementation
        :return: generator yielding one non-empty list of items per article id
        """
        updated = utcnow()

        # never look back further than the ingest expiry window
        oldest_allowed = updated - datetime.timedelta(minutes=app.config["INGEST_EXPIRY_MINUTES"])
        last_updated = provider.get("last_updated")
        if not last_updated or last_updated < oldest_allowed:
            last_updated = oldest_allowed

        self.provider = provider
        provider_config = provider.get("config")
        if not provider_config:
            provider_config = {}
            provider["config"] = provider_config

        provider_config.setdefault("url", "http://rmb.reuters.com/rmd/rest/xml")
        provider_config.setdefault("auth_url", "https://commerce.reuters.com/rmd/rest/xml/login")
        self.URL = provider_config.get("url")

        for channel in self._get_channels():
            ids = self._get_article_ids(channel, last_updated, updated)
            for item_id in ids:
                try:
                    items = self.fetch_ingest(item_id)
                    if items:
                        yield items
                # if there was an exception processing one of the bunch, log it and continue
                except Exception as ex:
                    logger.warning("Reuters item {} has not been retrieved".format(item_id))
                    logger.exception(ex)

    def _get_channels(self):
        """Get subscribed channels.

        :return: text of the ``alias`` element of every ``channelInformation``
            element in the ``channels`` response
        :rtype: list
        """
        tree = self._get_tree("channels")
        return [channel.find("alias").text for channel in tree.findall("channelInformation")]

    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        A request that times out is retried up to three times before the
        timeout is reported.

        :param endpoint: endpoint name, appended to the service URL
        :type endpoint: str
        :param payload: query parameters; the auth token is added to a copy,
            the dict passed in is left untouched
        :type payload: dict
        :return: root element of the parsed response
        :raises IngestApiError: when the request keeps timing out, redirects too
            often or fails otherwise, or when the response cannot be parsed
        :raises LookupError: when the response status code is 404
        """

        # work on a copy so the caller's dict does not silently gain the token
        params = dict(payload) if payload else {}
        params["token"] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                # timeout tuple is (connect, read) in seconds
                response = self.session.get(url, params=params, timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    logger.warning("Reuters API timeout retrying, retries {}".format(retries))
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider)
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider)
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider)
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(error, self.provider)

            if response.status_code == 404:
                raise LookupError(_("Not found {payload}").format(payload=params))

            break

        try:
            return etree.fromstring(response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

    def _get_absolute_url(self, endpoint):
        """
        Get absolute URL for given endpoint.

        :param endpoint: endpoint name, joined to ``self.URL`` with a slash
        :type endpoint: str
        :rtype: str
        """
        return "/".join([self.URL, endpoint])

    def _get_article_ids(self, channel, last_updated, updated):
        """
        Get article ids which should be upserted, also save the poll token that is returned.

        :param channel: channel alias to query
        :param last_updated: start of the date range, used only when no poll token is stored for the channel
        :param updated: end of that date range
        :return: ids found in the response; empty when the API reported status 30, when no poll
            token was returned, or when the returned token equals the stored one
        :rtype: set
        """
        ids = set()
        payload = {"channel": channel, "fieldsRef": "id"}

        # check if the channel has a pollToken if not fall back to dateRange
        last_poll_token = self._get_poll_token(channel)
        if last_poll_token is not None:
            logger.info("Reuters requesting channel {} with poll token {}".format(channel, last_poll_token))
            payload["pollToken"] = last_poll_token
        else:
            payload["dateRange"] = "%s-%s" % (self._format_date(last_updated), self._format_date(updated))
            logger.info("Reuters requesting channel {} with dateRange {}".format(channel, payload["dateRange"]))

        tree = self._get_tree("items", payload)

        # the status sits on a child element of a ``results`` document, otherwise on the root element itself
        status_node = tree.find("status") if tree.tag == "results" else tree
        status_code = status_node.get("code") if status_node is not None else None

        # check the returned status
        if status_code != "10":
            logger.warning("Reuters channel request returned status code {}".format(status_code))
            # status code 30 indicates failure
            if status_code == "30":
                # look the error element up once and tolerate its absence
                error = tree.find("error")
                error_code = error.get("code") if error is not None else None
                error_text = error.text if error is not None else None
                logger.warning("Reuters error on channel {} code {} {}".format(channel, error_code, error_text))
                if error_code == "2100":
                    # invalid token: forget it so the next run falls back to a dateRange request
                    self._save_poll_token(channel, None)
                    logger.warning("Reuters channel invalid token reseting {}".format(status_code))
                return ids

        # extract the returned poll token if there is one
        poll_token = tree.find("pollToken")
        if poll_token is None:
            logger.info("Reuters channel {} retrieved no token".format(channel))
            return ids

        if poll_token.text == last_poll_token:
            # the token has not changed, so nothing new
            logger.info("Reuters channel {} nothing new".format(channel))
            return ids

        # a new token indicates new content
        logger.info("Reuters channel {} new token {}".format(channel, poll_token.text))
        self._save_poll_token(channel, poll_token.text)

        for result in tree.findall("result"):
            item_id = result.find("id").text
            ids.add(item_id)
            logger.info("Reuters id : {}".format(item_id))

        return ids

    def _save_poll_token(self, channel, poll_token):
        """Saves the poll token for the passed channel in the ``tokens`` section of the ingest provider.

        :param channel: channel alias the token belongs to
        :param poll_token: token to store, ``None`` to reset it
        :return: None
        """
        # get the provider in case it has been updated by another channel
        ingest_provider_service = superdesk.get_resource_service("ingest_providers")
        provider_id = self.provider[superdesk.config.ID_FIELD]
        provider = ingest_provider_service.find_one(req=None, _id=provider_id)

        # tolerate a provider without (or with an empty) tokens section
        provider_token = provider.get("tokens") or {}
        poll_tokens = provider_token.get("poll_tokens") or {}
        poll_tokens[channel] = poll_token
        provider_token["poll_tokens"] = poll_tokens

        ingest_provider_service.system_update(provider_id, {"tokens": provider_token}, self.provider)

    def _get_poll_token(self, channel):
        """Get the poll token from the provider's ``tokens`` section if it is available.

        :param channel: channel alias
        :return: stored token, or ``None`` when there is none
        """
        tokens = self.provider.get("tokens") or {}
        return (tokens.get("poll_tokens") or {}).get(channel)

    def _format_date(self, date):
        """Format ``date`` with ``DATE_FORMAT``."""
        return date.strftime(self.DATE_FORMAT)

    def fetch_ingest(self, id):
        """
        Fetch the item with the given id and every item referenced from it.

        :param id: id of the item to fetch
        :return: all fetched items, or an empty list when fetching a referenced item raised
            ``LookupError`` (the error is logged through ``log_item_error``)
        :rtype: list
        """
        items = self._parse_items(id)
        result_items = []
        while items:
            item = items.pop()
            self.localize_timestamps(item)
            try:
                # referenced items are queued so their references get followed as well
                items.extend(self._fetch_items_in_package(item))
                result_items.append(item)
            except LookupError as err:
                self.log_item_error(err, item, self.provider)
                return []

        return result_items

    def _parse_items(self, id):
        """
        Parse item message and return given items.

        :param id: id sent as the ``id`` parameter of the ``item`` endpoint
        :return: whatever the feed parser returns for the response tree
        """
        tree = self._get_tree("item", {"id": id})
        parser = self.get_feed_parser(self.provider, tree)
        return parser.parse(tree, self.provider)

    def _fetch_items_in_package(self, item):
        """
        Fetch remote assets for given item.

        :param item: item whose ``groups``/``refs`` are scanned
        :type item: dict
        :return: items parsed for every ref carrying a ``residRef``
        :rtype: list
        """
        items = []
        for group in item.get("groups", []):
            for ref in group.get("refs", []):
                if "residRef" in ref:
                    items.extend(self._parse_items(ref.get("residRef")))

        return items

    def prepare_href(self, href, mimetype=None):
        """
        Strip params, query and fragment from ``href`` and append the auth token.

        :param href: URL to prepare
        :type href: str
        :param mimetype: not used by this implementation
        :return: URL of the form ``<scheme://netloc/path>?auth_token=<token>``
        :rtype: str
        """
        scheme, netloc, path = urlparse(href)[:3]
        new_href = urlunparse((scheme, netloc, path, "", "", ""))
        return "%s?auth_token=%s" % (new_href, self._get_auth_token(self.provider, update=True))
Exemple #17
0
class ReutersIngestService(IngestService):
    """Reuters ingest service.

    For every subscribed channel the service requests the guids of the items
    in the current date range and then fetches and parses every item
    together with the items it references.
    """

    PROVIDER = 'reuters'

    # descriptions of the API errors raised by this service
    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRedirectError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiUnicodeError().get_error_description(),
        IngestApiError.apiParseError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description()
    ]

    # strftime pattern used to build the ``dateRange`` request parameter
    DATE_FORMAT = '%Y.%m.%d.%H.%M'
    # address the endpoint names are appended to
    URL = 'http://rmb.reuters.com/rmd/rest/xml'
    # token cached by ``get_token``; nothing is cached initially
    token = None

    def __init__(self):
        self.parser = NewsMLTwoParser()

    def get_token(self):
        """Get reuters token once for an update run.

        The token is requested on first use and cached on the instance.
        NOTE(review): nothing in this class resets the cached token, so an
        instance reused for several runs keeps its first token — confirm
        that instances are not reused.
        """
        if not self.token:
            self.token = get_token(self.provider, update=True)
        return self.token

    def _update(self, provider):
        """Service update call.

        :param provider: ingest provider document
        :type provider: dict
        :return: generator yielding one non-empty list of items per guid
        """
        self.provider = provider
        updated = utcnow()

        # never look back further than the ingest expiry window
        oldest_allowed = updated - datetime.timedelta(
            minutes=app.config['INGEST_EXPIRY_MINUTES'])
        last_updated = provider.get('last_updated')
        if not last_updated or last_updated < oldest_allowed:
            last_updated = oldest_allowed

        for channel in self.get_channels():
            for guid in self.get_ids(channel, last_updated, updated):
                items = self.fetch_ingest(guid)
                if items:
                    yield items

    def fetch_ingest(self, guid):
        """Fetch the item with the given guid and every item referenced from it.

        :param guid: guid of the item to fetch
        :return: all fetched items, or an empty list when fetching a
            referenced item raised ``LookupError`` (the error is logged
            through ``log_item_error``)
        :rtype: list
        """
        items = self.get_items(guid)
        result_items = []
        while items:
            item = items.pop()
            self.add_timestamps(item)
            try:
                # referenced items are queued so their references get
                # followed as well
                items.extend(self.fetch_assets(item))
                result_items.append(item)
            except LookupError as err:
                self.log_item_error(err, item, self.provider)
                return []
        return result_items

    def fetch_assets(self, item):
        """Fetch remote assets for given item.

        :param item: item whose ``groups``/``refs`` are scanned
        :type item: dict
        :return: items parsed for every ref carrying a ``residRef``
        :rtype: list
        """
        items = []
        for group in item.get('groups', []):
            for ref in group.get('refs', []):
                if 'residRef' in ref:
                    items.extend(self.get_items(ref.get('residRef')))
        return items

    def get_items(self, guid):
        """Parse item message and return given items.

        :param guid: guid sent as the ``id`` parameter of the ``item`` endpoint
        :return: whatever the parser returns for the response tree
        """
        tree = self.get_tree('item', {'id': guid})
        return self.parser.parse_message(tree, self.provider)

    def get_ids(self, channel, last_updated, updated):
        """Get ids of documents which should be updated.

        :param channel: channel alias to query
        :param last_updated: start of the requested date range
        :param updated: end of the requested date range
        :return: text of the ``guid`` element of every ``result`` element
        :rtype: list
        """
        payload = {
            'channel': channel,
            'fieldsRef': 'id',
            'dateRange': "%s-%s" % (self.format_date(last_updated),
                                    self.format_date(updated)),
        }
        tree = self.get_tree('items', payload)
        return [result.find('guid').text for result in tree.findall('result')]

    def get_channels(self):
        """Get subscribed channels.

        :return: text of the ``alias`` element of every ``channelInformation``
            element in the ``channels`` response
        :rtype: list
        """
        tree = self.get_tree('channels')
        return [channel.find('alias').text
                for channel in tree.findall('channelInformation')]

    def get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        :param endpoint: endpoint name, appended to ``URL``
        :type endpoint: str
        :param payload: query parameters; the token is added to a copy, the
            dict passed in is left untouched
        :type payload: dict
        :return: root element of the parsed response
        :raises IngestApiError: when the request times out, redirects too
            often or fails otherwise, or when the response cannot be parsed
        :raises LookupError: when the response status code is 404
        """
        # work on a copy so the caller's dict does not silently gain the token
        params = dict(payload) if payload else {}
        params['token'] = self.get_token()
        url = self.get_url(endpoint)

        try:
            response = requests.get(url, params=params, timeout=15)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise LookupError('Not found %s' % params)

        try:
            # workaround for httmock lib
            # return etree.fromstring(response.text.encode('utf-8'))
            return etree.fromstring(response.content)
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

    def get_url(self, endpoint):
        """Get API url for given endpoint."""
        return '/'.join([self.URL, endpoint])

    def format_date(self, date):
        """Format date for API usage."""
        return date.strftime(self.DATE_FORMAT)

    def prepare_href(self, href):
        """Strip params, query and fragment from ``href`` and append the token.

        :param href: URL to prepare
        :type href: str
        :return: URL of the form ``<scheme://netloc/path>?auth_token=<token>``
        :rtype: str
        """
        scheme, netloc, path = urlparse(href)[:3]
        new_href = urlunparse((scheme, netloc, path, '', '', ''))
        return '%s?auth_token=%s' % (new_href, self.get_token())