Esempio n. 1
0
    def validate_config(self):
        """
        Validate provider config according to `cls.fields`

        Reads ``self.fields`` (list of field description dicts) and
        ``self.config`` (the provider configuration dict).

        :return: None when the configuration is valid
        :raises SuperdeskIngestError: a required field is missing from the
            config, the url is required but empty, or the url does not start
            with ``http``
        """
        # validate required config fields
        required_keys = [
            field['id'] for field in self.fields
            if field.get('required', False)
        ]
        if not set(self.config.keys()).issuperset(required_keys):
            raise SuperdeskIngestError.notConfiguredError(
                Exception('{} are required.'.format(', '.join(required_keys))))

        # the url may be absent (or None) when it is an optional field,
        # so fall back to an empty string before stripping
        url = (self.config.get('url') or '').strip()
        if not url:
            # an empty url is only an error when the url field is declared
            # as required; a generator with a default avoids StopIteration
            url_field = next(
                (f for f in self.fields if f.get('id') == 'url'), None)
            url_required = bool(url_field) and url_field.get('required', False)
            if url_required:
                raise SuperdeskIngestError.notConfiguredError(
                    Exception('URL is a required field.'))
        elif not url.startswith('http'):
            # validate url
            raise SuperdeskIngestError.notConfiguredError(
                Exception('URL must be a valid HTTP link.'))
Esempio n. 2
0
    def _update(self, provider, update):
        """
        Fetch the news queue, parse every ``RBNews`` element and return the items.

        Credentials and the optional URL override are read from ``self.config``.
        When no URL override is set, the request asks for acknowledgement mode
        and every parsed element is acknowledged with a request on ``URL_ACK``.

        :param provider: ingest provider, handed to the feed parser
        :type provider: dict
        :param update: provider updates (not modified here)
        :type update: dict
        :return: a list holding one list of parsed items
        :raises SuperdeskIngestError: username/password missing, or the URL
            override is set but does not start with ``http``
        :raises IngestApiError: the answer can't be parsed, reports an error
            code, or lacks an expected element
        """
        config = self.config
        try:
            user, password = config["username"], config["password"]
        except KeyError:
            # the error must be raised, otherwise `user`/`password` are
            # unbound further down
            raise SuperdeskIngestError.notConfiguredError(
                Exception("username and password are needed"))

        url_override = (config.get("url") or "").strip()
        # an empty override is valid (default URL is used), only a non-empty
        # value has to look like an HTTP link
        if url_override and not url_override.startswith("http"):
            raise SuperdeskIngestError.notConfiguredError(
                Exception("if URL is set, it must be a valid http link"))

        params = {"user": user, "password": password, "maksAntal": 50}
        if not url_override:
            # acknowledgement is only requested when the default URL is used
            params["waitAcknowledge"] = "true"

        r = self.get_url(url_override, params=params)

        try:
            root_elt = etree.fromstring(r.text)
        except Exception:
            raise IngestApiError.apiRequestError(
                Exception("error while parsing the request answer"))

        try:
            if root_elt.xpath("(//error/text())[1]")[0] != "0":
                err_msg = root_elt.xpath("(//errormsg/text())[1]")[0]
                raise IngestApiError.apiRequestError(
                    Exception("error code returned by API: {msg}".format(
                        msg=err_msg)))
        except IndexError:
            raise IngestApiError.apiRequestError(
                Exception("Invalid XML, <error> element not found"))

        parser = self.get_feed_parser(provider)
        items = []
        for elt in root_elt.xpath("//RBNews"):
            item = parser.parse(elt, provider)
            items.append(item)
            if not url_override:
                try:
                    queue_id = elt.xpath(".//ServiceQueueId/text()")[0]
                except IndexError:
                    raise IngestApiError.apiRequestError(
                        Exception("missing ServiceQueueId element"))
                ack_params = {
                    "user": user,
                    "password": password,
                    "servicequeueid": queue_id
                }
                self.get_url(URL_ACK, params=ack_params)

        return [items]
Esempio n. 3
0
    def _update(self, provider, update):
        """
        Request the feed, parse it and store paging data in ``update``.

        ``provider['config']`` must hold ``username``, ``password`` and
        ``idList``. When ``provider['private']`` holds ``min_date_time`` and
        ``sequence_number`` they are sent as ``minDateTime`` and
        ``sequenceNumber``; the values found in the answer are written to
        ``update['private']``.

        :param provider: ingest provider
        :type provider: dict
        :param update: provider updates, ``update['private']`` is filled in
        :type update: dict
        :return: a list holding the parser result
        :raises SuperdeskIngestError: a needed config key is missing
        :raises IngestApiError: the request or the XML parsing fails, or
            minDateTime/transmitId are missing from the answer
        """
        try:
            config = provider['config']
            user = config['username']
            password = config['password']
            id_list = config['idList']
        except KeyError:
            # must be raised: the names above are unbound otherwise
            raise SuperdeskIngestError.notConfiguredError(
                Exception('username, password and idList are needed'))

        # we remove spaces and empty values from id_list to do a clean list
        id_list = ','.join(
            [id_.strip() for id_ in id_list.split(',') if id_.strip()])

        params = {
            'idList': id_list,
            'idListType': 'products',
            'format': '5',
            'maxItems': '25',
            'sortOrder': 'chronological'
        }
        try:
            min_date_time = provider['private']['min_date_time']
            sequence_number = provider['private']['sequence_number']
        except KeyError:
            # no paging data stored yet, request without it
            pass
        else:
            params['minDateTime'] = min_date_time
            params['sequenceNumber'] = sequence_number

        try:
            r = requests.get(URL, auth=(user, password), params=params)
        except Exception:
            raise IngestApiError.apiRequestError(
                Exception('error while doing the request'))

        try:
            root_elt = etree.fromstring(r.content)
        except Exception:
            raise IngestApiError.apiRequestError(
                Exception('error while doing the request'))

        parser = self.get_feed_parser(provider)
        items = parser.parse(root_elt, provider)

        try:
            min_date_time = root_elt.xpath(
                '//iptc:timestamp[@role="minDateTime"]/text()',
                namespaces=NS)[0].strip()
            sequence_number = root_elt.xpath('//iptc:transmitId/text()',
                                             namespaces=NS)[0].strip()
        except IndexError:
            raise IngestApiError.apiRequestError(
                Exception('missing minDateTime or transmitId'))
        else:
            update.setdefault('private', {})
            update['private']['min_date_time'] = min_date_time
            update['private']['sequence_number'] = sequence_number

        return [items]
Esempio n. 4
0
    def _update(self, provider, update):
        """
        Fetch the news queue, parse every ``RBNews`` element and return the items.

        Credentials and the optional URL override are read from
        ``provider['config']``. When no URL override is set, ``URL`` is
        requested in acknowledgement mode and every parsed element is
        acknowledged with a request on ``URL_ACK``.

        :param provider: ingest provider
        :type provider: dict
        :param update: provider updates (not modified here)
        :type update: dict
        :return: a list holding one list of parsed items
        :raises SuperdeskIngestError: config/username/password missing, or
            the URL override is set but does not start with ``http``
        :raises IngestApiError: a request fails, the answer can't be parsed,
            reports an error code, or lacks an expected element
        """
        try:
            config = provider['config']
            user = config['username']
            password = config['password']
        except KeyError:
            # must be raised: the names above are unbound otherwise
            raise SuperdeskIngestError.notConfiguredError(Exception('username and password are needed'))

        url_override = (config.get('url') or '').strip()
        # an empty override is valid (URL is used instead), only a non-empty
        # value has to look like an HTTP link
        if url_override and not url_override.startswith('http'):
            raise SuperdeskIngestError.notConfiguredError(Exception('if URL is set, it must be a valid http link'))

        params = {'user': user, 'password': password, 'maksAntal': 50}
        if not url_override:
            # acknowledgement is only requested when the default URL is used
            params['waitAcknowledge'] = 'true'

        try:
            r = requests.get(url_override or URL, params=params)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        try:
            root_elt = etree.fromstring(r.text)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while parsing the request answer'))

        try:
            if root_elt.xpath('(//error/text())[1]')[0] != '0':
                err_msg = root_elt.xpath('(//errormsg/text())[1]')[0]
                raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg)))
        except IndexError:
            raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found'))

        parser = self.get_feed_parser(provider)
        items = []
        for elt in root_elt.xpath('//RBNews'):
            item = parser.parse(elt, provider)
            items.append(item)
            if not url_override:
                try:
                    queue_id = elt.xpath('.//ServiceQueueId/text()')[0]
                except IndexError:
                    raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element'))
                ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id}
                try:
                    requests.get(URL_ACK, params=ack_params)
                except Exception:
                    raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        return [items]
Esempio n. 5
0
    def _update(self, provider, update):
        """
        Fetch the news queue, parse every ``RBNews`` element and return the items.

        Credentials and the optional URL override are read from
        ``provider['config']``. When no URL override is set, ``URL`` is
        requested in acknowledgement mode and every parsed element is
        acknowledged with a request on ``URL_ACK``.

        :param provider: ingest provider
        :type provider: dict
        :param update: provider updates (not modified here)
        :type update: dict
        :return: a list holding one list of parsed items
        :raises SuperdeskIngestError: config/username/password missing, or
            the URL override is set but does not start with ``http``
        :raises IngestApiError: a request fails, the answer can't be parsed,
            reports an error code, or lacks an expected element
        """
        try:
            config = provider['config']
            user = config['username']
            password = config['password']
        except KeyError:
            # without the raise, execution would continue with unbound names
            raise SuperdeskIngestError.notConfiguredError(Exception('username and password are needed'))

        url_override = (config.get('url') or '').strip()
        # only a URL which is actually set needs to be checked
        if url_override and not url_override.startswith('http'):
            raise SuperdeskIngestError.notConfiguredError(Exception('if URL is set, it must be a valid http link'))

        params = {'user': user, 'password': password, 'maksAntal': 50}
        if not url_override:
            # the default URL is used with acknowledgement of each item
            params['waitAcknowledge'] = 'true'

        try:
            r = requests.get(url_override or URL, params=params)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        try:
            root_elt = etree.fromstring(r.text)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while parsing the request answer'))

        try:
            if root_elt.xpath('(//error/text())[1]')[0] != '0':
                err_msg = root_elt.xpath('(//errormsg/text())[1]')[0]
                raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg)))
        except IndexError:
            raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found'))

        parser = self.get_feed_parser(provider)
        items = []
        for elt in root_elt.xpath('//RBNews'):
            item = parser.parse(elt, provider)
            items.append(item)
            if not url_override:
                try:
                    queue_id = elt.xpath('.//ServiceQueueId/text()')[0]
                except IndexError:
                    raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element'))
                ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id}
                try:
                    requests.get(URL_ACK, params=ack_params)
                except Exception:
                    raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        return [items]
Esempio n. 6
0
    def _update(self, provider, update):
        """
        Query the feed with the configured id list and return the parsed items.

        ``provider['config']`` must hold non-blank ``username``, ``password``
        and ``idList``; ``idListType`` is optional and defaults to "products".
        Paging values stored in ``provider['private']`` are sent with the
        request, and the ones found in the answer are saved in
        ``update['private']``.

        :param provider: ingest provider
        :type provider: dict
        :param update: provider updates, ``update['private']`` is filled in
        :type update: dict
        :return: a list holding the parser result
        :raises SuperdeskIngestError: a needed config value is missing or blank
        :raises IngestApiError: the request or the XML parsing fails, or
            minDateTime/transmitId are missing from the answer
        """
        try:
            settings = provider['config']
            user, password, id_list = (settings[key] for key in ('username', 'password', 'idList'))
            # before "products" was hardcoded as value for "idListType"
            id_list_type = settings.get('idListType', 'products')
            # blank values are handled like missing ones
            if not all(value.strip() for value in (user, password, id_list)):
                raise KeyError
        except KeyError:
            raise SuperdeskIngestError.notConfiguredError(Exception('username, password and idList are needed'))

        # keep only the non-empty ids, without surrounding spaces
        id_list = ','.join(filter(None, (part.strip() for part in id_list.split(','))))

        params = {
            'idList': id_list,
            'idListType': id_list_type,
            'format': '5',
            'maxItems': '25',
            'sortOrder': 'chronological',
        }
        try:
            saved = provider['private']
            paging = {
                'minDateTime': saved['min_date_time'],
                'sequenceNumber': saved['sequence_number'],
            }
        except KeyError:
            # nothing saved from a previous run
            pass
        else:
            params.update(paging)

        try:
            response = requests.get(URL, auth=(user, password), params=params)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        try:
            root_elt = etree.fromstring(response.content)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        items = self.get_feed_parser(provider).parse(root_elt, provider)

        try:
            timestamps = root_elt.xpath('//iptc:timestamp[@role="minDateTime"]/text()', namespaces=NS)
            min_date_time = timestamps[0].strip()
            transmit_ids = root_elt.xpath('//iptc:transmitId/text()', namespaces=NS)
            sequence_number = transmit_ids[0].strip()
        except IndexError:
            raise IngestApiError.apiRequestError(Exception('missing minDateTime or transmitId'))

        # remember where to resume on next update
        private = update.setdefault('private', {})
        private['min_date_time'] = min_date_time
        private['sequence_number'] = sequence_number

        return [items]
    def validate_config(self):
        """
        Check ``self.config`` against the field descriptions in ``self.fields``

        Every field flagged as required must be a key of the config, and a
        non-empty ``url`` value must start with "http" (surrounding spaces
        are ignored).

        :return: None when the config is valid
        :raises SuperdeskIngestError: a required key is missing or the url is
            not an HTTP link
        """
        # validate required config fields
        mandatory = [spec['id'] for spec in self.fields if spec.get('required', False)]
        configured = set(self.config.keys())
        if any(key not in configured for key in mandatory):
            raise SuperdeskIngestError.notConfiguredError(
                Exception('{} are required.'.format(', '.join(mandatory)))
            )

        # validate url
        url = self.config.get('url')
        if not url:
            # nothing to check when the url is unset or empty
            return
        if url.strip().startswith('http'):
            return
        raise SuperdeskIngestError.notConfiguredError(
            Exception('URL must be a valid HTTP link.')
        )
Esempio n. 8
0
    def validate_config(self):
        """
        Validate the provider config against `cls.fields`

        The ids of all fields marked as required must be present as config
        keys. When a url is set, it must start with "http" once stripped.

        :return: None if nothing is wrong
        :raises SuperdeskIngestError: required keys are missing, or the url
            is set but is not an HTTP link
        """
        # collect the ids of the required fields
        required_keys = []
        for field in self.fields:
            if field.get("required", False):
                required_keys.append(field["id"])

        present_keys = set(self.config.keys())
        all_present = present_keys.issuperset(required_keys)
        if not all_present:
            message = "{} are required.".format(", ".join(required_keys))
            raise SuperdeskIngestError.notConfiguredError(Exception(message))

        # validate url, an unset or empty url is accepted
        url = self.config.get("url")
        url_is_invalid = bool(url) and not url.strip().startswith("http")
        if url_is_invalid:
            raise SuperdeskIngestError.notConfiguredError(
                Exception("URL must be a valid HTTP link."))
Esempio n. 9
0
class NewsworthyFeedingService(FeedingService):
    """
    Feeding Service class which can retrieve articles from Newsworthy web service
    """

    NAME = 'newsworthy'

    ERRORS = [IngestApiError.apiRequestError().get_error_description(),
              SuperdeskIngestError.notConfiguredError().get_error_description()]

    label = 'Newsworthy'

    # configuration fields of the provider; the url one is read-only
    fields = [
        {
            'id': 'url', 'type': 'text', 'label': 'Use this URL for webhook',
            'default_value': '',
            'readonly': True,
        },
        {
            'id': 'username', 'type': 'text', 'label': 'Username',
            'required': True
        },
        {
            'id': 'password', 'type': 'password', 'label': 'Password',
            'required': True
        },
        {
            'id': 'secret', 'type': 'password', 'label': 'Shared Secret',
            'placeholder': 'Shared Secret', 'required': False
        },
    ]

    def _update(self, provider, update):
        """
        Parse the data found in ``provider['newsworthy_data']``.

        :param provider: ingest provider, expected to carry the received
            payload under the ``newsworthy_data`` key
        :type provider: dict
        :param update: provider updates (not modified here)
        :type update: dict
        :return: a list holding the parsed items, or ``[[]]`` when there is
            no payload or the payload is an unpublish event
        """
        try:
            data = provider['newsworthy_data']
        except KeyError:
            # a missing dict key raises KeyError (not IndexError): there is
            # nothing to ingest in this case
            return [[]]
        if data['hook']['event'] == EVENT_UNPUBLISHED:
            logger.info("ignoring unpublish event on following data:\n{data}".format(data=data))
            return [[]]

        # we have to write to a temporary file because feed parser expect a file path
        # FIXME: it would be better to use the data directly
        with NamedTemporaryFile('w') as f:
            json.dump(data['data'], f)
            # the parser is given the file name, so buffered content must be
            # written out before it is used
            f.flush()
            parser = self.get_feed_parser(provider, f.name)
            items = parser.parse(f.name, provider)

        return [items]
Esempio n. 10
0
    def parse(self, data, provider=None):
        """
        Build one event item per entry of the JSON list found in ``data``.

        :param data: encoded JSON document (decoded as UTF-8, invalid bytes
            are ignored)
        :param provider: ingest provider, only used for error reporting
        :return: list of event item dicts
        :raises SuperdeskIngestError: the NIFS maps lookup raises KeyError
        :raises ParserError: anything goes wrong while building the items
        """
        if self.subjects_map is None:
            self._set_metadata()
        try:
            stage_map, qcode_map = config.NIFS_STAGE_MAP, config.NIFS_QCODE_MAP
        except KeyError:
            raise SuperdeskIngestError.notConfiguredError(
                Exception('NIFS maps are not found in settings'))
        events = json.loads(data.decode('utf-8', 'ignore'))
        parsed = []
        try:
            for event in events:
                stage = stage_map.get(event['stageId'], '')

                # qcode comes from sportId (through config), sport name from qcode
                try:
                    qcode = qcode_map[event['sportId']]
                except KeyError:
                    logger.warning(
                        'no qcode registered for sportId {sport_id}'.format(
                            sport_id=event['sportId']))
                    qcode = sport = ''
                else:
                    sport = self.get_sport(qcode)

                # name as requested by NTB, prefix is skipped when it would be empty
                with_prefix = bool(stage or sport)
                tpl_name = ('{sport} {stage}, {rnd}. runde, {home} - {away}' if with_prefix
                            else '{rnd}. runde, {home} - {away}')
                name = tpl_name.format(
                    stage=stage,
                    sport=sport,
                    rnd=event['round'],
                    home=event['homeTeam']['name'],
                    away=event['awayTeam']['name'],
                ).strip()

                starts_at = dateutil.parser.parse(event['timestamp'])
                # no end time in the event, two hours are assumed
                ends_at = starts_at + timedelta(hours=2)

                # we have a common category and subject + a subject per sport
                # cf. SDNTB-496
                subject = [
                    {'qcode': CAT, 'name': CAT, 'scheme': 'category'},
                    {
                        'qcode': MAIN_SUBJ_QCODE,
                        'name': self.subjects_map.get(MAIN_SUBJ_QCODE, ''),
                        'scheme': 'subject_custom',
                    },
                    {'qcode': qcode, 'name': sport, 'scheme': 'subject_custom'},
                ]

                service = {'qcode': SERVICE_QCODE, 'name': self.service_name}

                parsed.append({
                    'guid': event['uid'],
                    ITEM_TYPE: CONTENT_TYPE.EVENT,
                    'dates': {'start': starts_at, 'end': ends_at, 'tz': ''},
                    'name': name,
                    'slugline': sport,
                    'subject': subject,
                    'anpa_category': [service],
                    'calendars': [self.calendar_item],
                    'firstcreated': utcnow(),
                    'versioncreated': utcnow(),
                })
            return parsed
        except Exception as ex:
            raise ParserError.parseMessageError(ex, provider)
Esempio n. 11
0
    def _update(self, provider, update):
        """
        Query the feed with the configured id list and return the parsed items.

        Paging data is read from ``provider["private"]`` and saved in
        ``update["private"]``. The stored paging data is dropped from
        ``provider`` when the provider was closed after its last update, and
        without paging data the parsed items are reversed.

        :param provider: ingest provider (its "private" key may be deleted)
        :type provider: dict
        :param update: provider updates, ``update["private"]`` is filled in
        :type update: dict
        :return: a list holding the parser result
        :raises SuperdeskIngestError: ``idList`` is missing or blank
        :raises IngestApiError: the answer can't be parsed or lacks
            minDateTime/transmitId
        """
        try:
            settings = provider["config"]
            id_list = settings["idList"]
            # before "products" was hardcoded as value for "idListType"
            id_list_type = settings.get("idListType", "products")
            if not id_list.strip():
                # a blank value is handled like a missing one
                raise KeyError
        except KeyError:
            raise SuperdeskIngestError.notConfiguredError(Exception("idList is needed"))

        # we check if the provider has been closed since the last update
        try:
            closed_at = provider["last_closed"]["closed_at"]
            updated_at = provider["last_updated"]
        except KeyError:
            pass
        else:
            if closed_at > updated_at and "private" in provider:
                # we reset the private data so only last page of items will be retrieved (cf. SDESK-4372)
                logger.info("reseting private data for provider {source}".format(source=provider.get("source")))
                del provider["private"]

        # keep only the non-empty ids, without surrounding spaces
        id_list = ",".join(filter(None, (part.strip() for part in id_list.split(","))))

        params = {
            "idList": id_list,
            "idListType": id_list_type,
            "format": "5",
            "maxItems": "25",
        }
        try:
            saved = provider["private"]
            paging = {
                "minDateTime": saved["min_date_time"],
                "sequenceNumber": saved["sequence_number"],
                "sortOrder": "chronological",
            }
        except KeyError:
            # the provider is new or re-opened, we want last items
            # so we need reverse-chronological order
            paging = {}
        params.update(paging)
        chronological = bool(paging)

        response = self.get_url(params=params)

        try:
            root_elt = etree.fromstring(response.content)
        except Exception:
            raise IngestApiError.apiRequestError(Exception("error while doing the request"))

        items = self.get_feed_parser(provider).parse(root_elt, provider)
        if not chronological:
            items.reverse()

        try:
            timestamps = root_elt.xpath('//iptc:timestamp[@role="minDateTime"]/text()', namespaces=NS)
            min_date_time = timestamps[0].strip()
            transmit_ids = root_elt.xpath("//iptc:transmitId/text()", namespaces=NS)
            sequence_number = transmit_ids[0].strip()
        except IndexError:
            raise IngestApiError.apiRequestError(Exception("missing minDateTime or transmitId"))

        # remember where to resume on next update
        private = update.setdefault("private", {})
        private["min_date_time"] = min_date_time
        private["sequence_number"] = sequence_number

        return [items]
Esempio n. 12
0
class NTBEventsApiFeedingService(HTTPFeedingServiceBase):
    """
    Feeding Service class which can read events from NTB API using HTTP
    """

    NAME = 'ntb_events_api'
    ERRORS = [
        SuperdeskIngestError.notConfiguredError().get_error_description(),
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiConnectionError().get_error_description(),
    ]
    # maximum number of requests done during a single update
    REQUESTS_PER_UPDATE = 4
    # sent as `search.showNumResults` with each request
    EVENTS_PER_REQUEST = 25
    HTTP_TIMEOUT = 20

    label = 'NTB Events API'
    fields = [
        {
            'id': 'url', 'type': 'text', 'label': 'Feed URL',
            'placeholder': 'Feed URL', 'required': True
        }] + HTTPFeedingServiceBase.AUTH_FIELDS
    service = 'events'

    def _update(self, provider, update):
        """
        Fetch events from external API.

        Up to ``REQUESTS_PER_UPDATE`` requests are done, starting at the
        offset stored in the provider's private data. The new offset is saved
        in ``update``; when nothing is retrieved, the provider is marked as
        closed instead.

        :param provider: Ingest Provider Details.
        :type provider: dict
        :param update: Any update that is required on provider.
        :type update: dict
        :return: a list of events which can be saved.
        """
        collected = OrderedDict()
        self._provider = provider
        private = self._provider.get('private', {})
        start_offset = private.get('search', {}).get('offset', 0)

        for _ in range(self.REQUESTS_PER_UPDATE):
            response = self._send_request(start_offset + len(collected))
            batch = self._parse_events(xml=etree.fromstring(response.content))
            if not batch:
                # nothing more to retrieve
                break
            collected.update(batch)

        if not collected:
            update['is_closed'] = True
            update['last_closed'] = {
                'closed_at': utcnow(),
                'message': 'Ingesting was finished.'
            }
            return [collected]

        # the offset counts every retrieved event, filtered out or not
        update['private'] = {'search': {'offset': start_offset + len(collected)}}
        return [self._filter_items(collected)]

    def _send_request(self, offset):
        """
        Execute http request to external API

        :param offset: offset provided in request payload
        :type offset: int
        :return: http response
        :raises IngestApiError.apiTimeoutError
        :raises IngestApiError.apiConnectionError
        :raises IngestApiError.apiRequestError
        :raises IngestApiError.apiGeneralError
        :raises IngestApiError.apiAuthError
        :raises IngestApiError.apiNotFoundError
        """
        feed_url = self._provider['config']['url'].strip()
        query = {
            'search.offset': offset,
            'search.showNumResults': self.EVENTS_PER_REQUEST
        }
        return self.get_url(feed_url, params=query)

    def _parse_events(self, xml):
        """
        Parse xml document and return the events keyed by their `ntb_id`

        :param xml: xml document
        :type xml: lxml.etree._Element
        :return: ordered mapping of `ntb_id` to event
        """
        parser = self.get_feed_parser(self._provider, article=xml)
        events = OrderedDict()
        for event in parser.parse(xml):
            events[event['ntb_id']] = event
        return events

    def _filter_items(self, items):
        """
        Remove events which already exist in the db with a state other than
        'ingested'; events still in 'ingested' state get the stored guid.

        :param items: dict with events, ntbId used as a key
        :type items: dict
        :return: a list of events
        """

        req = ParsedRequest()
        req.projection = json.dumps({'ntb_id': 1, 'guid': 1, ITEM_STATE: 1})
        req.max_results = len(items)

        lookup = {'ntb_id': {'$in': list(items)}}
        stored_events = superdesk.get_resource_service('events').get_from_mongo(req, lookup)
        for stored in stored_events:
            ntb_id = stored['ntb_id']
            if stored.get(ITEM_STATE) != WORKFLOW_STATE.INGESTED:
                # remove event when it has a state different from 'ingested'
                del items[ntb_id]
                continue
            # update event
            items[ntb_id][GUID_FIELD] = stored[GUID_FIELD]

        return list(items.values())
Esempio n. 13
0
class APFeedingService(FeedingService):
    """
    Feeding Service class which can retrieve articles from Associated Press web service
    """

    NAME = 'ap'

    ERRORS = [
        IngestApiError.apiRequestError().get_error_description(),
        SuperdeskIngestError.notConfiguredError().get_error_description()
    ]

    label = 'AP feed API'

    # configuration fields of the provider
    fields = [
        {
            'id': 'username',
            'type': 'text',
            'label': 'Username',
            'placeholder': 'Username',
            'required': True
        },
        {
            'id': 'password',
            'type': 'password',
            'label': 'Password',
            'placeholder': 'Password',
            'required': True
        },
        {
            'id': 'idList',
            'type': 'text',
            'label': 'Id List',
            'placeholder': 'use coma separated ids for multiple values',
            'required': False
        },
    ]

    def config_test(self, provider=None):
        """Delegate the configuration test to the parent class."""
        super().config_test(provider)

    def _update(self, provider, update):
        """
        Query the feed with the configured id list and return the parsed items.

        ``provider['config']`` must hold non-blank ``username``, ``password``
        and ``idList``. Paging values stored in ``provider['private']`` are
        sent with the request, and the ones found in the answer are saved in
        ``update['private']``.

        :param provider: ingest provider
        :type provider: dict
        :param update: provider updates, ``update['private']`` is filled in
        :type update: dict
        :return: a list holding the parser result
        :raises SuperdeskIngestError: a needed config value is missing or blank
        :raises IngestApiError: the request or the XML parsing fails, or
            minDateTime/transmitId are missing from the answer
        """
        try:
            settings = provider['config']
            user, password, id_list = (
                settings[key] for key in ('username', 'password', 'idList'))
            # blank values are handled like missing ones
            if not all(value.strip() for value in (user, password, id_list)):
                raise KeyError
        except KeyError:
            raise SuperdeskIngestError.notConfiguredError(
                Exception('username, password and idList are needed'))

        # keep only the non-empty ids, without surrounding spaces
        id_list = ','.join(
            filter(None, (part.strip() for part in id_list.split(','))))

        params = {
            'idList': id_list,
            'idListType': 'products',
            'format': '5',
            'maxItems': '25',
            'sortOrder': 'chronological'
        }
        try:
            saved = provider['private']
            paging = {
                'minDateTime': saved['min_date_time'],
                'sequenceNumber': saved['sequence_number'],
            }
        except KeyError:
            # nothing saved from a previous run
            pass
        else:
            params.update(paging)

        try:
            response = requests.get(URL, auth=(user, password), params=params)
        except Exception:
            raise IngestApiError.apiRequestError(
                Exception('error while doing the request'))

        try:
            root_elt = etree.fromstring(response.content)
        except Exception:
            raise IngestApiError.apiRequestError(
                Exception('error while doing the request'))

        items = self.get_feed_parser(provider).parse(root_elt, provider)

        try:
            timestamps = root_elt.xpath(
                '//iptc:timestamp[@role="minDateTime"]/text()', namespaces=NS)
            min_date_time = timestamps[0].strip()
            transmit_ids = root_elt.xpath(
                '//iptc:transmitId/text()', namespaces=NS)
            sequence_number = transmit_ids[0].strip()
        except IndexError:
            raise IngestApiError.apiRequestError(
                Exception('missing minDateTime or transmitId'))

        # remember where to resume on next update
        private = update.setdefault('private', {})
        private['min_date_time'] = min_date_time
        private['sequence_number'] = sequence_number

        return [items]
Esempio n. 14
0
    def get_url(self, url=None, **kwargs):
        """Do an HTTP Get on URL

        Authentication is added from the ``username``/``password`` config
        values when required, ``self.HTTP_DEFAULT_PARAMETERS`` are merged with
        the given ``params`` (the given ones win on conflict), and
        ``self.HTTP_TIMEOUT`` is used unless a ``timeout`` is given.

        :param string url: url to use (None to use self.HTTP_URL)
        :param **kwargs: extra parameter for requests
        :return requests.Response: response
        :raises SuperdeskIngestError: authentication is required but user or
            password is not configured
        :raises IngestApiError: the request fails or the response is not ok
        """
        if not url:
            url = self.HTTP_URL
        config = self.config
        user = config.get('username')
        password = config.get('password')
        if user:
            user = user.strip()
        if password:
            password = password.strip()

        auth_required = config.get('auth_required', self.HTTP_AUTH)
        if auth_required is None:
            # auth_required may not be used in the feeding service
            # in this case we use authentication only if user
            # and password are set.
            auth_required = bool(user and password)

        if auth_required:
            if not user:
                raise SuperdeskIngestError.notConfiguredError(
                    "user is not configured")
            if not password:
                raise SuperdeskIngestError.notConfiguredError(
                    "password is not configured")
            kwargs.setdefault('auth', (user, password))

        # `params=None` is handled like no params at all
        params = kwargs.pop("params", None) or {}
        if params or self.HTTP_DEFAULT_PARAMETERS:
            # defaults go first so that conflicting params given in arguments
            # override them; a new dict is built so neither the caller's dict
            # nor the class-level defaults are modified
            merged = dict(self.HTTP_DEFAULT_PARAMETERS or {})
            merged.update(params)
            kwargs["params"] = merged

        # a timeout given by the caller takes precedence over the default one
        kwargs.setdefault("timeout", self.HTTP_TIMEOUT)

        try:
            response = requests.get(url, **kwargs)
        except requests.exceptions.Timeout as exception:
            raise IngestApiError.apiTimeoutError(exception, self.provider)
        except requests.exceptions.ConnectionError as exception:
            raise IngestApiError.apiConnectionError(exception, self.provider)
        except requests.exceptions.RequestException as exception:
            raise IngestApiError.apiRequestError(exception, self.provider)
        except Exception as exception:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(exception, self.provider)

        if not response.ok:
            exception = Exception(response.reason)
            if response.status_code in (401, 403):
                raise IngestApiError.apiAuthError(exception, self.provider)
            elif response.status_code == 404:
                raise IngestApiError.apiNotFoundError(exception, self.provider)
            else:
                raise IngestApiError.apiGeneralError(exception, self.provider)

        return response
Esempio n. 15
0
class RitzauFeedingService(HTTPFeedingServiceBase):
    """
    Feeding Service class which can retrieve articles from Ritzau web service.

    Credentials are sent as query parameters (``user``/``password``) rather
    than through HTTP authentication, which is why ``HTTP_AUTH`` is False.
    """

    NAME = "ritzau"

    ERRORS = [
        IngestApiError.apiRequestError().get_error_description(),
        SuperdeskIngestError.notConfiguredError().get_error_description(),
    ]

    label = "Ritzau feed API"

    fields = HTTPFeedingServiceBase.AUTH_FIELDS + [{
        "id": "url",
        "type": "text",
        "label": "URL",
        "placeholder": "fill this field only for advanced uses",
        "required": False,
    }]

    HTTP_URL = "https://services.ritzau.dk/ritzaurest/Services.svc/xml/news/NewsQueue"
    # auth is done with params
    HTTP_AUTH = False

    def _update(self, provider, update):
        """Fetch the Ritzau queue and parse every ``RBNews`` element.

        When no URL override is configured, the request asks the service to
        wait for acknowledgement and each parsed element is acknowledged
        through ``URL_ACK`` using its ``ServiceQueueId``. With an override
        URL no acknowledgement is sent.

        :param dict provider: ingest provider, forwarded to the feed parser
        :param update: not used by this implementation
        :return list: a list containing one list of parsed items
        :raises SuperdeskIngestError: username/password missing, or the
            override URL is set but does not start with ``http``
        :raises IngestApiError: the answer can't be parsed, reports an error
            code, or lacks an expected element
        """
        config = self.config
        try:
            user, password = config["username"], config["password"]
        except KeyError:
            # the error must be raised, otherwise ``user``/``password`` stay
            # unbound and the failure surfaces later as an unrelated error
            raise SuperdeskIngestError.notConfiguredError(
                Exception("username and password are needed"))

        # the URL is optional: only validate it when it is actually set
        url_override = (config.get("url") or "").strip()
        if url_override and not url_override.startswith("http"):
            raise SuperdeskIngestError.notConfiguredError(
                Exception("if URL is set, it must be a valid http link"))

        params = {"user": user, "password": password, "maksAntal": 50}
        if not url_override:
            # default queue: items must be acknowledged (done below)
            params["waitAcknowledge"] = "true"

        # an empty url makes get_url fall back to HTTP_URL
        r = self.get_url(url_override, params=params)

        try:
            root_elt = etree.fromstring(r.text)
        except Exception:
            raise IngestApiError.apiRequestError(
                Exception("error while parsing the request answer"))

        error_codes = root_elt.xpath("(//error/text())[1]")
        if not error_codes:
            raise IngestApiError.apiRequestError(
                Exception("Invalid XML, <error> element not found"))
        if error_codes[0] != "0":
            # a missing <errormsg> must not be reported as a missing <error>
            err_msgs = root_elt.xpath("(//errormsg/text())[1]")
            err_msg = err_msgs[0] if err_msgs else error_codes[0]
            raise IngestApiError.apiRequestError(
                Exception("error code returned by API: {msg}".format(
                    msg=err_msg)))

        parser = self.get_feed_parser(provider)
        items = []
        for elt in root_elt.xpath("//RBNews"):
            items.append(parser.parse(elt, provider))
            if url_override:
                continue
            try:
                queue_id = elt.xpath(".//ServiceQueueId/text()")[0]
            except IndexError:
                raise IngestApiError.apiRequestError(
                    Exception("missing ServiceQueueId element"))
            ack_params = {
                "user": user,
                "password": password,
                "servicequeueid": queue_id
            }
            self.get_url(URL_ACK, params=ack_params)

        return [items]
Esempio n. 16
0
class RitzauFeedingService(HTTPFeedingServiceBase):
    """
    Feeding Service class which can retrieve articles from Ritzau web service.

    Credentials are sent as query parameters (``user``/``password``) rather
    than through HTTP authentication, which is why ``HTTP_AUTH`` is False.
    """

    NAME = 'ritzau'

    ERRORS = [IngestApiError.apiRequestError().get_error_description(),
              SuperdeskIngestError.notConfiguredError().get_error_description()]

    label = 'Ritzau feed API'

    fields = HTTPFeedingServiceBase.AUTH_FIELDS + [
        {
            'id': 'url', 'type': 'text', 'label': 'URL',
            'placeholder': 'fill this field only for advanced uses', 'required': False
        }
    ]

    HTTP_URL = 'https://services.ritzau.dk/ritzaurest/Services.svc/xml/news/NewsQueue'
    # auth is done with params
    HTTP_AUTH = False

    def _update(self, provider, update):
        """Fetch the Ritzau queue and parse every ``RBNews`` element.

        When no URL override is configured, the request asks the service to
        wait for acknowledgement and each parsed element is acknowledged
        through ``URL_ACK`` using its ``ServiceQueueId``. With an override
        URL no acknowledgement is sent.

        :param dict provider: ingest provider, forwarded to the feed parser
        :param update: not used by this implementation
        :return list: a list containing one list of parsed items
        :raises SuperdeskIngestError: username/password missing, or the
            override URL is set but does not start with ``http``
        :raises IngestApiError: the answer can't be parsed, reports an error
            code, or lacks an expected element
        """
        config = self.config
        try:
            user, password = config['username'], config['password']
        except KeyError:
            # the error must be raised, otherwise ``user``/``password`` stay
            # unbound and the failure surfaces later as an unrelated error
            raise SuperdeskIngestError.notConfiguredError(Exception('username and password are needed'))

        # the URL is optional: only validate it when it is actually set
        url_override = (config.get('url') or '').strip()
        if url_override and not url_override.startswith('http'):
            raise SuperdeskIngestError.notConfiguredError(Exception('if URL is set, it must be a valid http link'))

        params = {'user': user, 'password': password, 'maksAntal': 50}
        if not url_override:
            # default queue: items must be acknowledged (done below)
            params['waitAcknowledge'] = 'true'

        # an empty url makes get_url fall back to HTTP_URL
        r = self.get_url(url_override, params=params)

        try:
            root_elt = etree.fromstring(r.text)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while parsing the request answer'))

        error_codes = root_elt.xpath('(//error/text())[1]')
        if not error_codes:
            raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found'))
        if error_codes[0] != '0':
            # a missing <errormsg> must not be reported as a missing <error>
            err_msgs = root_elt.xpath('(//errormsg/text())[1]')
            err_msg = err_msgs[0] if err_msgs else error_codes[0]
            raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg)))

        parser = self.get_feed_parser(provider)
        items = []
        for elt in root_elt.xpath('//RBNews'):
            items.append(parser.parse(elt, provider))
            if url_override:
                continue
            try:
                queue_id = elt.xpath('.//ServiceQueueId/text()')[0]
            except IndexError:
                raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element'))
            ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id}
            self.get_url(URL_ACK, params=ack_params)

        return [items]
Esempio n. 17
0
class HTTPFeedingServiceBase(FeedingService):
    """
    Base class for feeding services using HTTP.

    This class contains helpers to make the creation of HTTP based feeding services
    easier.

    There are a couple of class attributes you can use:

    =======================  ===========
    Attribute                Explanation
    =======================  ===========
    HTTP_URL                 Main URL of your service, will be used by default in get_url
    HTTP_TIMEOUT             Timeout of requests in seconds
    HTTP_DEFAULT_PARAMETERS  Parameters used in every ``get`` requests.
                             Will be updated with params set in arguments
    HTTP_AUTH                Indicate if HTTP authentication is needed for your service.
                             If None, the authentication will be determined by the existence
                             of user and password. Will be overridden by auth_required config
                             if it exists.
    =======================  ===========

    In addition, you have some pre-filled fields:

    ===============  ===========
    Field            Explanation
    ===============  ===========
    AUTH_FIELDS      username and password fields
    AUTH_REQ_FIELDS  username and password fields + auth_required field to indicate
                     if they are needed
    ===============  ===========

    When ingest is updated, the provider is automatically saved to ``self.provider``.
    ``config`` property allows to access easily the user configuration.
    ``auth_info`` property returns a dictionary with ``username`` and ``password``

    ``get_url`` method does an HTTP GET request. url can be omitted in which case HTTP_URL will be used.
    Authentication parameters are set automatically, and errors are caught appropriately.
    Extra arguments are used directly in *requests* call.

    """

    ERRORS = [
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiRequestError().get_error_description(),
        IngestApiError.apiGeneralError().get_error_description(),
        SuperdeskIngestError.notConfiguredError().get_error_description()
    ]

    # override this parameter with the main URL to use
    HTTP_URL = None
    # timeout in seconds
    HTTP_TIMEOUT = 30
    # if some parameters are used in every request, put them here
    HTTP_DEFAULT_PARAMETERS = None
    # Set to True if authentication is mandatory, False if there is no authentication
    # and None to add authentication if user and password are defined.
    # If auth_required is defined in config fields, it will override this value.
    HTTP_AUTH = True

    # use this when auth is always required
    AUTH_FIELDS = [{
        'id': 'username',
        'type': 'text',
        'label': 'Username',
        'placeholder': 'Username',
        'required': True
    }, {
        'id': 'password',
        'type': 'password',
        'label': 'Password',
        'placeholder': 'Password',
        'required': True
    }]

    # use this when auth depends of a "auth_required" flag (set by user)
    AUTH_REQ_FIELDS = [{
        'id': 'auth_required',
        'type': 'boolean',
        'label': 'Requires Authentication',
        'placeholder': 'Requires Authentication',
        'required': False
    }, {
        'id': 'username',
        'type': 'text',
        'label': 'Username',
        'placeholder': 'Username',
        'required_expression': '{auth_required}',
        'show_expression': '{auth_required}'
    }, {
        'id': 'password',
        'type': 'password',
        'label': 'Password',
        'placeholder': 'Password',
        'required_expression': '{auth_required}',
        'show_expression': '{auth_required}'
    }]

    def __init__(self):
        self.token = None

    @property
    def auth_info(self):
        """Helper method to retrieve a dict with username and password when set

        Returns None when either of them is missing or empty.
        """
        username = self.config.get('username', '')
        password = self.config.get('password', '')
        if not username or not password:
            return None
        return {'username': username, 'password': password}

    @property
    def config(self):
        """Provider ``config`` dict (created empty on the provider if missing)"""
        return self.provider.setdefault('config', {})

    def validate_config(self):
        """
        Validate provider config (``self.config``) according to ``self.fields``

        Every field flagged ``required`` must be present in the config. The
        ``url`` value, when not empty, must start with ``http``; when empty it
        is only an error if the ``url`` field is declared as required.

        :raises SuperdeskIngestError: if the configuration is not valid
        """
        config = self.config

        # validate required config fields
        required_keys = [
            field['id'] for field in self.fields
            if field.get('required', False)
        ]
        if not set(config.keys()).issuperset(required_keys):
            raise SuperdeskIngestError.notConfiguredError(
                Exception('{} are required.'.format(', '.join(required_keys))))

        # "url" may be absent or None in config: treat it like an empty value
        url = (config.get('url') or '').strip()
        if not url:
            # a generator is needed here: a set of dicts can't be built
            # (dicts are unhashable) and a set is not an iterator for next()
            url_field = next(
                (f for f in self.fields if f['id'] == 'url'), None)
            if url_field is not None and url_field.get('required', False):
                raise SuperdeskIngestError.notConfiguredError(
                    Exception('URL is a required field.'))
        elif not url.startswith('http'):
            # validate url
            raise SuperdeskIngestError.notConfiguredError(
                Exception('URL must be a valid HTTP link.'))

    def get_url(self, url=None, **kwargs):
        """Do an HTTP Get on URL

        :param string url: url to use (None to use self.HTTP_URL)
        :param **kwargs: extra parameter for requests. ``params`` is merged
            on top of HTTP_DEFAULT_PARAMETERS, ``timeout`` defaults to
            HTTP_TIMEOUT and ``auth`` defaults to the configured credentials
            when authentication is required.
        :return requests.Response: response
        :raises SuperdeskIngestError: authentication is required but user or
            password is not configured
        :raises IngestApiError: the request failed or the response status
            is not OK
        """
        if not url:
            url = self.HTTP_URL
        config = self.config
        user = config.get('username')
        password = config.get('password')
        if user:
            user = user.strip()
        if password:
            password = password.strip()

        auth_required = config.get('auth_required', self.HTTP_AUTH)
        if auth_required is None:
            # auth_required may not be used in the feeding service
            # in this case we use authentication only if user
            # and password are set.
            auth_required = bool(user and password)

        if auth_required:
            if not user:
                raise SuperdeskIngestError.notConfiguredError(
                    "user is not configured")
            if not password:
                raise SuperdeskIngestError.notConfiguredError(
                    "password is not configured")
            kwargs.setdefault('auth', (user, password))

        params = kwargs.pop("params", None)
        if self.HTTP_DEFAULT_PARAMETERS:
            # start from a copy of the defaults so that conflicting params
            # given in arguments override them, and neither the class
            # attribute nor the caller's dict is mutated
            merged_params = dict(self.HTTP_DEFAULT_PARAMETERS)
            if params:
                merged_params.update(params)
            kwargs["params"] = merged_params
        elif params:
            kwargs["params"] = params

        # let callers override the timeout instead of failing on a
        # duplicated keyword argument
        kwargs.setdefault('timeout', self.HTTP_TIMEOUT)

        try:
            response = requests.get(url, **kwargs)
        except requests.exceptions.Timeout as exception:
            raise IngestApiError.apiTimeoutError(exception, self.provider)
        except requests.exceptions.ConnectionError as exception:
            raise IngestApiError.apiConnectionError(exception, self.provider)
        except requests.exceptions.RequestException as exception:
            raise IngestApiError.apiRequestError(exception, self.provider)
        except Exception as exception:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(exception, self.provider)

        if not response.ok:
            exception = Exception(response.reason)
            if response.status_code in (401, 403):
                raise IngestApiError.apiAuthError(exception, self.provider)
            elif response.status_code == 404:
                raise IngestApiError.apiNotFoundError(exception, self.provider)
            else:
                raise IngestApiError.apiGeneralError(exception, self.provider)

        return response

    def update(self, provider, update):
        """Store ``provider`` on the instance, validate its config, then delegate to the parent ``update``"""
        self.provider = provider
        self.validate_config()
        return super().update(provider, update)