Beispiel #1
0
 def setUp(self):
     """Load the WENN fixture named by ``self.filename`` and parse it.

     The raw bytes are kept on ``self.file`` and the parser result on
     ``self.items``.
     """
     dirname = os.path.dirname(os.path.realpath(__file__))
     fixture = os.path.normpath(os.path.join(dirname, '../fixtures', self.filename))
     provider = {'name': 'Wenn'}
     with open(fixture, 'rb') as f:
         self.file = f.read()
     # Parse once; the previous code built a second tree and threw it away.
     self.items = WENNFeedParser().parse(etree.fromstring(self.file), provider)
Beispiel #2
0
 def setUp(self):
     """Load the WENN fixture named by ``self.filename`` and parse it.

     The file content is kept on ``self.file`` and the parser result on
     ``self.items``.
     """
     dirname = os.path.dirname(os.path.realpath(__file__))
     fixture = os.path.join(dirname, 'fixtures', self.filename)
     provider = {'name': 'Wenn'}
     with open(fixture) as f:
         self.file = f.read()
     # Parse once; the previous code built a second tree and threw it away.
     self.items = WENNParser().parse_message(etree.fromstring(self.file), provider)
 def setUp(self):
     """Read the WENN fixture (``self.filename``) and run the parser on it.

     Sets ``self.file`` to the file content and ``self.items`` to whatever
     ``WENNParser.parse_message`` returns.
     """
     dirname = os.path.dirname(os.path.realpath(__file__))
     fixture = os.path.join(dirname, 'fixtures', self.filename)
     provider = {'name': 'Wenn'}
     with open(fixture) as f:
         self.file = f.read()
     # A single parse is enough; the discarded duplicate parse was removed.
     self.items = WENNParser().parse_message(etree.fromstring(self.file), provider)
 def setUp(self):
     """Read the NITF fixture named by ``self.filename`` and parse it.

     Stores the file text on ``self.nitf`` and the parser result on
     ``self.item``.
     """
     here = os.path.realpath(__file__)
     fixture_path = os.path.join(os.path.dirname(here), 'fixtures', self.filename)
     provider = {'name': 'Test'}
     with open(fixture_path) as handle:
         self.nitf = handle.read()
         parser = NITFParser()
         root = etree.fromstring(self.nitf)
         self.item = parser.parse_message(root, provider)
Beispiel #5
0
 def setUpFixture(self, filename):
     """Parse a NewsML fixture and keep the tree and the first parsed item.

     :param filename: name of a file inside the ``fixtures`` directory next
         to this module

     Sets ``self.tree`` to the parsed XML root and ``self.item`` to the first
     element returned by ``NewsMLTwoParser.parse_message``.
     """
     dirname = os.path.dirname(os.path.realpath(__file__))
     fixture = os.path.join(dirname, 'fixtures', filename)
     # Read raw bytes: decoding with the locale's default encoding and then
     # re-encoding as UTF-8 is platform dependent and can corrupt the document.
     with open(fixture, 'rb') as f:
         self.tree = etree.fromstring(f.read())
     parser = newsml_2_0.NewsMLTwoParser()
     self.item = parser.parse_message(self.tree)[0]
Beispiel #6
0
    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        The auth token is stored in ``payload`` under ``'token'`` before the
        request is sent. A request timeout is retried up to three times.

        :param endpoint: endpoint passed to ``_get_absolute_url``
        :type endpoint: str
        :param payload: query parameters; a new dict is used when omitted
        :type payload: dict
        :return: root element parsed from the response body
        :raises LookupError: when the response status is 404
        :raises: errors built by the ``IngestApiError`` factory methods for
            timeout, redirect, request, unicode, parse and other failures
        """

        if payload is None:
            payload = {}

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                response = self.session.get(url,
                                            params=payload,
                                            timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    # Logger.warn is a deprecated alias of Logger.warning.
                    logger.warning(
                        'Reuters API timeout retrying, retries %s', retries)
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider)
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider)
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider)
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(error, self.provider)

            if response.status_code == 404:
                raise LookupError(
                    _('Not found {payload}').format(payload=payload))

            break

        try:
            return etree.fromstring(
                response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)
Beispiel #7
0
 def setUp(self):
     """Parse the ``afp.xml`` fixture and keep the result on ``self.item``."""
     base_dir = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.join(base_dir, 'fixtures', 'afp.xml')
     provider = {'name': 'Test'}
     with open(fixture_path, 'rb') as stream:
         parser = AFPNewsMLOneFeedParser()
         root = etree.fromstring(stream.read())
         self.item = parser.parse(root, provider)
Beispiel #8
0
    def _generate_auth_token(self, provider):
        """
        Request an authentication token from the URL configured on the provider.

        :param provider: ingest provider details; its ``config`` supplies
            ``auth_url``, ``username`` and ``password``
        :type provider: dict
        :return: text of the root element of the XML response
        :rtype: str
        :raises: the error built by IngestApiError.apiGeneralError() if auth_url is missing
        """
        http = requests.Session()
        http.mount('https://', SSLAdapter())

        config = provider.get('config', {})
        auth_url = config.get('auth_url', None)
        if not auth_url:
            missing_url = KeyError('''
                                                     Ingest Provider {} is missing Authentication URL.
                                                     Please check the configuration.
                                                     '''.format(provider['name']))
            raise IngestApiError.apiGeneralError(provider=provider,
                                                 exception=missing_url)

        credentials = {
            'username': config.get('username', ''),
            'password': config.get('password', ''),
        }

        # NOTE(review): verify=False disables TLS certificate verification.
        response = http.get(auth_url, params=credentials, verify=False, timeout=30)

        # response.content (bytes) is used as a workaround for the http mock lib
        root = etree.fromstring(response.content)
        return root.text
 def setUp(self):
     """Read the Scoop fixture named by ``self.filename`` and parse it.

     Stores the file text on ``self.scoop`` and the parser result on
     ``self.item``.
     """
     module_dir = os.path.dirname(os.path.realpath(__file__))
     joined = os.path.join(module_dir, '../fixtures', self.filename)
     fixture_path = os.path.normpath(joined)
     provider = {'name': 'Test'}
     with open(fixture_path) as handle:
         self.scoop = handle.read()
         parser = ScoopNewsMLTwoFeedParser()
         root = etree.fromstring(self.scoop)
         self.item = parser.parse(root, provider)
Beispiel #10
0
    def _update(self, provider):
        """Yield one single-item list per ingestable file in the provider path.

        Files whose modification time passes ``is_latest_content`` are parsed,
        moved with ``success=True`` and yielded; other regular files are only
        moved with ``success=True``. When an error occurs the file is moved
        with ``success=False``.

        :param provider: ingest provider; ``config.path`` is the directory to scan
        """
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)

        if not self.path:
            return []

        for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
            try:
                filepath = os.path.join(self.path, filename)
                if not os.path.isfile(filepath):
                    continue

                modified_at = datetime.fromtimestamp(os.lstat(filepath).st_mtime, tz=utc)
                if not self.is_latest_content(modified_at, provider.get('last_updated')):
                    self.move_file(self.path, filename, provider=provider, success=True)
                    continue

                with open(filepath, 'r') as source:
                    item = self.parser.parse_message(etree.fromstring(source.read()), provider)
                    self.move_file(self.path, filename, provider=provider, success=True)
                    yield [item]
            except etreeParserError as ex:
                logger.exception("Ingest Type: AAP - File: {0} could not be processed".format(filename))
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ParserError.nitfParserError(ex, provider)
            except ParserError:
                # parser errors are not re-raised; the file is only moved
                self.move_file(self.path, filename, provider=provider, success=False)
            except Exception as ex:
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ProviderError.ingestError(ex, provider)

        push_notification('ingest:update')
    def _generate_auth_token(self, provider):
        """
        Request an authentication token from the URL configured on the provider.

        :param provider: ingest provider details; its ``config`` supplies
            ``auth_url``, ``username`` and ``password``
        :type provider: dict
        :return: text of the root element of the XML response
        :rtype: str
        :raises: the error built by IngestApiError.apiGeneralError() if auth_url is missing,
            or by IngestApiError.apiAuthError() if the response status is not 2xx
        """
        http = requests.Session()
        http.mount('https://', SSLAdapter())

        config = provider.get('config', {})
        auth_url = config.get('auth_url', None)
        if not auth_url:
            missing_url = KeyError(
                '''
                                                     Ingest Provider {} is missing Authentication URL.
                                                     Please check the configuration.
                                                     '''.format(provider['name']))
            raise IngestApiError.apiGeneralError(provider=provider, exception=missing_url)

        credentials = {
            'username': config.get('username', ''),
            'password': config.get('password', ''),
        }

        # NOTE(review): verify=False disables TLS certificate verification.
        response = http.get(auth_url, params=credentials, verify=False, timeout=30)
        if not 200 <= response.status_code < 300:
            raise IngestApiError.apiAuthError(provider=provider)

        # response.content (bytes) is used as a workaround for the http mock lib
        root = etree.fromstring(response.content)
        return root.text
Beispiel #12
0
 def setUp(self):
     """Load ``self.filename`` from the fixtures directory and parse it as NITF.

     The file text goes to ``self.nitf`` and the parser result to ``self.item``.
     """
     base_dir = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.join(base_dir, 'fixtures', self.filename)
     provider = {'name': 'Test'}
     with open(fixture_path) as source:
         self.nitf = source.read()
         parser = NITFParser()
         tree = etree.fromstring(self.nitf)
         self.item = parser.parse_message(tree, provider)
 def _parse_file(self, filename):
     """Parse a Ritzau fixture file.

     :param filename: file name inside ``../fixtures`` relative to this module

     Sets ``self.root_elt`` to the XML root and ``self.item`` to the result
     of ``RitzauFeedParser.parse``.
     """
     base_dir = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.normpath(os.path.join(base_dir, '../fixtures', filename))
     provider = {'name': 'Test'}
     with open(fixture_path, 'rb') as stream:
         content = stream.read()
         self.root_elt = etree.fromstring(content)
         self.item = RitzauFeedParser().parse(self.root_elt, provider)
 def setUp(self):
     """Parse the ``ana1.xml`` fixture and keep the result on ``self.item``."""
     module_dir = os.path.dirname(os.path.realpath(__file__))
     relative = os.path.join(module_dir, "../fixtures", "ana1.xml")
     fixture_path = os.path.normpath(relative)
     provider = {"name": "Test", "source": "ANA"}
     with open(fixture_path, "rb") as stream:
         parser = ANANewsMLOneFeedParser()
         root = etree.fromstring(stream.read())
         self.item = parser.parse(root, provider)
 def _parse_file(self, filename):
     """Read a fixture by name and run the Ritzau feed parser on it.

     :param filename: file name inside ``../fixtures`` relative to this module

     The XML root is stored on ``self.root_elt`` and the parser result on
     ``self.item``.
     """
     here = os.path.realpath(__file__)
     relative = os.path.join(os.path.dirname(here), '../fixtures', filename)
     provider = {'name': 'Test'}
     with open(os.path.normpath(relative), 'rb') as handle:
         raw = handle.read()
         self.root_elt = etree.fromstring(raw)
         self.item = RitzauFeedParser().parse(self.root_elt, provider)
Beispiel #16
0
 def setUp(self):
     """Install ``self.mapping`` as ``config.NITF_MAPPING`` and parse the fixture.

     Stores the file text on ``self.nitf`` and the parser result on
     ``self.item``.
     """
     config.NITF_MAPPING = self.mapping
     base_dir = os.path.dirname(os.path.realpath(__file__))
     relative = os.path.join(base_dir, '../fixtures', self.filename)
     fixture_path = os.path.normpath(relative)
     provider = {'name': 'Test'}
     with open(fixture_path) as source:
         self.nitf = source.read()
         parser = NITFFeedParser()
         tree = etree.fromstring(self.nitf)
         self.item = parser.parse(tree, provider)
Beispiel #17
0
 def setUp(self):
     """Run the parent setUp, install the NITF mapping and parse the fixture.

     ``config.NITF_MAPPING`` is set to ``self.mapping``; the file text goes to
     ``self.nitf`` and the parser result to ``self.item``.
     """
     super().setUp()
     config.NITF_MAPPING = self.mapping
     here = os.path.realpath(__file__)
     relative = os.path.join(os.path.dirname(here), '../fixtures', self.filename)
     provider = {'name': 'Test'}
     with open(os.path.normpath(relative)) as handle:
         self.nitf = handle.read()
         parser = NITFFeedParser()
         root = etree.fromstring(self.nitf)
         self.item = parser.parse(root, provider)
Beispiel #18
0
 def setUp(self):
     """Insert ``self.vocab`` into ``vocabularies`` and parse the Ritzau fixture.

     Sets ``self.root_elt`` to the XML root and ``self.item`` to the parser
     result.
     """
     with self.app.app_context():
         self.app.data.insert('vocabularies', self.vocab)
     base_dir = os.path.dirname(os.path.realpath(__file__))
     relative = os.path.join(base_dir, '../fixtures', self.filename)
     fixture_path = os.path.normpath(relative)
     provider = {'name': 'Test'}
     with open(fixture_path, 'rb') as stream:
         raw = stream.read()
         self.root_elt = etree.fromstring(raw)
         self.item = RitzauFeedParser().parse(self.root_elt, provider)
Beispiel #19
0
 def setUp(self):
     """Insert ``self.vocab`` into ``vocabularies`` and parse the NITF fixture.

     The file text is stored on ``self.nitf`` and the parser result on
     ``self.item``.
     """
     with self.app.app_context():
         self.app.data.insert('vocabularies', self.vocab)
     here = os.path.realpath(__file__)
     relative = os.path.join(os.path.dirname(here), '../fixtures', self.filename)
     fixture_path = os.path.normpath(relative)
     provider = {'name': 'Test'}
     with open(fixture_path) as source:
         self.nitf = source.read()
         parser = NITFFeedParser()
         tree = etree.fromstring(self.nitf)
         self.item = parser.parse(tree, provider)
 def _run_parse(self):
     """Parse the fixture ``self.filename`` with ``self.parser``.

     Stores the raw bytes on ``self.xml``, the XML root on ``self.xml_root``
     and the parser result on ``self.item``.
     """
     base_dir = os.path.dirname(os.path.realpath(__file__))
     relative = os.path.join(base_dir, '../fixtures', self.filename)
     fixture_path = os.path.normpath(relative)
     provider = {'name': 'Test'}
     with open(fixture_path, 'rb') as stream:
         self.xml = stream.read()
         self.xml_root = etree.fromstring(self.xml)
         self.item = self.parser.parse(self.xml_root, provider)
 def setUp(self):
     """Insert ``self.vocab`` into ``vocabularies`` and parse the NewsML fixture.

     Sets ``self.root_elt`` to the XML root and ``self.items`` to the parser
     result.
     """
     with self.app.app_context():
         self.app.data.insert('vocabularies', self.vocab)
     module_dir = os.path.dirname(os.path.realpath(__file__))
     relative = os.path.join(module_dir, '../fixtures', self.filename)
     provider = {'name': 'Test'}
     with open(os.path.normpath(relative), 'rb') as handle:
         raw = handle.read()
         self.root_elt = etree.fromstring(raw)
         self.items = NewsMLTwoFeedParser().parse(self.root_elt, provider)
 def setUp(self):
     """Read the EFE fixture named by ``self.filename`` and parse it.

     Stores the file text on ``self.nitf`` and the parser result on
     ``self.item``.
     """
     here = os.path.realpath(__file__)
     relative = os.path.join(os.path.dirname(here), "../fixtures", self.filename)
     fixture_path = os.path.normpath(relative)
     provider = {"name": "Test"}
     with open(fixture_path) as source:
         self.nitf = source.read()
         parser = EFEFeedParser()
         tree = etree.fromstring(self.nitf)
         self.item = parser.parse(tree, provider)
Beispiel #23
0
 def setUp(self):
     """Load the Scoop fixture ``self.filename`` and run the feed parser.

     The file text goes to ``self.scoop`` and the parser result to
     ``self.item``.
     """
     base_dir = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.normpath(os.path.join(base_dir, '../fixtures', self.filename))
     provider = {'name': 'Test'}
     with open(fixture_path) as source:
         self.scoop = source.read()
         parser = ScoopNewsMLTwoFeedParser()
         tree = etree.fromstring(self.scoop)
         self.item = parser.parse(tree, provider)
Beispiel #24
0
 def setUp(self):
     """Run the parent setUp, insert the vocabulary and parse the NITF fixture.

     ``self.vocab`` is inserted into ``vocabularies``; the file text is stored
     on ``self.nitf`` and the parser result on ``self.item``.
     """
     super().setUp()
     with self.app.app_context():
         self.app.data.insert('vocabularies', self.vocab)
     module_dir = os.path.dirname(os.path.realpath(__file__))
     relative = os.path.join(module_dir, '../fixtures', self.filename)
     fixture_path = os.path.normpath(relative)
     provider = {'name': 'Test'}
     with open(fixture_path) as handle:
         self.nitf = handle.read()
         parser = NITFFeedParser()
         root = etree.fromstring(self.nitf)
         self.item = parser.parse(root, provider)
Beispiel #25
0
 def setUp(self):
     """Install ``self.mapping`` as ``config.NITF_MAPPING`` and parse the fixture.

     The raw bytes are stored on ``self.nitf`` and the parser result on
     ``self.item``.
     """
     config.NITF_MAPPING = self.mapping
     here = os.path.realpath(__file__)
     relative = os.path.join(os.path.dirname(here), "../fixtures", self.filename)
     provider = {"name": "Test"}
     with open(os.path.normpath(relative), "rb") as stream:
         self.nitf = stream.read()
         parser = NITFFeedParser()
         root = etree.fromstring(self.nitf)
         self.item = parser.parse(root, provider)
Beispiel #26
0
 def test_get_subjects_with_invalid_qcode(self):
     # A subject with refnum "00000000" is expected to yield no subjects.
     document = ''.join((
         '<?xml version="1.0" encoding="UTF-8"?>',
         '<nitf><head>',
         '<tobject tobject.type="News">',
         '<tobject.property tobject.property.type="Current" />',
         '<tobject.subject tobject.subject.refnum="00000000" ',
         'tobject.subject.type="Justice" tobject.subject.matter="Police" />',
         '</tobject></head></nitf>',
     ))
     parser = NITFFeedParser()
     found = parser.get_subjects(etree.fromstring(document))
     self.assertEqual(len(found), 0)
Beispiel #27
0
 def __init__(self, methodname):
     """Read the WXR fixture ``self.filename`` and parse it at construction time.

     :param methodname: forwarded to the parent constructor

     The raw bytes are kept on ``self.ori_file`` and the parser result on
     ``self.articles``.
     """
     super().__init__(methodname)
     base_dir = os.path.dirname(os.path.realpath(__file__))
     relative = os.path.join(base_dir, '../fixtures', self.filename)
     fixture_path = os.path.normpath(relative)
     provider = {'name': 'Test'}
     with open(fixture_path, 'rb') as stream:
         self.ori_file = stream.read()
     feed_parser = wordpress_wxr.WPWXRFeedParser()
     root = etree.fromstring(self.ori_file)
     self.articles = feed_parser.parse(root, provider)
 def test_fixture_list_with_no_dates(self):
     # Parsing the cricket fixture list is expected to produce two items.
     filename = 'aap_cricket.xml'
     dirname = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.normpath(os.path.join(dirname, '../fixtures', filename))
     with open(fixture_path, 'rb') as f:
         self.xml = f.read()
     # Separate name for the parser input; the path is no longer shadowed.
     fixture = {
         'fixture_xml': etree.fromstring(self.xml),
         'sport_id': '3',
         'sport_name': 'Cricket',
         'comp_name': 'Champions Trophy in England/Wales',
         'comp_id': 'dom-436',
     }
     items = AAPSportsFixturesParser().parse(fixture, None)
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(len(items), 2)
Beispiel #29
0
 def test_fixtures(self):
     # Parsing the soccer fixture list is expected to produce five items.
     filename = 'aap_soccer.xml'
     dirname = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.normpath(os.path.join(dirname, '../fixtures', filename))
     with open(fixture_path, 'rb') as f:
         self.xml = f.read()
     # Separate name for the parser input; the path is no longer shadowed.
     fixture = {
         'fixture_xml': etree.fromstring(self.xml),
         'sport_id': '4',
         'sport_name': 'Soccer',
         'comp_name': 'Qualifiers',
         'comp_id': 'int-314',
     }
     items = AAPSportsFixturesParser().parse(fixture, None)
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(len(items), 5)
 def test_fixtures(self):
     # The soccer fixture list should parse into exactly five items.
     filename = 'aap_soccer.xml'
     dirname = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.normpath(os.path.join(dirname, '../fixtures', filename))
     with open(fixture_path, 'rb') as f:
         self.xml = f.read()
     # Keep the path and the parser input under different names.
     fixture = {
         'fixture_xml': etree.fromstring(self.xml),
         'sport_id': '4',
         'sport_name': 'Soccer',
         'comp_name': 'Qualifiers',
         'comp_id': 'int-314',
     }
     items = AAPSportsFixturesParser().parse(fixture, None)
     # assertEqual prints the actual length when the check fails.
     self.assertEqual(len(items), 5)
Beispiel #31
0
 def setUp(self):
     """Insert ``self.vocab`` into ``vocabularies`` and parse the NewsML fixture.

     The XML root is stored on ``self.root_elt`` and the parser result on
     ``self.items``.
     """
     with self.app.app_context():
         self.app.data.insert("vocabularies", self.vocab)
     here = os.path.realpath(__file__)
     relative = os.path.join(os.path.dirname(here), "../fixtures", self.filename)
     fixture_path = os.path.normpath(relative)
     provider = {"name": "Test"}
     with open(fixture_path, "rb") as stream:
         raw = stream.read()
         self.root_elt = etree.fromstring(raw)
         self.items = NewsMLTwoFeedParser().parse(self.root_elt, provider)
Beispiel #32
0
 def test_fixture_list_with_no_dates(self):
     # The cricket fixture list should parse into exactly two items.
     filename = 'aap_cricket.xml'
     dirname = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.normpath(os.path.join(dirname, '../fixtures', filename))
     with open(fixture_path, 'rb') as f:
         self.xml = f.read()
     # Keep the path and the parser input under different names.
     fixture = {
         'fixture_xml': etree.fromstring(self.xml),
         'sport_id': '3',
         'sport_name': 'Cricket',
         'comp_name': 'Champions Trophy in England/Wales',
         'comp_id': 'dom-436',
     }
     items = AAPSportsFixturesParser().parse(fixture, None)
     # assertEqual prints the actual length when the check fails.
     self.assertEqual(len(items), 2)
Beispiel #33
0
 def test_get_subjects_with_invalid_qcode(self):
     # A subject with refnum "00000000" is expected to yield no subjects.
     pieces = [
         '<?xml version="1.0" encoding="UTF-8"?>',
         '<nitf><head>',
         '<tobject tobject.type="News">',
         '<tobject.property tobject.property.type="Current" />',
         '<tobject.subject tobject.subject.refnum="00000000" ',
         'tobject.subject.type="Justice" tobject.subject.matter="Police" />',
         '</tobject></head></nitf>',
     ]
     root = etree.fromstring(''.join(pieces))
     found = get_subjects(root)
     self.assertEqual(len(found), 0)
Beispiel #34
0
 def __init__(self, methodname):
     """Read the WXR fixture and parse it with a recovering XML parser.

     :param methodname: forwarded to the parent constructor

     The raw bytes are kept on ``self.ori_file`` and the parser result on
     ``self.articles``.
     """
     super().__init__(methodname)
     here = os.path.realpath(__file__)
     relative = os.path.join(os.path.dirname(here), "../fixtures", self.filename)
     fixture_path = os.path.normpath(relative)
     provider = {"name": "Test"}
     with open(fixture_path, "rb") as stream:
         raw = stream.read()
     self.ori_file = raw
     # recover=True makes the XML parser tolerate malformed input
     lenient = etree.XMLParser(recover=True)
     root = etree.fromstring(raw, lenient)
     self.articles = wordpress_wxr.WPWXRFeedParser().parse(root, provider)
    def _get_tree(self, endpoint, payload=None):
        """Get xml response for given API endpoint and payload.

        The auth token is stored in ``payload`` under ``'token'`` before the
        request is sent. A request timeout is retried up to three times.

        :param endpoint: endpoint passed to ``_get_absolute_url``
        :type endpoint: str
        :param payload: query parameters; a new dict is used when omitted
        :type payload: dict
        :return: root element parsed from the response body
        :raises LookupError: when the response status is 404
        :raises: errors built by the ``IngestApiError`` factory methods for
            timeout, redirect, request, unicode, parse and other failures
        """

        if payload is None:
            payload = {}

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        if not self.session:
            self.session = requests.Session()

        retries = 0
        while True:
            try:
                response = self.session.get(url, params=payload, timeout=(30, 15))
            except requests.exceptions.Timeout as ex:
                if retries < 3:
                    # Logger.warn is a deprecated alias of Logger.warning.
                    logger.warning('Reuters API timeout retrying, retries %s', retries)
                    retries += 1
                    continue
                raise IngestApiError.apiTimeoutError(ex, self.provider)
            except requests.exceptions.TooManyRedirects as ex:
                # Tell the user their URL was bad and try a different one
                raise IngestApiError.apiRedirectError(ex, self.provider)
            except requests.exceptions.RequestException as ex:
                # catastrophic error. bail.
                raise IngestApiError.apiRequestError(ex, self.provider)
            except Exception as error:
                traceback.print_exc()
                raise IngestApiError.apiGeneralError(error, self.provider)

            if response.status_code == 404:
                raise LookupError('Not found %s' % payload)

            break

        try:
            return etree.fromstring(response.content)  # workaround for http mock lib
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)
Beispiel #36
0
 def test_fixture_list_with_dates(self):
     # The golf fixture list should parse into one item in the 'sport' calendar.
     filename = 'aap_golf.xml'
     dirname = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.normpath(os.path.join(dirname, '../fixtures', filename))
     with open(fixture_path, 'rb') as f:
         self.xml = f.read()
     # Keep the path and the parser input under different names.
     fixture = {
         'fixture_xml': etree.fromstring(self.xml),
         'sport_id': '8',
         'sport_name': 'Golf',
         'comp_name': 'D+D Real Czech Masters',
         'comp_id': 'int-26669',
     }
     items = AAPSportsFixturesParser().parse(fixture, None)
     # assertEqual prints the actual length when the check fails.
     self.assertEqual(len(items), 1)
     self.assertEqual(items[0].get('calendars')[0].get('qcode'), 'sport')
 def test_fixture_list_with_dates(self):
     # The golf fixture list should parse into one item carrying the
     # 'sport' and 'sportgeneral' calendars, in that order.
     filename = 'aap_golf.xml'
     dirname = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.normpath(os.path.join(dirname, '../fixtures', filename))
     with open(fixture_path, 'rb') as f:
         self.xml = f.read()
     # Keep the path and the parser input under different names.
     fixture = {
         'fixture_xml': etree.fromstring(self.xml),
         'sport_id': '8',
         'sport_name': 'Golf',
         'comp_name': 'D+D Real Czech Masters',
         'comp_id': 'int-26669',
     }
     items = AAPSportsFixturesParser().parse(fixture, None)
     # assertEqual prints the actual length when the check fails.
     self.assertEqual(len(items), 1)
     self.assertEqual(items[0].get('calendars')[0].get('qcode'), 'sport')
     self.assertEqual(items[0].get('calendars')[1].get('qcode'), 'sportgeneral')
 def __init__(self, methodname):
     """Read the WXR fixture and parse it with carriage returns escaped.

     :param methodname: forwarded to the parent constructor

     The untouched bytes are kept on ``self.ori_file``; the parser result is
     stored on ``self.articles``.
     """
     super().__init__(methodname)
     base_dir = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.normpath(os.path.join(base_dir, '../fixtures', self.filename))
     provider = {'name': 'Test'}
     with open(fixture_path, 'rb') as stream:
         raw = stream.read()
     self.ori_file = raw
     # every CR byte is replaced by the character reference &#13; before parsing
     escaped = raw.replace(b'\r', b'&#13;')
     lenient = etree.XMLParser(recover=True)
     root = etree.fromstring(escaped, lenient)
     self.articles = wordpress_wxr.WPWXRFeedParser().parse(root, provider)
Beispiel #39
0
    def _update(self, provider):
        """Yield one single-item list per ingestable file in the provider path.

        Files whose modification time passes ``is_latest_content`` are parsed,
        timestamped, moved with ``success=True`` and yielded; other regular
        files are only moved with ``success=True``. When an error occurs the
        file is moved with ``success=False``.

        :param provider: ingest provider; ``config.path`` is the directory to scan
        :raises: the error built by ``ParserError.newsmlOneParserError`` on XML
            parser errors, or by ``ProviderError.ingestError`` on other errors
        """
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)
        if not self.path:
            return

        for filename in get_sorted_files(self.path,
                                         sort_by=FileSortAttributes.created):
            try:
                # Build the path once instead of re-joining it for every use.
                filepath = os.path.join(self.path, filename)
                if os.path.isfile(filepath):
                    stat = os.lstat(filepath)
                    last_updated = datetime.fromtimestamp(stat.st_mtime,
                                                          tz=utc)
                    if self.is_latest_content(last_updated,
                                              provider.get('last_updated')):
                        with open(filepath, 'r') as f:
                            item = self.parser.parse_message(
                                etree.fromstring(f.read()), provider)

                            self.add_timestamps(item)
                            self.move_file(self.path,
                                           filename,
                                           provider=provider,
                                           success=True)
                            yield [item]
                    else:
                        self.move_file(self.path,
                                       filename,
                                       provider=provider,
                                       success=True)
            except etreeParserError as ex:
                # logger.exception already records the active exception; the
                # extra positional ``ex`` had no placeholder in the message and
                # made the logging call fail to format.
                logger.exception(
                    "Ingest Type: AFP - File: %s could not be processed",
                    filename)
                self.move_file(self.path,
                               filename,
                               provider=provider,
                               success=False)
                raise ParserError.newsmlOneParserError(ex, provider)
            except ParserError:
                # parser errors are not re-raised; the file is only moved
                self.move_file(self.path,
                               filename,
                               provider=provider,
                               success=False)
            except Exception as ex:
                self.move_file(self.path,
                               filename,
                               provider=provider,
                               success=False)
                raise ProviderError.ingestError(ex, provider)

        push_notification('ingest:update')
Beispiel #40
0
 def test_get_subjects(self):
     # Refnum 02003000 is expected to give both the Justice subject and the
     # Police subject matter.
     pieces = [
         '<?xml version="1.0" encoding="UTF-8"?>',
         '<nitf><head>',
         '<tobject tobject.type="News">',
         '<tobject.property tobject.property.type="Current" />',
         '<tobject.subject tobject.subject.refnum="02003000" ',
         'tobject.subject.type="Justice" tobject.subject.matter="Police" />',
         '</tobject></head></nitf>',
     ]
     root = etree.fromstring(''.join(pieces))
     found = get_subjects(root)
     self.assertEqual(len(found), 2)
     self.assertIn({'qcode': '02000000', 'name': 'Justice'}, found)
     self.assertIn({'qcode': '02003000', 'name': 'Police'}, found)
Beispiel #41
0
 def test_get_subjects(self):
     # Refnum 02003000 is expected to give both the Justice subject and the
     # Police subject matter.
     document = ''.join((
         '<?xml version="1.0" encoding="UTF-8"?>',
         '<nitf><head>',
         '<tobject tobject.type="News">',
         '<tobject.property tobject.property.type="Current" />',
         '<tobject.subject tobject.subject.refnum="02003000" ',
         'tobject.subject.type="Justice" tobject.subject.matter="Police" />',
         '</tobject></head></nitf>',
     ))
     parser = NITFFeedParser()
     found = parser.get_subjects(etree.fromstring(document))
     self.assertEqual(len(found), 2)
     self.assertIn({'qcode': '02000000', 'name': 'Justice'}, found)
     self.assertIn({'qcode': '02003000', 'name': 'Police'}, found)
 def __init__(self, methodname):
     """Load the WXR fixture, escape carriage returns and parse it.

     :param methodname: forwarded to the parent constructor

     ``self.ori_file`` keeps the bytes exactly as read; ``self.articles``
     receives the parser result.
     """
     super().__init__(methodname)
     here = os.path.realpath(__file__)
     relative = os.path.join(os.path.dirname(here), '../fixtures', self.filename)
     fixture_path = os.path.normpath(relative)
     provider = {'name': 'Test'}
     with open(fixture_path, 'rb') as handle:
         original = handle.read()
     self.ori_file = original
     # every CR byte is replaced by the character reference &#13; before parsing
     prepared = original.replace(b'\r', b'&#13;')
     recovering_parser = etree.XMLParser(recover=True)
     tree = etree.fromstring(prepared, recovering_parser)
     self.articles = wordpress_wxr.WPWXRFeedParser().parse(tree, provider)
Beispiel #43
0
 def test_get_subjects(self):
     # Refnum 02003000 is expected to give both the Justice subject and the
     # Police subject matter; the document is passed as bytes.
     document = b"".join((
         b'<?xml version="1.0" encoding="UTF-8"?>',
         b"<nitf><head>",
         b'<tobject tobject.type="News">',
         b'<tobject.property tobject.property.type="Current" />',
         b'<tobject.subject tobject.subject.refnum="02003000" ',
         b'tobject.subject.type="Justice" tobject.subject.matter="Police" />',
         b"</tobject></head></nitf>",
     ))
     parser = NITFFeedParser()
     found = parser.get_subjects(etree.fromstring(document))
     self.assertEqual(len(found), 2)
     self.assertIn({"qcode": "02000000", "name": "Justice"}, found)
     self.assertIn({"qcode": "02003000", "name": "Police"}, found)
Beispiel #44
0
 def test_get_subjects(self):
     # Refnum 02003000 is expected to give both the Justice subject and the
     # Police subject matter.
     pieces = [
         '<?xml version="1.0" encoding="UTF-8"?>',
         "<nitf><head>",
         '<tobject tobject.type="News">',
         '<tobject.property tobject.property.type="Current" />',
         '<tobject.subject tobject.subject.refnum="02003000" ',
         'tobject.subject.type="Justice" tobject.subject.matter="Police" />',
         "</tobject></head></nitf>",
     ]
     root = etree.fromstring("".join(pieces))
     found = get_subjects(root)
     self.assertEqual(len(found), 2)
     self.assertIn({"qcode": "02000000", "name": "Justice"}, found)
     self.assertIn({"qcode": "02003000", "name": "Police"}, found)
Beispiel #45
0
    def parse_file(self, filename, provider):
        """Parse a single file from the provider's configured path.

        :param filename: name of the file inside ``config.path``
        :param provider: ingest provider; ``config.path`` is the base directory
        :return: an empty list when no path is configured, otherwise a
            single-item list holding the parsed item
        :raises: the error built by ``ParserError.parseFileError`` on any
            exception, after the file was moved with ``success=False``
        """
        try:
            self.path = provider.get('config', {}).get('path', None)
            if not self.path:
                return []

            file_path = os.path.join(self.path, filename)
            with open(file_path, 'r') as source:
                parse_message = self.parser.parse_message
                root = etree.fromstring(source.read())
                parsed = parse_message(root, provider)

            return [parsed]
        except Exception as ex:
            self.move_file(self.path, filename, provider=provider, success=False)
            raise ParserError.parseFileError('AAP', filename, ex, provider)
def fetch_token_from_api(provider):
    """Log in to the Reuters API and return the token from the XML response.

    :param provider: ingest provider; ``config`` supplies ``username`` and
        ``password`` (empty strings when missing)
    :return: text of the root element of the login response
    """
    session = requests.Session()
    session.mount('https://', SSLAdapter())

    url = 'https://commerce.reuters.com/rmd/rest/xml/login'
    payload = {
        'username': provider.get('config', {}).get('username', ''),
        'password': provider.get('config', {}).get('password', ''),
    }

    # Without a timeout the call can block forever if the server never answers.
    # NOTE(review): verify=False disables TLS certificate verification.
    response = session.get(url, params=payload, stream=False, verify=False, timeout=30)
    # workaround for httmock lib: parse the raw bytes (response.content)
    # rather than response.text
    tree = etree.fromstring(response.content)
    return tree.text
def fetch_token_from_api(provider):
    """Log in to the Reuters API and return the token from the XML response.

    :param provider: ingest provider; ``config`` supplies ``username`` and
        ``password`` (empty strings when missing)
    :return: text of the root element of the login response
    """
    session = requests.Session()
    session.mount("https://", SSLAdapter())

    url = "https://commerce.reuters.com/rmd/rest/xml/login"
    payload = {
        "username": provider.get("config", {}).get("username", ""),
        "password": provider.get("config", {}).get("password", ""),
    }

    # Without a timeout the call can block forever if the server never answers.
    # NOTE(review): verify=False disables TLS certificate verification.
    response = session.get(url, params=payload, stream=False, verify=False, timeout=30)
    # workaround for httmock lib: parse the raw bytes (response.content)
    # rather than response.text
    tree = etree.fromstring(response.content)
    return tree.text
Beispiel #48
0
def fetch_token_from_api(provider):
    """Call the login endpoint and return the text of the XML response root.

    Credentials come from ``provider['config']`` (``username`` / ``password``),
    each defaulting to an empty string when missing.
    """
    http = requests.Session()
    http.mount('https://', SSLAdapter())

    credentials = {
        field: provider.get('config', {}).get(field, '')
        for field in ('username', 'password')
    }

    login_response = http.get(
        'https://commerce.reuters.com/rmd/rest/xml/login',
        params=credentials,
        verify=False,
        timeout=30,
    )
    # The raw bytes are parsed instead of the decoded text:
    # workaround for the httmock lib.
    return etree.fromstring(login_response.content).text
Beispiel #49
0
    def setUp(self):
        """Populate vocabularies, mirror settings into config and parse the fixture.

        The fixture named by ``self.filename`` is read into ``self.nitf`` and
        its parsed form is stored in ``self.item``.
        """
        super().setUp()

        # Vocabularies have to be prepopulated so that qcodes can be resolved.
        settings_dir = os.path.abspath(os.path.dirname(settings.__file__))
        VocabulariesPopulateCommand().run(os.path.join(settings_dir, "data/vocabularies.json"))

        # Copy every upper-case setting onto config so NITF_MAPPING is honoured.
        for name in dir(settings):
            if not name.isupper():
                continue
            setattr(config, name, getattr(settings, name))

        here = os.path.dirname(os.path.realpath(__file__))
        fixture_path = os.path.normpath(os.path.join(here, '../fixtures', self.filename))
        source = {'name': 'Test'}
        with open(fixture_path) as fixture_file:
            self.nitf = fixture_file.read()
            feed_parser = NITFFeedParser()
            self.item = feed_parser.parse(etree.fromstring(self.nitf), source)
Beispiel #50
0
    def _get_tree(self, endpoint, payload=None):
        """
        Get the parsed XML response for the given API endpoint and payload.

        The caller's ``payload`` is copied before the auth token is added, so
        the mapping passed in is never modified.

        :param endpoint: endpoint handed to ``self._get_absolute_url``
        :type endpoint: str
        :param payload: query parameters sent with the request
        :type payload: dict
        :return: root element returned by ``etree.fromstring`` for the
            response body
        :raises LookupError: when the response status code is 404
        :raises: errors built by the ``IngestApiError`` factory methods when
            the request or the XML parsing fails
        """

        # Copy so that adding the token does not leak into the caller's dict.
        payload = {} if payload is None else dict(payload)

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=15)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise LookupError('Not found %s' % payload)

        try:
            # Parse the raw bytes: workaround for the http mock lib.
            return etree.fromstring(response.content)
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)
Beispiel #51
0
    def _get_tree(self, endpoint, payload=None):
        """
        Get the parsed XML response for the given API endpoint and payload.

        The caller's ``payload`` is copied before the auth token is added, so
        the mapping passed in is never modified.

        :param endpoint: endpoint handed to ``self._get_absolute_url``
        :type endpoint: str
        :param payload: query parameters sent with the request
        :type payload: dict
        :return: root element returned by ``etree.fromstring`` for the
            response body
        :raises LookupError: when the response status code is 404
        :raises: errors built by the ``IngestApiError`` factory methods when
            the request or the XML parsing fails
        """

        # Copy so that adding the token does not leak into the caller's dict.
        payload = {} if payload is None else dict(payload)

        payload['token'] = self._get_auth_token(self.provider, update=True)
        url = self._get_absolute_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=15)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise LookupError('Not found %s' % payload)

        try:
            # Parse the raw bytes: workaround for the http mock lib.
            return etree.fromstring(response.content)
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)
Beispiel #52
0
    def get_tree(self, endpoint, payload=None):
        """Get the parsed XML response for the given API endpoint and payload.

        The caller's ``payload`` is copied before the token is added, so the
        mapping passed in is never modified.

        :param endpoint: endpoint handed to ``self.get_url``
        :param payload: optional dict of query parameters
        :return: root element returned by ``etree.fromstring`` for the
            response body
        :raises: ``IngestApiError`` (directly or via its factory methods) when
            the request fails, the status code is 404, or parsing fails
        """
        # Copy so that adding the token does not leak into the caller's dict.
        payload = {} if payload is None else dict(payload)
        payload['token'] = self.get_token()
        url = self.get_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=21.0)
        except requests.exceptions.Timeout as ex:
            # Maybe set up for a retry, or continue in a retry loop
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError(error, self.provider)

        if response.status_code == 404:
            raise IngestApiError.apiNotFoundError(LookupError('Not found %s' % payload), self.provider)

        try:
            # Parse the raw bytes rather than response.text:
            # workaround for the httmock lib.
            return etree.fromstring(response.content)
        except UnicodeEncodeError as error:
            traceback.print_exc()
            raise IngestApiError.apiUnicodeError(error, self.provider)
        except ParseError as error:
            traceback.print_exc()
            raise IngestApiError.apiParseError(error, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError(error, self.provider)
Beispiel #53
0
    def get_tree(self, endpoint, payload=None):
        """Get the parsed XML response for the given API endpoint and payload.

        The caller's ``payload`` is copied before the token is added, so the
        mapping passed in is never modified.

        :param endpoint: endpoint handed to ``self.get_url``
        :param payload: optional dict of query parameters
        :return: root element returned by ``etree.fromstring`` for the
            response body
        :raises LookupError: when the response status code is 404
        """
        # Copy so that adding the token does not leak into the caller's dict.
        payload = {} if payload is None else dict(payload)
        payload['token'] = self.get_token()
        url = self.get_url(endpoint)

        try:
            response = requests.get(url, params=payload, timeout=21.0)
        except Exception:
            # Print the traceback, then let the same exception propagate.
            traceback.print_exc()
            raise

        if response.status_code == 404:
            raise LookupError('Not found %s' % payload)

        try:
            # Parse the raw bytes rather than response.text:
            # workaround for the httmock lib.
            return etree.fromstring(response.content)
        except UnicodeEncodeError:
            traceback.print_exc()
            raise
Beispiel #54
0
 def setUp(self):
     """Parse the ``afp.xml`` fixture next to this module into ``self.item``."""
     here = os.path.dirname(os.path.realpath(__file__))
     source = {'name': 'Test'}
     with open(os.path.join(here, 'fixtures', 'afp.xml')) as xml_file:
         message_parser = NewsMLOneParser()
         self.item = message_parser.parse_message(etree.fromstring(xml_file.read()), source)
Beispiel #55
0
 def setUp(self):
     """Parse the ``afp.xml`` fixture next to this module into ``self.item``."""
     here = os.path.dirname(os.path.realpath(__file__))
     source = {"name": "Test"}
     with open(os.path.join(here, "fixtures", "afp.xml")) as xml_file:
         message_parser = NewsMLOneParser()
         self.item = message_parser.parse_message(etree.fromstring(xml_file.read()), source)
 def test_can_parse(self):
     """The parser must accept the XML held in ``self.scoop``."""
     feed_parser = ScoopNewsMLTwoFeedParser()
     accepted = feed_parser.can_parse(etree.fromstring(self.scoop))
     self.assertTrue(accepted)
Beispiel #57
0
def get_etree(filename):
    """Parse a file from the ``fixtures`` directory next to this module.

    The file is read as raw bytes so the XML parser decodes it from the
    document's own encoding declaration; a text-mode read followed by
    ``.encode('utf-8')`` would depend on the locale's default encoding.

    :param filename: name of the file inside the ``fixtures`` directory
    :return: result of ``etree.fromstring`` on the file's contents
    """
    dirname = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(dirname, 'fixtures', filename), 'rb') as f:
        return etree.fromstring(f.read())
Beispiel #58
0
 def setUp(self):
     """Parse the ``afp.xml`` fixture next to this module into ``self.item``."""
     here = os.path.dirname(os.path.realpath(__file__))
     with open(os.path.join(here, 'fixtures', 'afp.xml')) as xml_file:
         message_parser = Parser()
         self.item = message_parser.parse_message(etree.fromstring(xml_file.read()))