Example #1
0
 def setUp(self):
     """Read the fixture named by ``self.filename`` and parse it.

     The raw XML text is kept on ``self.nitf`` and the value returned by
     ``NITFParser().parse_message`` is kept on ``self.item``.
     """
     here = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.join(here, 'fixtures', self.filename)
     with open(fixture_path) as handle:
         self.nitf = handle.read()
     # Parsing only needs the text read above, so it runs once the file is closed.
     root = etree.fromstring(self.nitf)
     self.item = NITFParser().parse_message(root, {'name': 'Test'})
Example #2
0
 def __init__(self):
     """Set up the ``NITFParser`` instance and the Australia/Sydney timezone."""
     self.parser = NITFParser()
     self.tz = timezone('Australia/Sydney')
Example #3
0
 def setUp(self):
     """Read the ``aap.xml`` fixture and parse it.

     The raw XML text is kept on ``self.nitf`` and the value returned by
     ``NITFParser().parse_message`` is kept on ``self.item``.
     """
     here = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.join(here, 'fixtures', 'aap.xml')
     with open(fixture_path) as handle:
         self.nitf = handle.read()
     # Parsing only needs the text read above, so it runs once the file is closed.
     root = etree.fromstring(self.nitf)
     self.item = NITFParser().parse_message(root)
Example #4
0
class AAPIngestService(FileIngestService):
    """AAP Ingest Service.

    Reads files from the directory configured on the provider under
    ``provider['config']['path']`` and parses each one with ``NITFParser``.
    """

    PROVIDER = 'aap'

    ERRORS = [ParserError.nitfParserError().get_error_description(),
              ProviderError.ingestError().get_error_description(),
              ParserError.parseFileError().get_error_description()]

    def __init__(self):
        """Create the parser and the Australia/Sydney timezone object."""
        self.tz = timezone('Australia/Sydney')
        self.parser = NITFParser()

    def prepare_href(self, href):
        """Return ``href`` unchanged."""
        return href

    def _update(self, provider):
        """Parse the files found in the provider's configured path.

        This is a generator: it yields a one-element list ``[item]`` for each
        file that is parsed. Every processed file is handed to
        ``self.move_file`` with ``success=True`` or ``success=False``.
        ``push_notification('ingest:update')`` is called once the loop over
        all files has finished.

        :param provider: provider dict; ``config.path`` and ``last_updated``
            are read from it.
        :raises ParserError: when the XML of a file cannot be parsed.
        :raises ProviderError: on any other unexpected failure.
        """
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)

        if not self.path:
            # Inside a generator this simply ends iteration without yielding.
            return []

        for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
            try:
                filepath = os.path.join(self.path, filename)
                if not os.path.isfile(filepath):
                    continue

                stat = os.lstat(filepath)
                last_updated = datetime.fromtimestamp(stat.st_mtime, tz=utc)
                if not self.is_latest_content(last_updated, provider.get('last_updated')):
                    self.move_file(self.path, filename, provider=provider, success=True)
                    continue

                # Read the whole file first and close it before moving it or
                # yielding: otherwise the handle would stay open while the
                # file is relocated and for as long as the consumer keeps
                # this generator suspended.
                with open(filepath, 'r') as f:
                    content = f.read()

                item = self.parser.parse_message(etree.fromstring(content), provider)
                self.move_file(self.path, filename, provider=provider, success=True)
                yield [item]
            except etreeParserError as ex:
                logger.exception("Ingest Type: AAP - File: {0} could not be processed".format(filename))
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ParserError.nitfParserError(ex, provider) from ex
            except ParserError:
                # A file the parser rejects must not stop the remaining files
                # from being processed; record it instead of dropping it silently.
                logger.exception("Ingest Type: AAP - File: {0} could not be processed".format(filename))
                self.move_file(self.path, filename, provider=provider, success=False)
            except Exception as ex:
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ProviderError.ingestError(ex, provider) from ex

        push_notification('ingest:update')

    def parse_file(self, filename, provider):
        """Parse a single file from the provider's configured path.

        :param filename: name of the file, relative to ``config.path``.
        :param provider: provider dict; ``config.path`` is read from it.
        :return: ``[item]`` with the parsed item, or ``[]`` when no path is
            configured.
        :raises ParserError: on any failure; the file is first passed to
            ``self.move_file`` with ``success=False``.
        """
        try:
            self.path = provider.get('config', {}).get('path', None)

            if not self.path:
                return []

            with open(os.path.join(self.path, filename), 'r') as f:
                content = f.read()

            item = self.parser.parse_message(etree.fromstring(content), provider)
            return [item]
        except Exception as ex:
            self.move_file(self.path, filename, provider=provider, success=False)
            raise ParserError.parseFileError('AAP', filename, ex, provider) from ex
Example #5
0
class TestCase(unittest.TestCase):
    """Check the item produced by ``NITFParser`` for the ``aap.xml`` fixture."""

    def setUp(self):
        """Read ``fixtures/aap.xml`` and parse it into ``self.item``."""
        dirname = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(dirname, 'fixtures', 'aap.xml')
        with open(fixture) as f:
            self.nitf = f.read()
        # Parsing only needs the text read above, so the file is closed first.
        self.item = NITFParser().parse_message(etree.fromstring(self.nitf))

    # NOTE: ``assertEqual`` is used throughout; the ``assertEquals`` alias was
    # deprecated and has been removed from ``unittest`` in Python 3.12.

    def test_headline(self):
        self.assertEqual(self.item.get('headline'), "The main stories on today's 1900 ABC TV news")

    def test_keywords(self):
        self.assertEqual(self.item.get('slugline'), 'Monitor 1900 ABC News')

    def test_subjects(self):
        self.assertEqual(len(self.item.get('subject')), 2)
        self.assertIn({'name': 'Justice'}, self.item.get('subject'))
        self.assertIn({'qcode': '02003000', 'name': 'Police'}, self.item.get('subject'))

    def test_guid(self):
        self.assertEqual(self.item.get('guid'), 'AAP.115314987.5417374')
        self.assertEqual(self.item.get('guid'), self.item.get('uri'))

    def test_type(self):
        self.assertEqual(self.item.get('type'), 'text')

    def test_urgency(self):
        self.assertEqual(self.item.get('urgency'), '5')

    def test_dateline(self):
        self.assertEqual(self.item.get('dateline'), 'Sydney')

    def test_byline(self):
        self.assertEqual(self.item.get('byline'), 'By John Doe')

    def test_abstract(self):
        self.assertEqual(self.item.get('abstract'), 'The main stories on today\'s 1900 ABC TV news')

    # def test_copyright(self):
    #     self.assertEqual(self.item.get('copyrightholder'), 'Australian Associated Press')

    def test_dates(self):
        self.assertEqual(self.item.get('firstcreated').isoformat(), '2013-10-20T19:27:51')
        self.assertEqual(self.item.get('versioncreated').isoformat(), '2013-10-20T19:27:51')

    def test_content(self):
        text = "<p>   1A) More extreme weather forecast over the next few days the <br />fire situation is likely"
        self.assertIn(text, self.item.get('body_html'))

    def test_pubstatus(self):
        self.assertEqual('usable', self.item.get('pubstatus'))

    def test_ingest_provider_sequence(self):
        self.assertEqual(self.item.get('ingest_provider_sequence'), '1747')

    def test_anpa_category(self):
        self.assertEqual(self.item.get('anpa-category')['qcode'], 'a')