def setUp(self):
    """Parse ``fixtures/afp.xml`` (located next to this module) into ``self.item``."""
    module_path = os.path.realpath(__file__)
    fixture_path = os.path.join(os.path.dirname(module_path), 'fixtures', 'afp.xml')
    test_provider = {'name': 'Test'}
    with open(fixture_path) as fixture_file:
        parser = NewsMLOneParser()
        xml_root = etree.fromstring(fixture_file.read())
        self.item = parser.parse_message(xml_root, test_provider)
class AFPIngestService(FileIngestService):
    """AFP Ingest Service.

    Walks the directory configured on the provider
    (``provider['config']['path']``), parses every file found there with
    ``NewsMLOneParser`` and yields the parsed items one at a time.
    """

    PROVIDER = 'afp'

    # Descriptions of the errors this service may raise.
    ERRORS = [ParserError.newsmlOneParserError().get_error_description(),
              ProviderError.ingestError().get_error_description()]

    def __init__(self):
        """Create the parser used for every ingested file."""
        self.parser = NewsMLOneParser()

    def _update(self, provider):
        """Ingest the files found in the provider's configured path.

        This is a generator. For each regular file whose modification time
        passes ``is_latest_content`` the file is parsed, time-stamped, moved
        with ``success=True`` and yielded as a one-element list. Files that
        do not pass the check are moved with ``success=True`` without being
        parsed. Once every file has been handled an ``ingest:update``
        notification is pushed.

        :param provider: provider dict; ``config.path`` and ``last_updated``
            are read from it.
        :raises ParserError: when a file is not well-formed XML.
        :raises ProviderError: when any other unexpected error occurs.
        """
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)

        if not self.path:
            return

        for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
            try:
                # Build the full path once instead of re-joining it for every use.
                filepath = os.path.join(self.path, filename)
                if os.path.isfile(filepath):
                    stat = os.lstat(filepath)
                    last_updated = datetime.fromtimestamp(stat.st_mtime, tz=utc)
                    if self.is_latest_content(last_updated, provider.get('last_updated')):
                        # Read raw bytes so the XML parser honours the encoding
                        # declared in the document rather than the locale default.
                        with open(filepath, 'rb') as f:
                            item = self.parser.parse_message(etree.fromstring(f.read()), provider)

                        # The file is closed at this point, before it is moved.
                        self.add_timestamps(item)
                        self.move_file(self.path, filename, provider=provider, success=True)
                        yield [item]
                    else:
                        self.move_file(self.path, filename, provider=provider, success=True)
            except etreeParserError as ex:
                # logger.exception() already attaches the active traceback; passing
                # ``ex`` as a %-format argument with no placeholder in the message
                # made the logging module fail to format the record.
                logger.exception("Ingest Type: AFP - File: %s could not be processed", filename)
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ParserError.newsmlOneParserError(ex, provider)
            except ParserError:
                # Deliberate best effort: the file is moved as failed and the
                # loop continues with the next file.
                # NOTE(review): nothing is logged here - presumably ParserError
                # reports itself when created; confirm.
                self.move_file(self.path, filename, provider=provider, success=False)
            except Exception as ex:
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ProviderError.ingestError(ex, provider)

        push_notification('ingest:update')
def setUp(self):
    """Load the AFP fixture stored beside this module and keep the parsed item on ``self.item``."""
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_path = os.path.join(here, 'fixtures', 'afp.xml')
    test_provider = {'name': 'Test'}
    with open(fixture_path) as handle:
        parser = NewsMLOneParser()
        document = etree.fromstring(handle.read())
        self.item = parser.parse_message(document, test_provider)
class TestCase(unittest.TestCase):
    """Checks the item produced by ``NewsMLOneParser`` for the ``afp.xml`` fixture."""

    def setUp(self):
        # Parse the fixture that lives in ``fixtures/afp.xml`` next to this module.
        dirname = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(dirname, 'fixtures', 'afp.xml')
        provider = {'name': 'Test'}
        with open(fixture) as f:
            self.item = NewsMLOneParser().parse_message(etree.fromstring(f.read()), provider)

    # NOTE: ``assertEqual`` is used throughout; the ``assertEquals`` alias is
    # deprecated and no longer exists in Python 3.12+.

    def test_headline(self):
        self.assertEqual(self.item.get('headline'), 'Sweden court accepts receivership for Saab carmaker')

    def test_dateline(self):
        self.assertEqual(self.item.get('dateline'), 'STOCKHOLM, Aug 29, 2014 (AFP) -')

    def test_slugline(self):
        self.assertEqual(self.item.get('slugline'), 'Sweden-SAAB')

    def test_byline(self):
        self.assertEqual(self.item.get('byline'), '')

    def test_language(self):
        self.assertEqual(self.item.get('language'), 'en')

    def test_guid(self):
        self.assertEqual(self.item.get('guid'), 'urn:newsml:afp.com:20140829T135002Z:TX-PAR-FXW86:1')

    def test_coreitemvalues(self):
        self.assertEqual(self.item.get('type'), 'text')
        self.assertEqual(self.item.get('urgency'), '4')
        self.assertEqual(self.item.get('version'), '1')
        self.assertEqual(self.item.get('versioncreated'), datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get('firstcreated'), datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get('pubstatus'), 'usable')

    def test_subjects(self):
        self.assertEqual(len(self.item.get('subject')), 5)
        self.assertIn({'name': 'automotive equipment', 'qcode': '04011002'}, self.item.get('subject'))
        self.assertIn({'name': 'bankruptcy', 'qcode': '04016007'}, self.item.get('subject'))
        self.assertIn({'name': 'economy, business and finance', 'qcode': '04000000'}, self.item.get('subject'))
        self.assertIn({'name': 'quarterly or semiannual financial statement', 'qcode': '04016038'},
                      self.item.get('subject'))
        self.assertIn({'name': 'manufacturing and engineering', 'qcode': '04011000'}, self.item.get('subject'))

    def test_usageterms(self):
        self.assertEqual(self.item.get('usageterms'), 'NO ARCHIVAL USE')

    def test_genre(self):
        self.assertIn({'name': 'business'}, self.item.get('genre'))
        self.assertIn({'name': 'bankruptcy'}, self.item.get('genre'))

    def test_content_is_text(self):
        # The body must be a plain string with the wrapping element stripped.
        self.assertIsInstance(self.item.get('body_html'), str)
        self.assertNotRegex(self.item.get('body_html'), '<body.content>')
def setUp(self):
    """Read ``fixtures/afp.xml`` from this module's directory and store the parsed item."""
    base_dir = os.path.dirname(os.path.realpath(__file__))
    fixture_path = os.path.join(base_dir, "fixtures", "afp.xml")
    test_provider = {"name": "Test"}
    with open(fixture_path) as source:
        parser = NewsMLOneParser()
        tree = etree.fromstring(source.read())
        self.item = parser.parse_message(tree, test_provider)
class TestCase(unittest.TestCase):
    """Checks the item produced by ``NewsMLOneParser`` for the ``afp.xml`` fixture."""

    def setUp(self):
        # Parse the fixture that lives in ``fixtures/afp.xml`` next to this module.
        dirname = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(dirname, "fixtures", "afp.xml")
        provider = {"name": "Test"}
        with open(fixture) as f:
            self.item = NewsMLOneParser().parse_message(etree.fromstring(f.read()), provider)

    # NOTE: ``assertEqual`` is used throughout; the ``assertEquals`` alias is
    # deprecated and no longer exists in Python 3.12+.

    def test_headline(self):
        self.assertEqual(self.item.get("headline"), "Sweden court accepts receivership for Saab carmaker")

    def test_dateline(self):
        self.assertEqual(self.item.get("dateline"), "STOCKHOLM, Aug 29, 2014 (AFP) -")

    def test_slugline(self):
        self.assertEqual(self.item.get("slugline"), "Sweden-SAAB")

    def test_byline(self):
        self.assertEqual(self.item.get("byline"), "")

    def test_language(self):
        self.assertEqual(self.item.get("language"), "en")

    def test_guid(self):
        self.assertEqual(self.item.get("guid"), "urn:newsml:afp.com:20140829T135002Z:TX-PAR-FXW86:1")

    def test_coreitemvalues(self):
        self.assertEqual(self.item.get("type"), "text")
        self.assertEqual(self.item.get("urgency"), "4")
        self.assertEqual(self.item.get("version"), "1")
        self.assertEqual(self.item.get("versioncreated"), datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get("firstcreated"), datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get("pubstatus"), "Usable")

    def test_subjects(self):
        self.assertEqual(len(self.item.get("subject")), 5)
        self.assertIn({"name": "automotive equipment", "qcode": "04011002"}, self.item.get("subject"))
        self.assertIn({"name": "bankruptcy", "qcode": "04016007"}, self.item.get("subject"))
        self.assertIn({"name": "economy, business and finance", "qcode": "04000000"}, self.item.get("subject"))
        self.assertIn(
            {"name": "quarterly or semiannual financial statement", "qcode": "04016038"}, self.item.get("subject")
        )
        self.assertIn({"name": "manufacturing and engineering", "qcode": "04011000"}, self.item.get("subject"))

    def test_usageterms(self):
        self.assertEqual(self.item.get("usageterms"), "NO ARCHIVAL USE")

    def test_genre(self):
        self.assertIn({"name": "business"}, self.item.get("genre"))
        self.assertIn({"name": "bankruptcy"}, self.item.get("genre"))

    def test_content_is_text(self):
        # The body must be a plain string with the wrapping element stripped.
        self.assertIsInstance(self.item.get("body_html"), str)
        self.assertNotRegex(self.item.get("body_html"), "<body.content>")
def __init__(self):
    """Instantiate a ``NewsMLOneParser`` and keep it on ``self.parser``."""
    self.parser = NewsMLOneParser()
class TestCase(unittest.TestCase):
    """Checks the item produced by ``NewsMLOneParser`` for the ``afp.xml`` fixture."""

    def setUp(self):
        # Parse the fixture that lives in ``fixtures/afp.xml`` next to this module.
        dirname = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(dirname, 'fixtures', 'afp.xml')
        with open(fixture) as f:
            self.item = NewsMLOneParser().parse_message(
                etree.fromstring(f.read()))

    # NOTE: ``assertEqual`` is used throughout; the ``assertEquals`` alias is
    # deprecated and no longer exists in Python 3.12+.

    def test_headline(self):
        self.assertEqual(
            self.item.get('headline'),
            'Sweden court accepts receivership for Saab carmaker')

    def test_dateline(self):
        self.assertEqual(self.item.get('dateline'),
                         'STOCKHOLM, Aug 29, 2014 (AFP) -')

    def test_slugline(self):
        self.assertEqual(self.item.get('slugline'), 'Sweden-SAAB')

    def test_byline(self):
        self.assertEqual(self.item.get('byline'), '')

    def test_language(self):
        self.assertEqual(self.item.get('language'), 'en')

    def test_guid(self):
        self.assertEqual(
            self.item.get('guid'),
            'urn:newsml:afp.com:20140829T135002Z:TX-PAR-FXW86:1')

    def test_coreitemvalues(self):
        self.assertEqual(self.item.get('type'), 'text')
        self.assertEqual(self.item.get('urgency'), '4')
        self.assertEqual(self.item.get('version'), '1')
        self.assertEqual(self.item.get('versioncreated'),
                         datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get('firstcreated'),
                         datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get('pubstatus'), 'Usable')

    def test_subjects(self):
        self.assertEqual(len(self.item.get('subject')), 5)
        self.assertIn({
            'name': 'automotive equipment',
            'qcode': '04011002'
        }, self.item.get('subject'))
        self.assertIn({
            'name': 'bankruptcy',
            'qcode': '04016007'
        }, self.item.get('subject'))
        self.assertIn(
            {
                'name': 'economy, business and finance',
                'qcode': '04000000'
            }, self.item.get('subject'))
        self.assertIn(
            {
                'name': 'quarterly or semiannual financial statement',
                'qcode': '04016038'
            }, self.item.get('subject'))
        self.assertIn(
            {
                'name': 'manufacturing and engineering',
                'qcode': '04011000'
            }, self.item.get('subject'))

    def test_usageterms(self):
        self.assertEqual(self.item.get('usageterms'), 'NO ARCHIVAL USE')

    def test_genre(self):
        self.assertIn({'name': 'business'}, self.item.get('genre'))
        self.assertIn({'name': 'bankruptcy'}, self.item.get('genre'))
class AFPIngestService(FileIngestService):
    """AFP Ingest Service.

    Walks the directory configured on the provider
    (``provider['config']['path']``), parses every file found there with
    ``NewsMLOneParser`` and yields the parsed items one at a time.
    """

    PROVIDER = 'afp'

    # Descriptions of the errors this service may raise.
    ERRORS = [
        ParserError.newsmlOneParserError().get_error_description(),
        ProviderError.ingestError().get_error_description()
    ]

    def __init__(self):
        """Create the parser used for every ingested file."""
        self.parser = NewsMLOneParser()

    def _update(self, provider):
        """Ingest the files found in the provider's configured path.

        This is a generator. For each regular file whose modification time
        passes ``is_latest_content`` the file is parsed, time-stamped, moved
        with ``success=True`` and yielded as a one-element list. Files that
        do not pass the check are moved with ``success=True`` without being
        parsed. Once every file has been handled an ``ingest:update``
        notification is pushed.

        :param provider: provider dict; ``config.path`` and ``last_updated``
            are read from it.
        :raises ParserError: when a file is not well-formed XML.
        :raises ProviderError: when any other unexpected error occurs.
        """
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)

        if not self.path:
            return

        for filename in get_sorted_files(self.path,
                                         sort_by=FileSortAttributes.created):
            try:
                # Build the full path once instead of re-joining it for every use.
                filepath = os.path.join(self.path, filename)
                if os.path.isfile(filepath):
                    stat = os.lstat(filepath)
                    last_updated = datetime.fromtimestamp(stat.st_mtime, tz=utc)
                    if self.is_latest_content(last_updated,
                                              provider.get('last_updated')):
                        # Read raw bytes so the XML parser honours the encoding
                        # declared in the document rather than the locale default.
                        with open(filepath, 'rb') as f:
                            item = self.parser.parse_message(
                                etree.fromstring(f.read()), provider)

                        # The file is closed at this point, before it is moved.
                        self.add_timestamps(item)
                        self.move_file(self.path, filename,
                                       provider=provider, success=True)
                        yield [item]
                    else:
                        self.move_file(self.path, filename,
                                       provider=provider, success=True)
            except etreeParserError as ex:
                # logger.exception() already attaches the active traceback; passing
                # ``ex`` as a %-format argument with no placeholder in the message
                # made the logging module fail to format the record.
                logger.exception(
                    "Ingest Type: AFP - File: %s could not be processed",
                    filename)
                self.move_file(self.path, filename,
                               provider=provider, success=False)
                raise ParserError.newsmlOneParserError(ex, provider)
            except ParserError:
                # Deliberate best effort: the file is moved as failed and the
                # loop continues with the next file.
                # NOTE(review): nothing is logged here - presumably ParserError
                # reports itself when created; confirm.
                self.move_file(self.path, filename,
                               provider=provider, success=False)
            except Exception as ex:
                self.move_file(self.path, filename,
                               provider=provider, success=False)
                raise ProviderError.ingestError(ex, provider)

        push_notification('ingest:update')