def TestIPNewsHtmlToText(self):
    """Check the plain-text rendering of an HTML story body.

    Formats an article whose ``body_html`` holds a ``<br>`` and two
    paragraphs, then asserts the exact ``article_text`` of the first
    formatted item.

    NOTE(review): the name starts with an upper-case ``Test``; unittest's
    default loader only collects methods prefixed with lower-case ``test``,
    so confirm this method is actually being run.
    """
    article = {
        'source': 'AAP',
        'anpa_category': [{'qcode': 'a'}],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '02011001'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'text',
        # The source joined these two paragraphs with a backslash
        # continuation inside the literal; the whitespace between them is
        # kept as a single space.
        'body_html': (
            '<p>The story body line 1<br>Line 2</p>'
            ' <p>abcdefghi abcdefghi abcdefghi abcdefghi '
            'abcdefghi abcdefghi abcdefghi abcdefghi more</p>'
        ),
        'word_count': '1',
        'priority': '1',
    }
    expected = (
        '\r\nThe story body line 1 \r\nLine 2 \r\n\r\n'
        'abcdefghi abcdefghi abcdefghi abcdefghi '
        'abcdefghi abcdefghi abcdefghi abcdefghi \r\nmore'
    )

    # NOTE(review): the context-manager expression and the lookup call were
    # truncated in the source (only "'subscribers', None, None)[0]"
    # survived). Restored as an application context plus a data-layer find;
    # confirm against the test fixture.
    with self.app.app_context():
        subscriber = self.app.data.find('subscribers', None, None)[0]

        f = AAPIpNewsFormatter()
        _seq, item = f.format(article, subscriber)[0]

        self.assertEqual(item['article_text'], expected)
def TestMultipleCategories(self):
    """Check that an article with two categories yields two documents.

    The article carries the 'F' and 'S' categories and two subject qcodes.
    Each formatted document is checked for the subject reference and
    detail expected for its category.

    NOTE(review): the name starts with an upper-case ``Test``; unittest's
    default loader only collects methods prefixed with lower-case ``test``,
    so confirm this method is actually being run.
    """
    article = {
        'source': 'AAP',
        'anpa_category': [
            {'name': 'Finance', 'qcode': 'F'},
            {'name': 'Overseas Sport', 'qcode': 'S'},
        ],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '04001005'}, {'qcode': '15011002'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'text',
        'body_html': 'body',
        'word_count': '1',
        'priority': '1',
        'task': {'desk': 1},
    }
    expected_codes = set(
        'cxx 0fh axx az and pxx 0ah 0ir 0px 0hw pnd pxd cnd cxd 0nl axd'.split(' ')
    )

    # NOTE(review): the context-manager expression and the lookup call were
    # truncated in the source (only "'subscribers', None, None)[0]"
    # survived). Restored as an application context plus a data-layer find;
    # confirm against the test fixture.
    with self.app.app_context():
        subscriber = self.app.data.find('subscribers', None, None)[0]

        f = AAPIpNewsFormatter()
        docs = f.format(article, subscriber)
        self.assertEqual(len(docs), 2)

        for _seq, doc in docs:
            if doc['category'] == 'S':
                self.assertEqual(doc['subject_reference'], '15011002')
                self.assertEqual(doc['subject_detail'], 'four-man sled')
            if doc['category'] == 'F':
                self.assertEqual(doc['subject_reference'], '04001005')
                self.assertEqual(doc['subject_detail'], 'viniculture')
                # NOTE(review): nesting was lost in the flattened source;
                # the selector-code check is kept in this last branch, the
                # narrowest reading. Confirm it is not meant for every doc.
                codes = set(doc['selector_codes'].split(' '))
                self.assertSetEqual(codes, expected_codes)
def testGeoBlockNotTwoStates(self):
    """Check selector codes when two states are excluded via targeted_for.

    The article lists 'New South Wales' and 'Victoria' in ``targeted_for``
    with ``allow`` set to False; the test asserts the exact set of selector
    codes on the first formatted document.
    """
    article = {
        'source': 'AAP',
        'anpa_category': [{'qcode': 'a'}],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '04001005'}, {'qcode': '15011002'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'text',
        'body_html': 'body',
        'word_count': '1',
        'priority': '1',
        'task': {'desk': 1},
        'urgency': 1,
        'place': [{'qcode': 'VIC', 'name': 'VIC'}],
        'targeted_for': [
            {'name': 'New South Wales', 'allow': False},
            {'name': 'Victoria', 'allow': False},
        ],
    }

    # NOTE(review): the callee of this lookup was truncated in the source
    # (only "'subscribers', None, None)[0]" survived). Restored as a
    # data-layer find; confirm against the test fixture.
    subscriber = self.app.data.find('subscribers', None, None)[0]

    f = AAPIpNewsFormatter()
    _seq, doc = f.format(article, subscriber)[0]

    expected_codes_str = (
        'an5 an4 an7 an6 ax5 ax6 ax7 an8 px6 ax4 ax8 px5 0ah 0px'
        ' px8 0fh px7 px4 pn4 pn5 pn6 pn7 px0'
    )
    codes = set(doc['selector_codes'].split(' '))
    self.assertSetEqual(codes, set(expected_codes_str.split(' ')))
def testGeoBlockNotTwoStates(self):
    """Assert the selector codes for an article blocked from two states.

    ``targeted_for`` names 'New South Wales' and 'Victoria', both with
    ``allow`` False, and ``priority`` is the integer 1. The first formatted
    document must carry exactly the expected selector-code set.
    """
    blocked_states = [
        {'name': 'New South Wales', 'allow': False},
        {'name': 'Victoria', 'allow': False},
    ]
    article = {
        'source': 'AAP',
        'anpa_category': [{'qcode': 'a'}],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '04001005'}, {'qcode': '15011002'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'text',
        'body_html': 'body',
        'word_count': '1',
        'priority': 1,
        'task': {'desk': 1},
        'urgency': 1,
        'place': [{'qcode': 'VIC', 'name': 'VIC'}],
        'targeted_for': blocked_states,
    }

    # NOTE(review): the callee of this lookup was truncated in the source
    # (only "'subscribers', None, None)[0]" survived). Restored as a
    # data-layer find; confirm against the test fixture.
    subscriber = self.app.data.find('subscribers', None, None)[0]

    formatter = AAPIpNewsFormatter()
    _seq, doc = formatter.format(article, subscriber)[0]

    expected_codes = set(
        (
            'an5 an4 an7 an6 ax5 ax6 ax7 an8 px6 ax4 ax8 px5 0ah 0px'
            ' px8 0fh px7 px4 pn4 pn5 pn6 pn7 px0'
        ).split(' ')
    )
    self.assertSetEqual(set(doc['selector_codes'].split(' ')), expected_codes)
def testIPNewsHtmlToText(self):
    """Check the plain-text rendering of an HTML story body.

    Formats an article whose ``body_html`` holds a ``<br>`` and two
    paragraphs, then asserts the exact ``article_text`` of the first
    formatted item.
    """
    article = {
        'source': 'AAP',
        'anpa_category': [{'qcode': 'a'}],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '02011001'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'text',
        # The source joined these two paragraphs with a backslash
        # continuation inside the literal; the whitespace between them is
        # kept as a single space.
        'body_html': (
            '<p>The story body line 1<br>Line 2</p>'
            ' <p>abcdefghi abcdefghi abcdefghi abcdefghi '
            'abcdefghi abcdefghi abcdefghi abcdefghi more</p>'
        ),
        'word_count': '1',
        'priority': 1,
    }

    # NOTE(review): the callee of this lookup was truncated in the source
    # (only "'subscribers', None, None)[0]" survived). Restored as a
    # data-layer find; confirm against the test fixture.
    subscriber = self.app.data.find('subscribers', None, None)[0]

    f = AAPIpNewsFormatter()
    _seq, item = f.format(article, subscriber)[0]

    expected = (
        '\r\nThe story body line 1 \r\nLine 2 \r\n\r\n'
        'abcdefghi abcdefghi abcdefghi abcdefghi '
        'abcdefghi abcdefghi abcdefghi abcdefghi \r\nmore'
    )
    self.assertEqual(item['article_text'], expected)
def test_aap_ipnews_formatter_with_body_footer(self):
    """Check that ``body_footer`` is appended to the formatted article text.

    A copy of ``self.article`` gets a ``body_footer``; the formatted item
    is expected to contain the footer after a ``<br>`` in ``article_text``.
    The sequence number is checked separately and removed before the
    remaining fields are compared.
    """
    # NOTE(review): the callee of this lookup was truncated in the source
    # (only "'subscribers', None, None)[0]" survived). Restored as a
    # data-layer find; confirm against the test fixture.
    subscriber = self.app.data.find('subscribers', None, None)[0]

    doc = self.article.copy()
    doc['body_footer'] = 'call helpline 999 if you are planning to quit smoking'

    f = AAPIpNewsFormatter()
    seq, item = f.format(doc, subscriber)[0]

    self.assertGreater(int(seq), 0)
    self.assertEqual(seq, item['sequence'])
    # The sequence varies between runs, so drop it before the exact compare.
    item.pop('sequence')
    self.assertDictEqual(
        item,
        {
            'category': 'a',
            'texttab': 't',
            'fullStory': 1,
            'ident': '0',
            'headline': 'VIC:This is a test headline',
            'service_level': 'a',
            'originator': 'AAP',
            'take_key': 'take_key',
            'article_text': 'The story body<br>call helpline 999 if you are planning to quit smoking',
            'priority': 'f',
            'usn': '1',
            'subject_matter': 'international law',
            'news_item_type': 'News',
            'subject_reference': '02011001',
            'subject': 'crime, law and justice',
            'wordcount': '1',
            'subject_detail': 'international court or tribunal',
            'genre': 'Current',
            'keyword': 'slugline',
            'author': 'joe',
        },
    )
def testIPNewsFormatter(self):
    """Check every field of the document produced for ``self.article``.

    The formatted item is decoded from JSON, its sequence number is checked
    against the returned ``seq`` and removed, and the remaining fields are
    compared with the expected dictionary.
    """
    # NOTE(review): the callee of this lookup was truncated in the source
    # (only "'subscribers', None, None)[0]" survived). Restored as a
    # data-layer find; confirm against the test fixture.
    subscriber = self.app.data.find('subscribers', None, None)[0]

    f = AAPIpNewsFormatter()
    seq, item = f.format(self.article, subscriber)[0]
    item = json.loads(item)

    self.assertGreater(int(seq), 0)
    self.assertEqual(seq, item['sequence'])
    # The sequence varies between runs, so drop it before the exact compare.
    item.pop('sequence')
    self.assertDictEqual(
        item,
        {
            'category': 'a',
            'texttab': 't',
            'fullStory': 1,
            'ident': '0',
            'headline': 'VIC:This is a test headline',
            'service_level': 'a',
            'originator': 'AAP',
            'take_key': 'take_key',
            'article_text': 'The story body',
            'priority': 'f',
            'usn': '1',
            'subject_matter': 'international law',
            'news_item_type': 'News',
            'subject_reference': '02011001',
            'subject': 'crime, law and justice',
            'wordcount': '1',
            'subject_detail': 'international court or tribunal',
            'genre': 'Current',
            'keyword': 'slugline',
            'author': 'joe',
        },
    )
def testIpNewsFormatterNoSubject(self):
    """Check the fallback subject reference when an article has no subject.

    Two articles are formatted with the same formatter: one with an empty
    ``subject`` list and a 'VIC' place, one with ``subject`` and ``place``
    both None. Both must produce ``subject_reference`` '00000000'; the
    headline carries the 'VIC:' prefix only in the first case.
    """
    def make_article(subject, place):
        # Build a fresh dict (and fresh nested objects) per call so the two
        # scenarios never share mutable state.
        return {
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': subject,
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': 'body',
            'word_count': '1',
            'priority': 1,
            'task': {'desk': 1},
            'urgency': 1,
            'place': place,
        }

    # NOTE(review): the callee of this lookup was truncated in the source
    # (only "'subscribers', None, None)[0]" survived). Restored as a
    # data-layer find; confirm against the test fixture.
    subscriber = self.app.data.find('subscribers', None, None)[0]
    f = AAPIpNewsFormatter()

    # Empty subject list, place present.
    article = make_article([], [{'qcode': 'VIC', 'name': 'VIC'}])
    _seq, doc = f.format(article, subscriber)[0]
    doc = json.loads(doc)
    self.assertEqual(doc['subject_reference'], '00000000')
    self.assertEqual(doc['headline'], 'VIC:This is a test headline')

    # Subject and place both None.
    article = make_article(None, None)
    _seq, doc = f.format(article, subscriber)[0]
    doc = json.loads(doc)
    self.assertEqual(doc['subject_reference'], '00000000')
    self.assertEqual(doc['headline'], 'This is a test headline')
def Test_is_in_subject(self):
    """Exercise ``_is_in_subject`` against an article with two subject qcodes.

    With qcodes '04001005' and '15011002' the helper must report True for
    '150' and '04001' and False for '151' (presumably a prefix match;
    verify against the formatter).

    NOTE(review): the name starts with an upper-case ``Test``; unittest's
    default loader only collects methods prefixed with lower-case ``test``,
    so confirm this method is actually being run.
    """
    story = {
        'subject': [{'qcode': code} for code in ('04001005', '15011002')],
    }
    formatter = AAPIpNewsFormatter()

    matches_150 = formatter._is_in_subject(story, '150')
    self.assertTrue(matches_150)

    matches_151 = formatter._is_in_subject(story, '151')
    self.assertFalse(matches_151)

    matches_04001 = formatter._is_in_subject(story, '04001')
    self.assertTrue(matches_04001)
def Test_set_selector_codes(self):
    """Check the selector codes written into an ODBC item.

    Calls ``_set_selector_codes`` for an article on desk 1 with urgency 3,
    subscriber name 'ipnews' and category 'A', then asserts the exact set
    of codes stored under ``selector_codes``.

    NOTE(review): the name starts with an upper-case ``Test``; unittest's
    default loader only collects methods prefixed with lower-case ``test``,
    so confirm this method is actually being run.
    """
    article = {
        'task': {'desk': 1},
        'slugline': 'Test',
        'urgency': 3,
    }
    f = AAPIpNewsFormatter()
    odbc_item = {}

    # NOTE(review): the context-manager expression and its colon were lost
    # in the source ("with f._set_selector_codes(...) self.assert...").
    # Restored as an application context; confirm against the test fixture.
    with self.app.app_context():
        f._set_selector_codes(article, 'ipnews', odbc_item, 'A')

        self.assertSetEqual(
            set(odbc_item['selector_codes'].split()),
            set('and axd pnd cxd 0fh 0ir 0px 0ah 0hw cxx axx cnd 0nl az pxd pxx'.split()),
        )
def testIpNewsFormatterNoSubject(self):
    """Verify formatting of articles that carry no subject.

    Runs two scenarios through one formatter: an empty ``subject`` list
    with a 'VIC' place, then ``subject`` and ``place`` both None. Each must
    yield ``subject_reference`` '00000000'; only the first headline gets
    the 'VIC:' prefix.
    """
    # (subject, place, expected headline), in the order the checks run.
    scenarios = (
        ([], [{'qcode': 'VIC', 'name': 'VIC'}], 'VIC:This is a test headline'),
        (None, None, 'This is a test headline'),
    )

    # NOTE(review): the callee of this lookup was truncated in the source
    # (only "'subscribers', None, None)[0]" survived). Restored as a
    # data-layer find; confirm against the test fixture.
    subscriber = self.app.data.find('subscribers', None, None)[0]
    f = AAPIpNewsFormatter()

    for subject, place, expected_headline in scenarios:
        # A fresh article per scenario keeps the two runs independent.
        article = {
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': subject,
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': 'body',
            'word_count': '1',
            'priority': 1,
            'task': {'desk': 1},
            'urgency': 1,
            'place': place,
        }
        _seq, doc = f.format(article, subscriber)[0]
        doc = json.loads(doc)
        self.assertEqual(doc['subject_reference'], '00000000')
        self.assertEqual(doc['headline'], expected_headline)
def TestIPNewsFormatter(self):
    """Check every field of the item formatted for an output channel.

    Formats ``self.article`` with ``self.sel_codes``, checks the sequence
    number against the returned ``seq``, removes it, and compares the
    remaining fields with the expected dictionary.

    NOTE(review): the name starts with an upper-case ``Test``; unittest's
    default loader only collects methods prefixed with lower-case ``test``,
    so confirm this method is actually being run.
    """
    # NOTE(review): the context-manager expression and the lookup call were
    # truncated in the source (only "'output_channels', None, None)[0]"
    # survived). Restored as an application context plus a data-layer find;
    # confirm against the test fixture.
    with self.app.app_context():
        output_channel = self.app.data.find('output_channels', None, None)[0]

        f = AAPIpNewsFormatter()
        seq, item = f.format(self.article, output_channel, self.sel_codes)

        self.assertGreater(int(seq), 0)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(seq, item['sequence'])
        # The sequence varies between runs, so drop it before the compare.
        item.pop('sequence')
        self.assertDictEqual(
            item,
            {
                'category': 'a',
                'texttab': 't',
                'fullStory': 1,
                'ident': '0',
                'headline': 'This is a test headline',
                'selector_codes': 'aaa bbb',
                'service_level': 'a',
                'originator': 'AAP',
                'take_key': 'take_key',
                'article_text': 'The story body',
                'priority': '1',
                'usn': '1',
                'subject_matter': 'international law',
                'news_item_type': 'News',
                'subject_reference': '02011001',
                'subject': 'crime, law and justice',
                'wordcount': '1',
                'subject_detail': 'international court or tribunal',
                'genre': 'Current',
                'keyword': 'slugline',
                'author': 'joe',
            },
        )
def testIPNewsFormatter(self):
    """Verify the complete JSON document produced for ``self.article``.

    The item is JSON-decoded, its ``sequence`` is compared with the
    returned ``seq`` and removed, and the remaining fields must equal the
    expected dictionary exactly.
    """
    expected_item = {
        'category': 'a',
        'texttab': 't',
        'fullStory': 1,
        'ident': '0',
        'headline': 'VIC:This is a test headline',
        'service_level': 'a',
        'originator': 'AAP',
        'take_key': 'take_key',
        'article_text': 'The story body',
        'priority': 'f',
        'usn': '1',
        'subject_matter': 'international law',
        'news_item_type': 'News',
        'subject_reference': '02011001',
        'subject': 'crime, law and justice',
        'wordcount': '1',
        'subject_detail': 'international court or tribunal',
        'genre': 'Current',
        'keyword': 'slugline',
        'author': 'joe',
    }

    # NOTE(review): the callee of this lookup was truncated in the source
    # (only "'subscribers', None, None)[0]" survived). Restored as a
    # data-layer find; confirm against the test fixture.
    subscriber = self.app.data.find('subscribers', None, None)[0]

    formatter = AAPIpNewsFormatter()
    seq, raw_item = formatter.format(self.article, subscriber)[0]
    item = json.loads(raw_item)

    self.assertGreater(int(seq), 0)
    self.assertEqual(seq, item['sequence'])
    # The sequence varies between runs, so drop it before the exact compare.
    item.pop('sequence')
    self.assertDictEqual(item, expected_item)
def test_aap_ipnews_formatter_with_body_footer(self):
    """Verify that a ``body_footer`` ends up in the formatted article text.

    Works on a shallow copy of ``self.article`` with a footer added; the
    expected ``article_text`` is the story body, a ``<br>``, then the
    footer. ``sequence`` is validated and removed before the comparison.
    """
    footer = 'call helpline 999 if you are planning to quit smoking'
    expected_item = {
        'category': 'a',
        'texttab': 't',
        'fullStory': 1,
        'ident': '0',
        'headline': 'VIC:This is a test headline',
        'service_level': 'a',
        'originator': 'AAP',
        'take_key': 'take_key',
        'article_text': 'The story body<br>call helpline 999 if you are planning to quit smoking',
        'priority': 'f',
        'usn': '1',
        'subject_matter': 'international law',
        'news_item_type': 'News',
        'subject_reference': '02011001',
        'subject': 'crime, law and justice',
        'wordcount': '1',
        'subject_detail': 'international court or tribunal',
        'genre': 'Current',
        'keyword': 'slugline',
        'author': 'joe',
    }

    # NOTE(review): the callee of this lookup was truncated in the source
    # (only "'subscribers', None, None)[0]" survived). Restored as a
    # data-layer find; confirm against the test fixture.
    subscriber = self.app.data.find('subscribers', None, None)[0]

    doc = self.article.copy()
    doc['body_footer'] = footer

    formatter = AAPIpNewsFormatter()
    seq, item = formatter.format(doc, subscriber)[0]

    self.assertGreater(int(seq), 0)
    self.assertEqual(seq, item['sequence'])
    # The sequence varies between runs, so drop it before the exact compare.
    item.pop('sequence')
    self.assertDictEqual(item, expected_item)
def testMultipleCategories(self):
    """Check that an article with two categories yields two documents.

    The article carries the 'F' and 'S' categories, two subject qcodes and
    a 'VIC' place. Each formatted document is checked for the subject
    reference, subject detail and headline expected for its category.
    """
    article = {
        'source': 'AAP',
        'anpa_category': [
            {'name': 'Finance', 'qcode': 'F'},
            {'name': 'Overseas Sport', 'qcode': 'S'},
        ],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '04001005'}, {'qcode': '15011002'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'text',
        'body_html': 'body',
        'word_count': '1',
        'priority': 1,
        'task': {'desk': 1},
        'place': [{'qcode': 'VIC', 'name': 'VIC'}],
    }

    # NOTE(review): the callee of this lookup was truncated in the source
    # (only "'subscribers', None, None)[0]" survived). Restored as a
    # data-layer find; confirm against the test fixture.
    subscriber = self.app.data.find('subscribers', None, None)[0]

    f = AAPIpNewsFormatter()
    docs = f.format(article, subscriber)
    self.assertEqual(len(docs), 2)

    expected_codes = set(
        'cxx 0fh axx az and pxx 0ah 0ir 0px 0hw pnd pxd cnd cxd 0nl axd'.split(' ')
    )
    for _seq, doc in docs:
        if doc['category'] == 'S':
            self.assertEqual(doc['subject_reference'], '15011002')
            self.assertEqual(doc['subject_detail'], 'four-man sled')
            self.assertEqual(doc['headline'], 'VIC:This is a test headline')
        if doc['category'] == 'F':
            self.assertEqual(doc['subject_reference'], '04001005')
            self.assertEqual(doc['subject_detail'], 'viniculture')
            self.assertEqual(doc['headline'], 'VIC:This is a test headline')
            # NOTE(review): nesting was lost in the flattened source; the
            # selector-code check is kept in this last branch, the
            # narrowest reading. Confirm it is not meant for every doc.
            codes = set(doc['selector_codes'].split(' '))
            self.assertSetEqual(codes, expected_codes)
def Test_join_selector_codes(self):
    """Check how many codes ``_join_selector_codes`` returns for four names.

    Joins 'ipnewS', 'newsi', 'cnewsi' and 'cnewsi' (note the repeated name)
    and expects the whitespace-separated result to hold 12 entries.

    NOTE(review): the name starts with an upper-case ``Test``; unittest's
    default loader only collects methods prefixed with lower-case ``test``,
    so confirm this method is actually being run.
    """
    formatter = AAPIpNewsFormatter()
    joined = formatter._join_selector_codes('ipnewS', 'newsi', 'cnewsi', 'cnewsi')
    code_count = len(joined.split())
    self.assertEqual(code_count, 12)