def TestIPNewsHtmlToText(self):
        """Format an article containing HTML markup and check its ``article_text``.

        The ``body_html`` holds two ``<p>`` paragraphs and one ``<br>``; the
        formatted ``article_text`` is compared with a literal CRLF-separated
        string.
        """
        wrapped_text = (
            '\r\nThe story body line 1 \r\nLine 2 \r\n\r\nabcdefghi abcdefghi abcdefghi abcdefghi '
            'abcdefghi abcdefghi abcdefghi abcdefghi \r\nmore'
        )
        story = {
            'source': 'AAP',
            'anpa_category': [
                {'qcode': 'a'},
            ],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>The story body line 1<br>Line 2</p>\
                         <p>abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi more</p>',
            'word_count': '1',
            'priority': '1',
        }

        with self.app.app_context():
            recipient = self.app.data.find('subscribers', None, None)[0]
            formatted = AAPIpNewsFormatter().format(story, recipient)
            # Only the first (sequence, document) pair is inspected.
            _seq, output = formatted[0]
            self.assertEqual(output['article_text'], wrapped_text)
    def TestMultipleCategories(self):
        """Format an article with two ANPA categories and inspect each output document.

        Two documents are expected. The 'S' and 'F' documents are checked for
        their subject reference and subject detail; the selector codes of the
        'F' document are additionally compared as a set.
        """
        finance_selectors = 'cxx 0fh axx az and pxx 0ah 0ir 0px 0hw pnd pxd cnd cxd 0nl axd'
        story = {
            'source': 'AAP',
            'anpa_category': [
                {'name': 'Finance', 'qcode': 'F'},
                {'name': 'Overseas Sport', 'qcode': 'S'},
            ],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '04001005'},
                {'qcode': '15011002'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': 'body',
            'word_count': '1',
            'priority': '1',
            'task': {'desk': 1},
        }

        with self.app.app_context():
            recipient = self.app.data.find('subscribers', None, None)[0]
            formatted_docs = AAPIpNewsFormatter().format(story, recipient)
            self.assertEqual(len(formatted_docs), 2)

            for _seq, entry in formatted_docs:
                if entry['category'] == 'S':
                    self.assertEqual(entry['subject_reference'], '15011002')
                    self.assertEqual(entry['subject_detail'], 'four-man sled')
                if entry['category'] == 'F':
                    self.assertEqual(entry['subject_reference'], '04001005')
                    self.assertEqual(entry['subject_detail'], 'viniculture')
                    # Order of the codes is irrelevant, so compare as sets.
                    self.assertSetEqual(
                        set(entry['selector_codes'].split(' ')),
                        set(finance_selectors.split(' ')),
                    )
    def testGeoBlockNotTwoStates(self):
        """Check the selector codes when two states are disallowed in ``targeted_for``.

        The article is placed in VIC and lists New South Wales and Victoria
        with ``allow`` set to False; the resulting codes are compared as a set.
        """
        disallowed_states = [
            {'name': 'New South Wales', 'allow': False},
            {'name': 'Victoria', 'allow': False},
        ]
        story = {
            'source': 'AAP',
            'anpa_category': [
                {'qcode': 'a'},
            ],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '04001005'},
                {'qcode': '15011002'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': 'body',
            'word_count': '1',
            'priority': '1',
            'task': {'desk': 1},
            'urgency': 1,
            'place': [
                {'qcode': 'VIC', 'name': 'VIC'},
            ],
            'targeted_for': disallowed_states,
        }
        wanted_codes = set(
            ('an5 an4 an7 an6 ax5 ax6 ax7 an8 px6 ax4 ax8 px5 0ah 0px'
             ' px8 0fh px7 px4 pn4 pn5 pn6 pn7 px0').split(' ')
        )

        recipient = self.app.data.find('subscribers', None, None)[0]
        _seq, output = AAPIpNewsFormatter().format(story, recipient)[0]

        self.assertSetEqual(set(output['selector_codes'].split(' ')), wanted_codes)
    def testGeoBlockNotTwoStates(self):
        """Format a VIC article that disallows two states and verify its selector codes.

        ``targeted_for`` names New South Wales and Victoria, both with
        ``allow`` False. The space-separated ``selector_codes`` of the first
        formatted document must equal the expected set.
        """
        expected_parts = [
            'an5 an4 an7 an6 ax5 ax6 ax7 an8 px6 ax4 ax8 px5 0ah 0px',
            'px8 0fh px7 px4 pn4 pn5 pn6 pn7 px0',
        ]
        story = {
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '04001005'},
                {'qcode': '15011002'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': 'body',
            'word_count': '1',
            'priority': 1,
            'task': {'desk': 1},
            'urgency': 1,
            'place': [{'qcode': 'VIC', 'name': 'VIC'}],
            'targeted_for': [
                {'name': 'New South Wales', 'allow': False},
                {'name': 'Victoria', 'allow': False},
            ],
        }

        recipient = self.app.data.find('subscribers', None, None)[0]
        formatter = AAPIpNewsFormatter()
        first_result = formatter.format(story, recipient)[0]
        _seq, output = first_result

        actual_codes = set(output['selector_codes'].split(' '))
        # Joining the pieces with a single space rebuilds the full code string.
        wanted_codes = set(' '.join(expected_parts).split(' '))
        self.assertSetEqual(actual_codes, wanted_codes)
    def testIPNewsHtmlToText(self):
        """Format an HTML body and compare the produced ``article_text`` with literal text.

        The expected text is assembled from CRLF-joined lines; the last
        paragraph is expected to break before the trailing word ``more``.
        """
        story = {
            'source': 'AAP',
            'anpa_category': [
                {'qcode': 'a'},
            ],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>The story body line 1<br>Line 2</p>\
                         <p>abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi more</p>',
            'word_count': '1',
            'priority': 1,
        }
        # Leading '' yields the initial CRLF; the inner '' yields the blank line.
        wanted_lines = [
            '',
            'The story body line 1 ',
            'Line 2 ',
            '',
            'abcdefghi ' * 8,
            'more',
        ]
        wanted_text = '\r\n'.join(wanted_lines)

        recipient = self.app.data.find('subscribers', None, None)[0]
        formatter = AAPIpNewsFormatter()
        _seq, output = formatter.format(story, recipient)[0]

        self.assertEqual(output['article_text'], wanted_text)
    def test_aap_ipnews_formatter_with_body_footer(self):
        """Format a copy of ``self.article`` carrying a ``body_footer`` and compare the output.

        The expected ``article_text`` ends with ``<br>`` followed by the
        footer text. ``sequence`` is checked separately and removed before the
        remaining fields are compared as a dict.
        """
        wanted = {
            'article_text':
            'The story body<br>call helpline 999 if you are planning to quit smoking',
            'author': 'joe',
            'category': 'a',
            'fullStory': 1,
            'genre': 'Current',
            'headline': 'VIC:This is a test headline',
            'ident': '0',
            'keyword': 'slugline',
            'news_item_type': 'News',
            'originator': 'AAP',
            'priority': 'f',
            'service_level': 'a',
            'subject': 'crime, law and justice',
            'subject_detail': 'international court or tribunal',
            'subject_matter': 'international law',
            'subject_reference': '02011001',
            'take_key': 'take_key',
            'texttab': 't',
            'usn': '1',
            'wordcount': '1',
        }

        recipient = self.app.data.find('subscribers', None, None)[0]
        story = self.article.copy()
        story['body_footer'] = 'call helpline 999 if you are planning to quit smoking'

        sequence, output = AAPIpNewsFormatter().format(story, recipient)[0]

        self.assertGreater(int(sequence), 0)
        self.assertEqual(sequence, output['sequence'])
        # The sequence is covered by the assertions above, so drop it here.
        output.pop('sequence')
        self.assertDictEqual(output, wanted)
Example #7
0
    def testIPNewsFormatter(self):
        """Format ``self.article`` and compare the JSON-decoded output with a full expected dict.

        ``sequence`` is checked against the returned sequence value and then
        removed before the remaining fields are compared.
        """
        wanted = {
            'article_text': 'The story body',
            'author': 'joe',
            'category': 'a',
            'fullStory': 1,
            'genre': 'Current',
            'headline': 'VIC:This is a test headline',
            'ident': '0',
            'keyword': 'slugline',
            'news_item_type': 'News',
            'originator': 'AAP',
            'priority': 'f',
            'service_level': 'a',
            'subject': 'crime, law and justice',
            'subject_detail': 'international court or tribunal',
            'subject_matter': 'international law',
            'subject_reference': '02011001',
            'take_key': 'take_key',
            'texttab': 't',
            'usn': '1',
            'wordcount': '1',
        }

        recipient = self.app.data.find('subscribers', None, None)[0]
        sequence, payload = AAPIpNewsFormatter().format(self.article, recipient)[0]
        decoded = json.loads(payload)

        self.assertGreater(int(sequence), 0)
        self.assertEqual(sequence, decoded['sequence'])
        decoded.pop('sequence')
        self.assertDictEqual(decoded, wanted)
Example #8
0
    def testIpNewsFormatterNoSubject(self):
        """Check the formatted output for articles that carry no subject.

        Two articles are formatted with the same formatter: one with an empty
        subject list and a VIC place, one with ``subject`` and ``place`` both
        None. Both must yield subject_reference '00000000'; only the first
        headline is expected to carry the 'VIC:' prefix.
        """

        def build_article(subject, place):
            # A fresh dict (and fresh nested values) per call, so the two
            # formatting runs never share objects.
            return {
                'source': 'AAP',
                'anpa_category': [{'qcode': 'a'}],
                'headline': 'This is a test headline',
                'byline': 'joe',
                'slugline': 'slugline',
                'subject': subject,
                'anpa_take_key': 'take_key',
                'unique_id': '1',
                'type': 'text',
                'body_html': 'body',
                'word_count': '1',
                'priority': 1,
                'task': {'desk': 1},
                'urgency': 1,
                'place': place,
            }

        with_place = build_article([], [{'qcode': 'VIC', 'name': 'VIC'}])
        recipient = self.app.data.find('subscribers', None, None)[0]
        formatter = AAPIpNewsFormatter()

        _seq, raw = formatter.format(with_place, recipient)[0]
        decoded = json.loads(raw)
        self.assertEqual(decoded['subject_reference'], '00000000')
        self.assertEqual(decoded['headline'], 'VIC:This is a test headline')

        without_place = build_article(None, None)
        _seq, raw = formatter.format(without_place, recipient)[0]
        decoded = json.loads(raw)
        self.assertEqual(decoded['subject_reference'], '00000000')
        self.assertEqual(decoded['headline'], 'This is a test headline')
 def Test_is_in_subject(self):
     """Probe ``_is_in_subject`` with three code prefixes against a two-subject article."""
     story = {
         'subject': [
             {'qcode': '04001005'},
             {'qcode': '15011002'},
         ],
     }
     formatter = AAPIpNewsFormatter()

     # (prefix, whether the helper is expected to report a match)
     probes = (
         ('150', True),
         ('151', False),
         ('04001', True),
     )
     for prefix, should_match in probes:
         outcome = formatter._is_in_subject(story, prefix)
         if should_match:
             self.assertTrue(outcome)
         else:
             self.assertFalse(outcome)
 def Test_set_selector_codes(self):
     """Call ``_set_selector_codes`` and compare the codes stored on the output item as a set."""
     story = {
         'task': {'desk': 1},
         'slugline': 'Test',
         'urgency': 3,
     }
     formatter = AAPIpNewsFormatter()
     target_item = {}
     wanted_codes = 'and axd pnd cxd 0fh 0ir 0px 0ah 0hw cxx axx cnd 0nl az pxd pxx'

     with self.app.app_context():
         # The helper writes its result into target_item['selector_codes'].
         formatter._set_selector_codes(story, 'ipnews', target_item, 'A')
         produced = set(target_item['selector_codes'].split())
         self.assertSetEqual(produced, set(wanted_codes.split()))
    def testIpNewsFormatterNoSubject(self):
        """Format subject-less articles and check the subject reference and headline.

        Runs two scenarios through one formatter: an empty subject list with a
        VIC place, then ``subject`` and ``place`` both None. Each must produce
        subject_reference '00000000' and the headline listed for the scenario.
        """
        # (subject, place, expected headline)
        scenarios = (
            ([], [{'qcode': 'VIC', 'name': 'VIC'}], 'VIC:This is a test headline'),
            (None, None, 'This is a test headline'),
        )

        recipient = self.app.data.find('subscribers', None, None)[0]
        formatter = AAPIpNewsFormatter()

        for subject, place, wanted_headline in scenarios:
            # The article dict is rebuilt on every pass, so nothing is shared.
            story = {
                'source': 'AAP',
                'anpa_category': [
                    {'qcode': 'a'},
                ],
                'headline': 'This is a test headline',
                'byline': 'joe',
                'slugline': 'slugline',
                'subject': subject,
                'anpa_take_key': 'take_key',
                'unique_id': '1',
                'type': 'text',
                'body_html': 'body',
                'word_count': '1',
                'priority': 1,
                'task': {
                    'desk': 1,
                },
                'urgency': 1,
                'place': place,
            }
            _seq, raw = formatter.format(story, recipient)[0]
            decoded = json.loads(raw)
            self.assertEqual(decoded['subject_reference'], '00000000')
            self.assertEqual(decoded['headline'], wanted_headline)
 def TestIPNewsFormatter(self):
     """Format ``self.article`` for an output channel and compare the whole result.

     The formatter is called with the first ``output_channels`` record and
     ``self.sel_codes``. The returned sequence must be positive and equal to
     the item's ``sequence`` field, which is then removed before the
     remaining fields are compared with the expected dict.
     """
     with self.app.app_context():
         output_channel = self.app.data.find('output_channels', None, None)[0]
         f = AAPIpNewsFormatter()
         seq, item = f.format(self.article, output_channel, self.sel_codes)
         self.assertGreater(int(seq), 0)
         # assertEqual replaces the deprecated assertEquals alias, which was
         # removed from unittest in Python 3.12.
         self.assertEqual(seq, item['sequence'])
         item.pop('sequence')
         self.assertDictEqual(item,
                              {'category': 'a', 'texttab': 't', 'fullStory': 1, 'ident': '0',
                               'headline': 'This is a test headline', 'selector_codes': 'aaa bbb',
                               'service_level': 'a', 'originator': 'AAP', 'take_key': 'take_key',
                               'article_text': 'The story body', 'priority': '1', 'usn': '1',
                               'subject_matter': 'international law', 'news_item_type': 'News',
                               'subject_reference': '02011001', 'subject': 'crime, law and justice',
                               'wordcount': '1', 'subject_detail': 'international court or tribunal',
                               'genre': 'Current', 'keyword': 'slugline', 'author': 'joe'})
    def testIPNewsFormatter(self):
        """Format ``self.article``, decode the JSON result and compare every field.

        The ``sequence`` field is verified against the returned sequence value
        and then dropped before the dict comparison.
        """
        wanted = {
            'category': 'a',
            'texttab': 't',
            'fullStory': 1,
            'ident': '0',
            'headline': 'VIC:This is a test headline',
            'service_level': 'a',
            'originator': 'AAP',
            'take_key': 'take_key',
            'article_text': 'The story body',
            'priority': 'f',
            'usn': '1',
            'subject_matter': 'international law',
            'news_item_type': 'News',
            'subject_reference': '02011001',
            'subject': 'crime, law and justice',
            'wordcount': '1',
            'subject_detail': 'international court or tribunal',
            'genre': 'Current',
            'keyword': 'slugline',
            'author': 'joe',
        }

        recipient = self.app.data.find('subscribers', None, None)[0]
        formatter = AAPIpNewsFormatter()
        sequence, payload = formatter.format(self.article, recipient)[0]
        decoded = json.loads(payload)

        self.assertGreater(int(sequence), 0)
        self.assertEqual(sequence, decoded['sequence'])
        decoded.pop('sequence')
        self.assertDictEqual(decoded, wanted)
    def test_aap_ipnews_formatter_with_body_footer(self):
        """Add a ``body_footer`` to a copy of ``self.article`` and check the formatted item.

        The expected ``article_text`` is 'The story body<br>' followed by the
        footer text. ``sequence`` is verified on its own and removed before
        the remaining fields are compared.
        """
        footer = 'call helpline 999 if you are planning to quit smoking'

        recipient = self.app.data.find('subscribers', None, None)[0]
        story = self.article.copy()
        story['body_footer'] = footer

        formatter = AAPIpNewsFormatter()
        sequence, output = formatter.format(story, recipient)[0]

        self.assertGreater(int(sequence), 0)
        self.assertEqual(sequence, output['sequence'])
        output.pop('sequence')

        wanted = {
            'category': 'a',
            'texttab': 't',
            'fullStory': 1,
            'ident': '0',
            'headline': 'VIC:This is a test headline',
            'service_level': 'a',
            'originator': 'AAP',
            'take_key': 'take_key',
            'article_text': 'The story body<br>' + footer,
            'priority': 'f',
            'usn': '1',
            'subject_matter': 'international law',
            'news_item_type': 'News',
            'subject_reference': '02011001',
            'subject': 'crime, law and justice',
            'wordcount': '1',
            'subject_detail': 'international court or tribunal',
            'genre': 'Current',
            'keyword': 'slugline',
            'author': 'joe',
        }
        self.assertDictEqual(output, wanted)
    def testMultipleCategories(self):
        """Format a two-category article placed in VIC and check each output document.

        Two documents are expected. The 'S' and 'F' documents are each checked
        for subject reference, subject detail and the 'VIC:'-prefixed
        headline; the 'F' document's selector codes are also compared as a set.
        """
        # (category qcode, expected subject_reference, expected subject_detail)
        per_category = (
            ('S', '15011002', 'four-man sled'),
            ('F', '04001005', 'viniculture'),
        )
        finance_selectors = 'cxx 0fh axx az and pxx 0ah 0ir 0px 0hw pnd pxd cnd cxd 0nl axd'
        story = {
            'source': 'AAP',
            'anpa_category': [
                {'name': 'Finance', 'qcode': 'F'},
                {'name': 'Overseas Sport', 'qcode': 'S'},
            ],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '04001005'},
                {'qcode': '15011002'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': 'body',
            'word_count': '1',
            'priority': 1,
            'task': {'desk': 1},
            'place': [
                {'qcode': 'VIC', 'name': 'VIC'},
            ],
        }

        recipient = self.app.data.find('subscribers', None, None)[0]
        formatted_docs = AAPIpNewsFormatter().format(story, recipient)
        self.assertEqual(len(formatted_docs), 2)

        for _seq, entry in formatted_docs:
            for qcode, reference, detail in per_category:
                if entry['category'] != qcode:
                    continue
                self.assertEqual(entry['subject_reference'], reference)
                self.assertEqual(entry['subject_detail'], detail)
                self.assertEqual(entry['headline'], 'VIC:This is a test headline')
                if qcode == 'F':
                    # Selector codes are only verified for the 'F' document.
                    self.assertSetEqual(
                        set(entry['selector_codes'].split(' ')),
                        set(finance_selectors.split(' ')),
                    )
 def Test_join_selector_codes(self):
     """Join four selector-code names and expect twelve whitespace-separated codes."""
     joined = AAPIpNewsFormatter()._join_selector_codes('ipnewS', 'newsi', 'cnewsi', 'cnewsi')
     code_count = len(joined.split())
     self.assertEqual(code_count, 12)