class NitfFormatterTest(TestCase):
    """Tests for the legal slugline helper of the base ``Formatter``."""

    def setUp(self):
        """Create the NITF and base formatters, then run ``init_app`` on the test app."""
        super().setUp()
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """An item flagged for legal is expected to get a ``Legal: `` slugline prefix."""
        legal_item = dict(
            slugline='Obama Republican Healthc',
            flags=dict(marked_for_legal=True),
        )
        append_legal = self.base_formatter.append_legal

        self.assertEqual(append_legal(legal_item), 'Legal: Obama Republican Healthc')
        # with truncate=True the prefixed slugline is expected to come back shortened
        self.assertEqual(append_legal(legal_item, truncate=True), 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """An item not flagged for legal is expected to keep its slugline unchanged."""
        plain_item = dict(
            slugline='Obama Republican Healthc',
            flags=dict(marked_for_legal=False),
        )

        self.assertEqual(
            self.base_formatter.append_legal(plain_item),
            'Obama Republican Healthc',
        )
    def __init__(self):
        """Initialise the base ``Formatter`` and build the HTML to NITF rule table."""
        Formatter.__init__(self)

        def emphasis(css_class=None):
            """Return a fresh ``em`` rule, with a ``class`` attribute when one is given."""
            rule = {'nitf': 'em'}
            if css_class is not None:
                rule['attrib'] = {'class': css_class}
            return rule

        # in elements' dicts, following key can be used:
        # - 'nitf': new NITF compatible element to use (empty string to remove)
        # - 'attrib': new attribute to use (replace existing one if set)
        # - 'filter': callback to use for complex changes (root_element and element as arguments)
        rules = [
            ('p', {}),
            ('b', emphasis('bold')),
            ('br', {'filter': self.br_filter}),
            ('strong', emphasis('bold')),
            ('i', emphasis('italic')),
            ('em', emphasis('italic')),
            ('u', emphasis('underscore')),
            ('strike', emphasis()),
            ('sup', {}),
            ('sub', {}),
            ('a', {}),
            ('img', {'nitf': ''}),  # <img> use <media> in nitf, so we remove element
            ('blockquote', {'nitf': 'bq'}),
        ]
        # tags carrying an empty rule dict
        rules += [(tag, {}) for tag in ('pre', 'ol', 'ul', 'li')]
        # FIXME: hl1 is not used here as it can only appear in <hedline>
        rules += [('h%d' % level, {'nitf': 'hl2'}) for level in range(1, 7)]
        # tables
        rules += [(tag, {}) for tag in ('table', 'tbody', 'tr', 'td', 'th')]
        # <style> may be there in case of bad paste
        rules.append(('style', {'nitf': EraseElement}))

        # dict() keeps the insertion order of the pairs above
        self.HTML2NITF = dict(rules)
Example #3
0
    def __init__(self):
        """Initialise the base ``Formatter`` and set up the ``HTML2NITF`` mapping."""
        Formatter.__init__(self)

        # every HTML heading level is mapped to the same NITF element
        # FIXME: hl1 is not used here as it can only appear in <hedline>
        headings = {'h{}'.format(level): {'nitf': 'hl2'} for level in range(1, 7)}

        # in elements' dicts, following key can be used:
        # - 'nitf': new NITF compatible element to use (empty string to remove)
        # - 'attrib': new attribute to use (replace existing one if set)
        # - 'filter': callback to use for complex changes (root_element and element as arguments)
        self.HTML2NITF = {
            'p': {},
            'b': {'nitf': 'em', 'attrib': {'class': 'bold'}},
            'br': {'filter': self.br_filter},
            'strong': {'nitf': 'em', 'attrib': {'class': 'bold'}},
            'i': {'nitf': 'em', 'attrib': {'class': 'italic'}},
            'em': {'nitf': 'em', 'attrib': {'class': 'italic'}},
            'u': {'nitf': 'em', 'attrib': {'class': 'underscore'}},
            'strike': {'nitf': 'em'},
            'sup': {},
            'sub': {},
            'a': {},
            'img': {'nitf': ''},  # <img> use <media> in nitf, so we remove element
            'blockquote': {'nitf': 'bq'},
            'pre': {},
            'ol': {},
            'ul': {},
            'li': {},
            **headings,
            # tables
            'table': {},
            'tbody': {},
            'tr': {},
            'td': {},
            'th': {},
            'style': {'nitf': EraseElement},  # <style> may be there in case of bad paste
        }
 def setUp(self):
     """Prepare the formatters and, unless ``self.article`` is already set, format the fixture."""
     super(TestCase, self).setUp()
     # shallow copy of ARTICLE carrying three service categories
     article_legacy = ARTICLE.copy()
     article_legacy['anpa_category'] = [
         {'name': 'service%d' % number} for number in (1, 2, 3)
     ]
     self.formatter = NTBNITFMultiFileFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
     self.tz = pytz.timezone(self.app.config['DEFAULT_TIMEZONE'])
     if self.article is not None:
         # formatting is done once for all tests to save time
         # as long as used attributes are not modified, it's fine
         return
     self.article = article_legacy
     subscriber = {'name': 'Test NTBNITF'}
     self.formatter_output = self.formatter.format(self.article, subscriber)
     self.docs = [entry['encoded_item'] for entry in self.formatter_output]
     self.nitf_xmls = list(map(etree.fromstring, self.docs))
     self.nitf_xml = self.nitf_xmls[0]
 def setUp(self):
     """Create the formatters, run the planning/agenda initialisers and insert the fixtures."""
     self.formatter = AgendaPlanningFormatter()
     self.base_formatter = Formatter()
     planning_init_app(self.app)
     init_agenda(self.app)
     # (resource name, attribute of this test case holding the documents);
     # each attribute is read right before its insert, in this order
     fixtures = (
         ('locations', 'locations'),
         ('agenda_city_map', 'city_map'),
         ('agenda_iptc_map', 'iptc_map'),
         ('contacts', 'contacts'),
     )
     for resource, attribute in fixtures:
         self.app.data.insert(resource, getattr(self, attribute))
    def __init__(self):
        """Initialise the base ``Formatter`` and build the HTML to NITF rule table."""
        Formatter.__init__(self)

        def em_rule(css_class):
            """Return a fresh rule turning a tag into ``em`` with the given class."""
            return {"nitf": "em", "attrib": {"class": css_class}}

        # in elements' dicts, following key can be used:
        # - 'nitf': new NITF compatible element to use (empty string to remove)
        # - 'attrib': new attribute to use (replace existing one if set)
        # - 'filter': callback to use for complex changes (root_element and element as arguments)
        pairs = [
            ("p", {}),
            ("b", em_rule("bold")),
            ("br", {"filter": self.br_filter}),
            ("strong", em_rule("bold")),
            ("i", em_rule("italic")),
            ("em", em_rule("italic")),
            ("u", em_rule("underscore")),
            ("strike", {"nitf": "em"}),
        ]
        pairs.extend((tag, {}) for tag in ("sup", "sub", "a"))
        # <img> use <media> in nitf, so we remove element
        pairs.append(("img", {"nitf": ""}))
        pairs.append(("blockquote", {"nitf": "bq"}))
        pairs.extend((tag, {}) for tag in ("pre", "ol", "ul", "li"))
        # FIXME: hl1 is not used here as it can only appear in <hedline>
        pairs.extend(("h%d" % level, {"nitf": "hl2"}) for level in range(1, 7))
        # tables
        pairs.extend((tag, {}) for tag in ("table", "tbody", "tr", "td", "th"))
        # <style> may be there in case of bad paste
        pairs.append(("style", {"nitf": EraseElement}))

        # dict() keeps the insertion order of the pairs above
        self.HTML2NITF = dict(pairs)
Example #7
0
 def setUp(self):
     """Prepare the formatters and, unless ``self.article`` is already set, format ARTICLE."""
     super().setUp()
     self.formatter = NTBNITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
     self.tz = pytz.timezone(self.app.config['DEFAULT_TIMEZONE'])
     if self.article is not None:
         # formatting is done once for all tests to save time
         # as long as used attributes are not modified, it's fine
         return
     self.article = ARTICLE
     subscriber = {'name': 'Test NTBNITF'}
     self.formatter_output = self.formatter.format(self.article, subscriber)
     first_output = self.formatter_output[0]
     self.doc = first_output['encoded_item']
     self.nitf_xml = etree.fromstring(self.doc)
Example #8
0
class NitfFormatterTest(TestCase):
    def setUp(self):
        """Create the NITF and base formatters, then run ``init_app`` on the test app."""
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """An item flagged for legal is expected to get a ``Legal: `` slugline prefix."""
        article = dict(
            slugline='Obama Republican Healthc',
            flags=dict(marked_for_legal=True),
        )
        # (extra keyword arguments, slugline expected back), checked in order
        cases = (
            ({}, 'Legal: Obama Republican Healthc'),
            ({'truncate': True}, 'Legal: Obama Republican '),
        )

        for extra_kwargs, expected in cases:
            slugline = self.base_formatter.append_legal(article, **extra_kwargs)
            self.assertEqual(slugline, expected)

    def test_append_legal_when_not_legal(self):
        """An item not flagged for legal is expected to keep its slugline unchanged."""
        unflagged = dict(
            slugline='Obama Republican Healthc',
            flags=dict(marked_for_legal=False),
        )

        self.assertEqual(
            self.base_formatter.append_legal(unflagged),
            'Obama Republican Healthc',
        )

    def test_formatter(self):
        """Format a minimal text item and check title, first paragraph and urgency."""
        article = dict([
            ('headline', 'test headline'),
            ('body_html', '<p>test body</p>'),
            ('type', 'text'),
            ('priority', '1'),
            ('_id', 'urn:localhost.abc'),
            ('urgency', 2),
        ])
        subscriber = {'name': 'Test Subscriber'}

        # the first formatted entry unpacks into a pair; only the document is used
        _seq, encoded = self.formatter.format(article, subscriber)[0]
        root = etree.fromstring(encoded)

        self.assertEqual(root.find('head/title').text, article['headline'])
        self.assertEqual(root.find('body/body.content/p').text, 'test body')
        urgency = root.find('head/docdata/urgency')
        self.assertEqual(urgency.get('ed-urg'), '2')

    def test_html2nitf(self):
        """Unknown tags and attributes are dropped (text kept); known tags become NITF ones."""

        def squash(text):
            # compare markup regardless of newlines and spaces
            return text.replace('\n', '').replace(' ', '')

        source_markup = dedent("""\
            <div>
                <unknown>
                    <p>
                        this should be still here
                    </p>
                </unknown>
                <p style="this='is';some='style'">
                    <strong>this text should be
                        <i>modified</i>
                    </strong>
                    so
                    <span>[this should not be removed]</span>
                    unkown
                    <em unknown_attribute="toto">elements</em>
                    and
                    <a bad_attribute="to_remove">attributes</a>
                    are
                    <h6>removed</h6>
                </p>
            </div>
            """)
        html = etree.fromstring(source_markup)

        nitf = self.formatter.html2nitf(html, attr_remove=['style'])

        expected = squash(dedent("""\
            <div>
                    <p>
                        this should be still here
                    </p>
                <p>
                    <em class="bold">this text should be
                        <em class="italic">modified</em>
                    </em>
                    so [this should not be removed] unkown
                    <em class="italic">elements</em>
                    and
                    <a>attributes</a>
                    are
                    <hl2>removed</hl2>
                </p>
            </div>"""))
        serialised = etree.tostring(nitf, encoding='unicode')
        self.assertEqual(squash(serialised), expected)

    def test_html2nitf_br(self):
        """Check that <br/> is kept if it is a child of an enrichedText parent element"""
        source_markup = dedent("""\
            <div>
                <br/>the previous tag should be removed (but not the text)
                    <p>
                        the following tag <br/> should still be here
                        and the next one <br/> too
                    </p>
            </div>
            """)
        tree = etree.fromstring(source_markup)

        converted = self.formatter.html2nitf(tree, attr_remove=['style'])

        # whitespace is compared exactly here, so the layout below matters
        expected = dedent("""\
            <div>
                the previous tag should be removed (but not the text)
                    <p>
                        the following tag <br/> should still be here
                        and the next one <br/> too
                    </p>
            </div>""")
        serialised = etree.tostring(converted, encoding='unicode')
        self.assertEqual(serialised, expected)

    def test_body_content_br(self):
        """Format a long story whose paragraphs may end with ``<br>`` and check the first two ``<p>``.

        Only the first two paragraphs found under ``body/body.content`` are
        asserted on; the rest of the article is fixture data.
        """
        article = {
            "_id":
            "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "guid":
            "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "state":
            "published",
            # several paragraphs below end with "<br></p>"
            "body_html":
            "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it "
            "may have been a 10-minute delay by police that cost two lives.</p><p>Shortly after 2am "
            "on December 16, 2014, gunman Man Haron Monis fired at escaping hostages.</p><p>That first "
            "shot by Monis&nbsp;should have satisfied a so-called \"secondary trigger\" prompting "
            "police to storm the Martin Place stronghold with some element of surprise, NSW Coroner "
            "Michael Barnes found on Wednesday.</p><p>\"(But) the 10 minutes that lapsed without "
            "decisive action by police was too long,\" Mr Barnes&nbsp;said in Sydney.</p><p>By the "
            "time police smashed their way into the cafe at 2.14am in a flurry of stun-grenade "
            "explosions, manager Tori Johnson had been forced to his knees and shot in the head."
            "</p><p>Barrister Katrina Dawson was killed by police bullet fragments.</p><p>New police "
            "commissioner Mick Fuller - who was one of the first commanders at the scene - admitted on "
            "Wednesday tactical officers should have gone in earlier.</p><p>Mr Fuller went further than "
            "the coroner when he told the Ten Network: \"We probably should have gone in before the "
            "first shot.\"</p><p>\"Early intervention for terrorist incidents is the way forward, "
            "knowing what we know now,\" he said.</p><p>\"But nevertheless it's still a very "
            "dangerous tactic and people will potentially still lose their lives.\"</p><p>Mr Barnes "
            "has made 45 findings on everything from police negotiation strategies to greater legal "
            "protections for frontline officers in terrorist situations.<br></p><p>He lay the blame "
            "for the loss of life squarely at the feet of Monis, but other parties, including prosecution "
            "solicitors and a consulting psychiatrist, copped some criticism.</p><p>The cafe "
            "was \"transformed into a prison run by a vicious maniac\" that day, Mr Barnes "
            "said.<br></p><p class=\"\">The traditional \"contain and negotiate\" strategy was "
            "appropriate early on but police failed to adequately reassess taking into account the "
            "headway they were making with Monis.<br></p><p class=\"\">\"Sadly, it failed,\" Mr Barnes "
            "said on Wednesday.<br></p><p class=\"\">\"The 'contain and negotiate' approach may not be "
            "the best ongoing response to a terrorist incident if the offenders believe, whether or not "
            "they survive, their cause will benefit from the publicity generated by a protracted "
            "siege.\"<br></p><p class=\"\">Mr Fuller said the containment strategy had saved countless "
            "lives over the years and wouldn't be abandoned for non-terrorist incidents.</p>"
            "<p class=\"\">Police mistakenly thought Monis' backpack could house explosives, "
            "but Mr Barnes noted senior officers were \"unduly reluctant\" to initiate direct action "
            "plans during siege situations.</p><p class=\"\">The \"primary trigger\" for such an "
            "assault was identified as the death or serious injury of a hostage - evidence which "
            "disgusted the families of victims and survivors.</p><p class=\"\">The police response was "
            "at times hampered by mishaps.</p><p>Eight calls to a phone number hostages expected would "
            "connect them with negotiators were missed, which was a \"significant failure\", Mr Barnes "
            "said.</p><p>Commanders and tactical officers received specialist terrorist training but "
            "negotiators received \"little, if any\", the coroner added.</p><p>The stand-off could not "
            "have eventuated in the first place if Monis had not been granted bail on accessory to murder"
            " and dozens of sexual assault offences.<br></p><p>Mr Barnes found the work of an Office of "
            "the Director of Public Prosecutions solicitor in December 2013 was inadequate, "
            "\"erroneously\" advising a court Monis didn't have to show exceptional circumstances "
            "in arguing for bail.</p><p>Police also made a mistake by issuing Monis with a court "
            "attendance notice for the sexual offences in October 2014 rather than arresting him"
            ".</p><p>Monis was already on bail at the time for a commonwealth offence after he'd "
            "written offensive letters to the families of Australian soldiers killed in the Middle"
            " East.</p><p>States can find it difficult to access commonwealth records, Mr Barnes said,"
            " and he called for that to be remedied.</p><p>Some of the coroner's harshest individual "
            "criticism was reserved for the consultant psychiatrist who advised police.</p>"
            "<p>His \"sub-optimal\" performance included a belief that Monis was merely grandstanding,"
            " Mr Barnes found.</p><p>The doctor should not have been permitted to advise on negotiation "
            "strategy and he made \"erroneous and unrealistic assessments\" of what was happening inside "
            "the cafe.</p><p>The psychiatrist's advice was ambiguous and Islamic terrorism was beyond his"
            " expertise.</p><p>\"The police commanders underestimated the threat Monis posed,\" Mr Barnes"
            " said, in part blaming their reliance upon the psychiatrist's opinion.</p><p>He recommended "
            "a more diverse panel of experts be used in the future.</p><p>The coroner stated police "
            "snipers couldn't have ended the siege despite a 10-minute window where they had clear sight "
            "of a head that could have been Monis.</p><p>Those gunmen were never confident in their legal "
            "justification for a \"kill shot\" and Mr Barnes suggested their&nbsp;power to use force "
            "should be more clearly defined.</p><p>The coroner did acknowledge that sending tactical "
            "officers into the cafe after their hand was forced was a decision no commander would "
            "ever want to face.</p><p>\"The bravery of these officers inspires awe,\" he said.<br></p>",
            "pubstatus":
            "usable",
            "type":
            "text",
            "abstract":
            "<p>The NSW coroner believes a 10-minute period of inaction by police before the bloody end "
            "of the 2014 Lindt Cafe siege was \"too long\".</p>",
            "priority":
            6,
            "unique_id":
            12055427,
            "format":
            "HTML",
            "genre": [{
                "qcode": "Wrap",
                "name": "Wrap"
            }],
            "word_count":
            843,
            "source":
            "AAP",
            "urgency":
            1,
            "subject": [{
                "qcode": "16001000",
                "parent": "16000000",
                "name": "act of terror"
            }, {
                "qcode": "02001010",
                "parent": "02001000",
                "name": "terrorism"
            }],
            "flags": {
                "marked_archived_only": False,
                "marked_for_legal": False,
                "marked_for_not_publication": False,
                "marked_for_sms": False
            },
            "headline":
            "'Ten minutes was too long': Lindt siege",
            "dateline": {
                "source": "AAP",
                "text": "SYDNEY, May 24 AAP -",
                "located": {
                    "city": "Sydney",
                    "country_code": "AU",
                    "country": "Australia",
                    "dateline": "city",
                    "state_code": "NSW",
                    "state": "New South Wales",
                    "alt_name": "",
                    "tz": "Australia/Sydney",
                    "city_code": "Sydney"
                }
            },
            "anpa_category": [{
                "qcode": "a",
                "name": "Australian General News"
            }],
            "unique_name":
            "#12055427",
            "place": [{
                "name": "NSW",
                "qcode": "NSW",
                "country": "Australia",
                "world_region": "Oceania",
                "group": "Australia",
                "state": "New South Wales"
            }],
            "sign_off":
            "SN/jmk/jcd/pmu",
            "anpa_take_key":
            "2nd Wrap (pix/video available)",
            "language":
            "en",
            "slugline":
            "Cafe",
            "byline":
            "Jamie McKinnell",
            "version":
            2,
        }

        # formatted with an empty subscriber dict; the document is the second
        # item of the first entry returned
        response = self.formatter.format(article, {})
        nitf_xml = etree.fromstring(response[0][1])
        # the expected string ends with the whitespace that follows </p>
        self.assertEqual(
            etree.tostring(nitf_xml.find('body/body.content/p'),
                           encoding="unicode"),
            "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it may "
            "have been a 10-minute delay by police that cost two lives.</p>\n      "
        )
        # the second paragraph must start right at its text
        self.assertTrue(
            nitf_xml.findall('body/body.content/p')[1].text.startswith(
                'Shortly after 2am'))

    def test_html2nitf_br_last(self):
        """Check that last <br/> in a <p> element is removed"""
        source_markup = dedent("""\
            <div>
                    <p>
                        the following tag <br/> should still be here
                    </p>
                    <p>
                        and the next one <br/> too
                    </p>
                    <p>
                        but not the last one:<br/>
                    </p>
            </div>
            """)
        tree = etree.fromstring(source_markup)

        converted = self.formatter.html2nitf(tree, attr_remove=['style'])

        # whitespace is compared exactly here, so the layout below matters
        expected = dedent("""\
            <div>
                    <p>
                        the following tag <br/> should still be here
                    </p>
                    <p>
                        and the next one <br/> too
                    </p>
                    <p>
                        but not the last one:
                    </p>
            </div>""")
        serialised = etree.tostring(converted, encoding='unicode')
        self.assertEqual(serialised, expected)

    def test_html2nitf_style_cleaning(self):
        """Check that <style> element and style attribute are removed from HTML"""

        def squash(text):
            # compare markup regardless of newlines and spaces
            return text.replace('\n', '').replace(' ', '')

        source_markup = dedent("""\
            <div>
                <style type="text/css">
                    p { margin-bottom: 0.25cm; line-height: 120%; }a:link {  }
                </style>
                <p style="margin-bottom: 0cm; line-height: 100%">Test bla bla bla</p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <br/>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <font face="DejaVu Sans, sans-serif">
                        <font style="font-size: 14pt" size="4">
                            <i>
                                <u>
                                    <b>test</b>
                                </u>
                            </i>
                        </font>
                    </font>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">toto</p>
                <p style="margin-bottom: 0cm; line-height: 100%">titi</p>
            </div>
            """)
        html = etree.fromstring(source_markup)

        nitf = self.formatter.html2nitf(html, attr_remove=['style'])

        expected = squash(dedent("""\
            <div>
                <p>Test bla bla bla</p>
                <p>
                </p>
                <p>
                    <em class="italic">
                        <em class="underscore">
                            <em class="bold">test</em>
                        </em>
                    </em>
                </p>
                <p>toto</p>
                <p>titi</p>
            </div>"""))
        serialised = etree.tostring(nitf, encoding='unicode')
        self.assertEqual(squash(serialised), expected)

    def test_table(self):
        """A plain table is expected to come out of ``html2nitf`` unchanged."""
        pretty_markup = """
        <div>
        <table>
            <tbody>
                <tr>
                    <td>Table cell 1</td>
                    <td>Table cell 2</td>
                    <td>Table cell 3</td>
                </tr>
                <tr>
                    <td>Table cell 2.1</td>
                    <td>Table cell 2.2</td>
                    <td>Table cell 2.3</td>
                </tr>
                <tr>
                    <td>Table cell 3.1</td>
                    <td>Table cell 3.2</td>
                    <td>Table cell 3.3</td>
                </tr>
            </tbody>
        </table>
        </div>
        """
        # every newline and space is stripped, including those inside cell text
        html_raw = pretty_markup.replace('\n', '').replace(' ', '')

        converted = self.formatter.html2nitf(etree.fromstring(html_raw))

        serialised = etree.tostring(converted, encoding='unicode')
        self.assertEqual(serialised, html_raw)

    def test_company_codes(self):
        """A company code is expected to show up as ``<org>`` under ``body/body.head``."""
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001', 'name': 'international court or tribunal'},
                {'qcode': '02011002', 'name': 'extradition'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA',
                },
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{
                'qcode': 'Australia',
                'name': 'Australia',
                'state': '',
                'country': 'Australia',
                'world_region': 'Oceania',
            }],
            'company_codes': [{
                'name': 'YANCOAL AUSTRALIA LIMITED',
                'qcode': 'YAL',
                'security_exchange': 'ASX',
            }],
        }
        subscriber = {'name': 'Test Subscriber'}

        # the first formatted entry unpacks into a pair; only the document is used
        _seq, encoded = self.formatter.format(article, subscriber)[0]
        org = etree.fromstring(encoded).find('body/body.head/org')

        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')

    def testNoneAsciNamesContent(self):
        """Non-ASCII characters in ``body_html`` are expected to survive formatting."""
        body_text = 'Tommi Mäkinen crashes a Škoda in Äppelbo'
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>' + body_text + '</p>',
            'word_count': '1',
            'priority': 1,
            'linked_in_packages': [{'package': 'package', 'package_type': 'takes'}],
        }
        subscriber = {'name': 'Test Subscriber'}

        # the first formatted entry unpacks into a pair; only the document is used
        _seq, encoded = self.formatter.format(article, subscriber)[0]
        first_paragraph = etree.fromstring(encoded).find('body/body.content/p')

        self.assertEqual(first_paragraph.text, body_text)
 def setUp(self):
     """Run the parent ``setUp``, create both formatters, then run ``init_app`` on the test app."""
     super().setUp()
     self.formatter = NITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
class NitfFormatterTest(TestCase):
    """Tests for the NITF formatter output and the base formatter's legal slugline helper."""

    def setUp(self):
        """Run the parent ``setUp``, create both formatters, then run ``init_app`` on the test app."""
        super().setUp()
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """An item flagged for legal is expected to get a ``Legal: `` slugline prefix."""
        legal_item = dict(
            slugline='Obama Republican Healthc',
            flags=dict(marked_for_legal=True),
        )
        append_legal = self.base_formatter.append_legal

        self.assertEqual(append_legal(legal_item), 'Legal: Obama Republican Healthc')
        # with truncate=True the prefixed slugline is expected to come back shortened
        self.assertEqual(append_legal(legal_item, truncate=True), 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """An item not flagged for legal is expected to keep its slugline unchanged."""
        plain_item = dict(
            slugline='Obama Republican Healthc',
            flags=dict(marked_for_legal=False),
        )

        self.assertEqual(
            self.base_formatter.append_legal(plain_item),
            'Obama Republican Healthc',
        )

    def test_formatter(self):
        """Format a minimal text item and check title, first paragraph and urgency."""
        article = dict([
            ('headline', 'test headline'),
            ('body_html', '<p>test body</p>'),
            ('type', 'text'),
            ('priority', '1'),
            ('_id', 'urn:localhost.abc'),
            ('urgency', 2),
        ])
        subscriber = {'name': 'Test Subscriber'}

        # the first formatted entry unpacks into a pair; only the document is used
        _seq, encoded = self.formatter.format(article, subscriber)[0]
        root = etree.fromstring(encoded)

        self.assertEqual(root.find('head/title').text, article['headline'])
        self.assertEqual(root.find('body/body.content/p').text, 'test body')
        urgency = root.find('head/docdata/urgency')
        self.assertEqual(urgency.get('ed-urg'), '2')

    def test_company_codes(self):
        """A company code is expected to show up as ``<org>`` under ``body/body.head``."""
        subjects = [
            {'qcode': '02011001', 'name': 'international court or tribunal'},
            {'qcode': '02011002', 'name': 'extradition'},
        ]
        located = {
            'alt_name': '',
            'state': 'California',
            'city_code': 'Los Angeles',
            'city': 'Los Angeles',
            'dateline': 'city',
            'country_code': 'US',
            'country': 'USA',
            'tz': 'America/Los_Angeles',
            'state_code': 'CA',
        }
        dateline = {
            'source': 'AAP',
            'text': 'Los Angeles, Aug 11 AAP -',
            'located': located,
        }
        place = {
            'qcode': 'Australia',
            'name': 'Australia',
            'state': '',
            'country': 'Australia',
            'world_region': 'Oceania',
        }
        company = {
            'name': 'YANCOAL AUSTRALIA LIMITED',
            'qcode': 'YAL',
            'security_exchange': 'ASX',
        }
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': subjects,
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': dateline,
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [place],
            'company_codes': [company],
        }
        subscriber = {'name': 'Test Subscriber'}

        # the first formatted entry unpacks into a pair; only the document is used
        _seq, encoded = self.formatter.format(article, subscriber)[0]
        org = etree.fromstring(encoded).find('body/body.head/org')

        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')
class IRESSNITFFormatterTest(AAPTestCase):
    line_ender = b'\x19\x0D\x0A'.decode()
    line_feed = 'ZZZZYYYY\n'
    line_prefix = '   '

    def setUp(self):
        """Create the formatters under test and load the fixture vocabulary.

        The base class ``setUp`` is chained first so that whatever fixture
        state it prepares exists before ``self.app`` is used below.
        """
        super().setUp()
        self.formatter = IRESSNITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)
        self.setUpData()

    def setUpData(self):
        """Insert the ``rightsinfo`` vocabulary into the app's data layer.

        The vocabulary carries three active entries (``AAP``, ``default`` and
        ``SUP``) that differ only in their name and copyright holder.
        """
        holders_by_name = (
            ("AAP", "Australian Associated Press"),
            ("default", "Australian Associated Press"),
            ("SUP", "Foo bar"),
        )
        rights_items = [
            {
                "usageTerms": "Usage terms goes here.",
                "copyrightHolder": holder,
                "is_active": True,
                "copyrightNotice": "Notice goes here.",
                "name": name
            }
            for name, holder in holders_by_name
        ]
        vocabulary = {
            "_id": "rightsinfo",
            "items": rights_items,
            "type": "manageable",
            "_etag": "init",
            "display_name": "Copyrights",
        }
        self.app.data.insert('vocabularies', [vocabulary])

    def test_append_legal(self):
        """A slugline flagged for legal gets a 'Legal: ' prefix, with and without truncation."""
        legal_item = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True}
        }

        # Each case: extra keyword arguments for append_legal and the slugline expected back.
        cases = (
            ({}, 'Legal: Obama Republican Healthc'),
            ({'truncate': True}, 'Legal: Obama Republican '),
        )
        for extra_kwargs, expected in cases:
            self.assertEqual(self.base_formatter.append_legal(legal_item, **extra_kwargs), expected)

    def test_append_legal_when_not_legal(self):
        """The slugline comes back unchanged when the item is not flagged for legal."""
        plain_slugline = 'Obama Republican Healthc'
        not_legal_item = {
            'slugline': plain_slugline,
            'flags': {'marked_for_legal': False}
        }

        self.assertEqual(self.base_formatter.append_legal(not_legal_item), plain_slugline)

    def test_formatter(self):
        """Format a text item and verify the head metadata, docdata and body of the output."""
        story = {
            'headline': 'test headline',
            'body_html': '<p>test body</p><p>test body</p>',
            'slugline': 'keyword',
            'anpa_take_key': 'take-key',
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'original_source': 'EMAIL',
            'type': 'text',
            'priority': '9',
            'source': 'SUP',
            '_id': 'urn:localhost.abc',
            'guid': 'urn:localhost.abc',
            'urgency': 2,
            'word_count': 99,
            'unique_id': 11,
            'place': [{'qcode': 'FED'}],
            'sign_off': 'me',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'byline': 'Bar Foo'
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        # Swap the wire line ender for a visible marker so the body can be compared as text.
        xml_text = result['formatted_item'].replace(self.line_ender, self.line_feed)
        root = etree.fromstring(xml_text)

        def attr(path, name):
            # Attribute `name` of the first element matching `path`.
            return root.find(path).get(name)

        self.assertEqual(root.find('head/title').text, 'FED:test headline')
        self.assertEqual(root.find('body/body.content/pre').text,
                         '   Bar FooZZZZYYYY\n   test bodyZZZZYYYY\n   test bodyZZZZYYYY\n   SUP meZZZZYYYY\n')
        self.assertEqual(attr('head/docdata/urgency', 'ed-urg'), '2')

        expected_meta = (
            ('head/meta[@name="anpa-sequence"]', str(result['published_seq_num']).zfill(4)),
            ('head/meta[@name="anpa-keyword"]', 'keyword'),
            ('head/meta[@name="anpa-takekey"]', 'take-key'),
            ('head/meta[@name="anpa-category"]', 'f'),
            ('head/meta[@name="anpa-wordcount"]', '0099'),
        )
        for meta_path, content in expected_meta:
            self.assertEqual(attr(meta_path, 'content'), content)

        self.assertEqual(root.find('body/body.head/hedline/hl1').text, 'FED:test headline')

        expected_docdata = (
            ('head/docdata/doc.copyright', 'year', '2018'),
            ('head/docdata/doc.copyright', 'holder', 'Foo bar'),
            ('head/docdata/doc-id', 'id-string', 'AAP.20180613.11'),
            ('head/docdata/date.issue', 'norm', '20180613T214519'),
            ('head/docdata/date.release', 'norm', '20180613T214519'),
        )
        for docdata_path, attribute, value in expected_docdata:
            self.assertEqual(attr(docdata_path, attribute), value)

        self.assertEqual(len(root.findall('body/body.end')), 0)

    def test_company_codes(self):
        """Both company qcodes appear, space separated, in the 'asx-codes' meta element."""
        story = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001', 'name': 'international court or tribunal'},
                        {'qcode': '02011002', 'name': 'extradition'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA'
                }
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{'qcode': 'Australia', 'name': 'Australia',
                       'state': '', 'country': 'Australia',
                       'world_region': 'Oceania'}],
            'company_codes': [
                {'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'},
                {'name': 'BHP Billiton Limited', 'qcode': 'BHP', 'security_exchange': 'ASX'}
            ]
        }

        formatted = self.formatter.format(story, {'name': 'Test Subscriber'})[0]['formatted_item']
        root = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))
        asx_meta = root.find('head/meta[@name="asx-codes"]')
        self.assertEqual(asx_meta.get('content'), "YAL BHP")

    def testDivContent(self):
        """Format a body built from <div> elements and check how the <pre> text begins.

        The output must start with the byline line followed by the first
        paragraph of the body.
        """
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'type': 'text',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'body_html': '<div>Kathmandu Holdings has lodged a claim in the New Zealand High'
                         'Court for the recovery of costs associated with last years takeover bid from Briscoe'
                         'Group.</div><div>Kathmandu Holdings has lodged a claim in the New Zealand High Court for '
                         'the recovery of costs associated with last years takeover bid from Briscoe Group.'
                         '</div><div><br></div><div><br></div><div>Kathmandu incurred costs in relation to the takeover'
                         'After an initial request for payment on November 20, 2015 and subsequent correspondence, '
                         'Briscoe made a payment of $637,711.65 on May 25, 2016 without prejudice to its position on '
                         'what sum Kathmandu is entitled to recover.</div><div><br></div><div>Kathmandu considers the '
                         'full amount claimed is recoverable and has issued legal proceedings for the balance of monies'
                         ' owed.</div>',
            'word_count': '1',
            'priority': 1
        }
        doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        # Swap the wire line ender for a visible marker so the body can be inspected as text.
        item = doc['formatted_item'].replace(self.line_ender, self.line_feed)
        nitf_xml = etree.fromstring(item)
        self.assertTrue(nitf_xml.find('body/body.content/pre').text.
                        startswith('   joeZZZZYYYY\n   Kathmandu Holdings'))
        # NOTE(review): assertTrue takes its second argument as the failure message, so the
        # line below only checks that the split list is non-empty; the 7 is never compared.
        # If a line count was intended this probably wants assertEqual(len(...), 7) --
        # confirm the real expected count before changing it.
        self.assertTrue(nitf_xml.find('body/body.content/pre').text.split('\n'), 7)

    def testLFContent(self):
        """Line feeds embedded in paragraph text must not split 'from 74.41' in the output.

        The body contains 'from', two line feeds, a space and '74.41'; the
        formatted <pre> text is expected to contain them joined as 'from 74.41'.
        """
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'type': 'text',
            'body_html': '<p><span style=\"background-color: transparent;\">The Australian dollar has tumbled'
                         ' after&nbsp;</span>Standard &amp; Poor\'s warned the country\'s triple-A credit rating '
                         'is at risk.<br></p><p>   At 1200 AEST on Thursday, the currency was trading at 74.98 US '
                         'cents, up from\n\n 74.41\n\n cents on Wednesday, but down from a high of 75.38 on Thursday '
                         'morning.</p><p>S&amp;P downgraded its outlook on Australia\'s credit rating from stable '
                         'to negative, due to the prospect of ongoing budget deficits without substantial reforms '
                         'being passed by parliament.</p><p>Westpac chief currency strategist&nbsp;Robert Rennie '
                         'said the uncertain election outcome is likely to result in a longer run of budget'
                         'deficits.</p><p>\"It was clearly a risk and the market has been living in its shadow since'
                         'Monday morning,\" he said.</p><p>\"Gridlock or the inability to improve the fiscal situation '
                         'over the forecast period is something I think a ratings agency ought to take into '
                         'account.\"</p><p><span style=\"background-color: transparent;\">The currency had a sudden '
                         'plunge to 74.67 US cents on the announcement from S&amp;P, before recovering some of that '
                         'ground.</span></p><p><span style=\"background-color: transparent;\">Mr Rennie tipped the '
                         'Australian dollar will slip further on Thursday.</span></p><p><span style=\"background-color:'
                         'transparent;\">\"We should make fresh lows, we should be pushing down though 74 US cents '
                         'and possibly lower,\" he said.</span></p><p><span style=\"background-color: '
                         'transparent;\">KEY MOVEMENTS:</span></p><p><span style=\"background-color: transparent;\">One'
                         'Australian dollar buys:</span><br></p><p>   * 74.98 US cents, from\n\n 74.41\n\ncents on '
                         'Wednesday</p><p>   * 75.63 Japanese yen, from \n\n75.15\n\n yen</p><p>   * 67.64 euro cents, '
                         'from \n\n67.24\n\n euro cents</p><p>   * 105.01 New Zealand cents, from \n\n104.85\n\n NZ '
                         'cents</p><p>   * 57.96 British pence, from \n\n57.53\n\n pence</p><p>   Government bond '
                         'yields:</p><p>   * CGS 5.25pct March 2019, 1.510pct, from \n\n1.513pct</p><p>   * CGS 4.25pct'
                         'April 2026, 1.868pct, from \n\n1.862pct</p><p>   Sydney Futures Exchange prices:</p><p>   *'
                         'September 2016 10-year bond futures contract, was at 98.125\n\n (1.875\n\n per cent), '
                         'unchanged from Wednesday</p><p>   * September 2016 3-year bond futures contract, at 98.570 '
                         '(1.430 per cent), up from \n\n98.550\n\n (1.450\n\n per cent)</p><p>   (*Currency closes '
                         'taken at 1700 AEST previous local session, bond market closes taken at 1630 AEST previous '
                         'local session)</p><p>   Source: IRESS</p>',
            'word_count': '1',
            'priority': 1
        }
        doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        # Swap the wire line ender for a visible marker so the body can be inspected as text.
        item = doc['formatted_item'].replace(self.line_ender, self.line_feed)
        nitf_xml = etree.fromstring(item)
        # assertIn reports both the needle and the haystack on failure, unlike assertTrue(x in y).
        self.assertIn('from 74.41', nitf_xml.find('body/body.content/pre').text)

    def testStraySpaceContent(self):
        """Check the exact <pre> text produced for paragraphs that open with a lone quote."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'type': 'text',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'body_html': '<p><span style=\"background-color: transparent;\">\"</span>'
                         '<span style=\"background-color: transparent;\">However</span></p>'
                         '<p>\"<span style=\"background-color: transparent;\">The proposed</p>',
            'word_count': '1',
            'priority': 1
        }
        formatted = self.formatter.format(story, {'name': 'Test Subscriber'})[0]['formatted_item']
        # Swap the wire line ender for a visible marker so the body can be compared as text.
        root = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))
        pre_text = root.find('body/body.content/pre').text
        self.assertEqual(pre_text,
                         '\"However   \"The proposedZZZZYYYY\n   AAPZZZZYYYY\n')

    def testSpacesContent(self):
        """Runs of ordinary spaces collapse to one; each non-breaking space becomes one space.

        The body mixes single, double and triple spaces with non-breaking
        spaces written both as entities and as a literal character.
        """
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'type': 'text',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'body_html': '<p>a b  c   d&nbsp;e&nbsp;&nbsp;f\xA0g</p>',
            'word_count': '1',
            'priority': 1
        }
        formatted = self.formatter.format(story, {'name': 'Test Subscriber'})[0]['formatted_item']
        # Swap the wire line ender for a visible marker so the body can be compared as text.
        root = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))
        pre_text = root.find('body/body.content/pre').text
        self.assertEqual(pre_text, '   a b c d e  f gZZZZYYYY\n   AAPZZZZYYYY\n')

    def testControlCharsContent(self):
        """A body holding only control characters and spaces formats to padding plus the source line."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'type': 'text',
            'body_html': '<p><span style=\"background-color: transparent;\">\u0018\u0012\f \u000b\u0012\b</span>'
                         '<span style=\"background-color: transparent;\">\u0005\f\u0006\b \u0006\f\u0019&nbsp;</span>'
                         '</p>',
            'word_count': '1',
            'priority': 1
        }
        formatted = self.formatter.format(story, {'name': 'Test Subscriber'})[0]['formatted_item']
        # Swap the wire line ender for a visible marker so the body can be compared as text.
        root = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))
        pre_text = root.find('body/body.content/pre').text
        self.assertEqual(pre_text, '      AAPZZZZYYYY\n')

    def testNullTakeKeyContent(self):
        """No 'anpa-takekey' meta element is emitted when the take key is None."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': None,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': None,
            'unique_id': 1,
            'type': 'text',
            'body_html': '<p>no body</p>',
            'word_count': '1',
            'priority': 1,
            'abstract': None
        }
        formatted = self.formatter.format(story, {'name': 'Test Subscriber'})[0]['formatted_item']
        root = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))
        take_key_meta = root.find('head/meta[@name="anpa-takekey"]')
        self.assertIsNone(take_key_meta)

    def testLocator(self):
        """The headline prefix follows the item: 'VIC:' at first, 'CRIK:' once it is a cricket story.

        The same item is formatted twice; between the two runs its category
        and subject are replaced with domestic sports / cricket values.
        """
        story = {
            '_id': '4853',
            'slugline': 'Gangs',
            'byline': '',
            'anpa_category': [
                {
                    'name': 'Australian General News',
                    'qcode': 'a'
                }
            ],
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'genre': [
                {
                    'name': 'Article (news)',
                    'qcode': 'Article'
                }
            ],
            'priority': 6,
            'unique_id': 33423059,
            'format': 'HTML',
            'guid': '4853',
            'headline': 'Drop gang campaign, bishop tells Vic Libs',
            'source': 'TEST',
            'subject': [
                {
                    'name': 'crime, law and justice',
                    'qcode': '02000000'
                }
            ],
            'flags': {
                'marked_for_sms': False,
                'marked_for_not_publication': False,
                'marked_archived_only': False,
                'marked_for_legal': False
            },
            'anpa_take_key': None,
            'pubstatus': 'usable',
            'schedule_settings': {
                'time_zone': None,
                'utc_embargo': None,
                'utc_publish_schedule': None
            },
            'urgency': 5,
            'word_count': 83,
            'type': 'text',
            'place': [
                {
                    'name': 'VIC',
                    'qcode': 'VIC',
                    'world_region': 'Oceania',
                    'state': 'Victoria',
                    'country': 'Australia',
                    'group': 'Australia'
                }
            ],
            'state': 'corrected',
            'body_html': '<p>A Melbourne-based Anglican bishop is calling on the Liberal party</P',
            '_current_version': 3,
            'sign_off': 'MG',
            'ednote': 'In the story \'Gangs\' sent at: 16/07/2018 15:26\r\n\r\nThis is a corrected repeat.',
            'sms_message': ''
        }

        def formatted_headlines():
            # Format the story in its current state and return the (title, hl1) texts.
            output = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
            tree = etree.fromstring(output['formatted_item'].replace(self.line_ender, self.line_feed))
            return tree.find('head/title').text, tree.find('body/body.head/hedline/hl1').text

        title, hl1 = formatted_headlines()
        self.assertEqual(title, 'VIC:Drop gang campaign, bishop tells Vic Libs')
        self.assertEqual(hl1,
                         'VIC:Drop gang campaign, bishop tells Vic Libs')

        # Turn the same item into a domestic sports / cricket story and format it again.
        story['anpa_category'] = [{'name': 'Domestic Sports', 'qcode': 't'}]
        story['subject'] = [{'name': 'Cricket', 'qcode': '15017000'}]
        title, hl1 = formatted_headlines()
        self.assertEqual(title, 'CRIK:Drop gang campaign, bishop tells Vic Libs')
        self.assertEqual(hl1,
                         'CRIK:Drop gang campaign, bishop tells Vic Libs')

    def testSmartQuotes(self):
        """Curly quotes come out as straight quotes and the yen sign as 'Y=' in the <pre> text."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': None,
            'format': 'HTML',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': None,
            'unique_id': 1,
            'type': 'text',
            'body_html': '<p>“Why is society being so childish.” and the Yen ¥</p>',
            'word_count': '1',
            'priority': 1,
            'abstract': None
        }
        formatted = self.formatter.format(story, {'name': 'Test Subscriber'})[0]['formatted_item']
        # Swap the wire line ender for a visible marker so the body can be compared as text.
        root = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))
        pre_text = root.find('body/body.content/pre').text
        self.assertEqual(pre_text, '   "Why is society being so childish." and '
                                   'the Yen Y=ZZZZYYYY\n   AAPZZZZYYYY\n')
class NitfFormatterTest(TestCase):
    def setUp(self):
        """Create the NITF and base formatters used by the tests.

        The base class ``setUp`` is chained first so that whatever fixture
        state it prepares exists before ``self.app`` is used below.
        """
        super().setUp()
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """A slugline flagged for legal gets a 'Legal: ' prefix, with and without truncation."""
        legal_item = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True}
        }

        # Each case: extra keyword arguments for append_legal and the slugline expected back.
        cases = (
            ({}, 'Legal: Obama Republican Healthc'),
            ({'truncate': True}, 'Legal: Obama Republican '),
        )
        for extra_kwargs, expected in cases:
            self.assertEqual(self.base_formatter.append_legal(legal_item, **extra_kwargs), expected)

    def test_append_legal_when_not_legal(self):
        """The slugline comes back unchanged when the item is not flagged for legal."""
        plain_slugline = 'Obama Republican Healthc'
        not_legal_item = {
            'slugline': plain_slugline,
            'flags': {'marked_for_legal': False}
        }

        self.assertEqual(self.base_formatter.append_legal(not_legal_item), plain_slugline)

    def test_formatter(self):
        """A minimal text item yields NITF carrying its headline, body paragraph and urgency."""
        story = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2
        }

        # format() returns a list of pairs here; only the document half is inspected.
        _, nitf_text = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        root = etree.fromstring(nitf_text)

        title = root.find('head/title')
        self.assertEqual(title.text, story['headline'])
        paragraph = root.find('body/body.content/p')
        self.assertEqual(paragraph.text, 'test body')
        urgency = root.find('head/docdata/urgency')
        self.assertEqual(urgency.get('ed-urg'), '2')

    def test_html2nitf(self):
        """html2nitf maps HTML to NITF and drops unknown wrappers and attributes.

        Per the expected markup: the <unknown> wrapper and the <span> go but
        their content stays, <strong>/<i>/<em> become classed <em> elements,
        <h6> becomes <hl2>, and the 'style' attribute plus the unrecognised
        attributes are removed.
        """
        def squash(markup):
            # Drop every newline and space so that only the markup itself is compared.
            return markup.replace('\n', '').replace(' ', '')

        source_tree = etree.fromstring(dedent("""\
            <div>
                <unknown>
                    <p>
                        this should be still here
                    </p>
                </unknown>
                <p style="this='is';some='style'">
                    <strong>this text should be
                        <i>modified</i>
                    </strong>
                    so
                    <span>[this should not be removed]</span>
                    unkown
                    <em unknown_attribute="toto">elements</em>
                    and
                    <a bad_attribute="to_remove">attributes</a>
                    are
                    <h6>removed</h6>
                </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        wanted = dedent("""\
            <div>
                    <p>
                        this should be still here
                    </p>
                <p>
                    <em class="bold">this text should be
                        <em class="italic">modified</em>
                    </em>
                    so [this should not be removed] unkown
                    <em class="italic">elements</em>
                    and
                    <a>attributes</a>
                    are
                    <hl2>removed</hl2>
                </p>
            </div>""")
        self.assertEqual(squash(etree.tostring(converted, 'unicode')), squash(wanted))

    def test_company_codes(self):
        """The company code is rendered as an <org> element with name, exchange and qcode."""
        story = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001', 'name': 'international court or tribunal'},
                        {'qcode': '02011002', 'name': 'extradition'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA'
                }
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{'qcode': 'Australia', 'name': 'Australia',
                       'state': '', 'country': 'Australia',
                       'world_region': 'Oceania'}],
            'company_codes': [{'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'}]
        }

        # format() returns a list of pairs here; only the document half is inspected.
        _, nitf_text = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        org = etree.fromstring(nitf_text).find('body/body.head/org')
        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        for attribute, expected in (('idsrc', 'ASX'), ('value', 'YAL')):
            self.assertEqual(org.attrib.get(attribute, ''), expected)

    def testNoneAsciNamesContent(self):
        """Non-ASCII characters in the body paragraph survive formatting unchanged."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [
                {
                    "package": "package",
                    "package_type": "takes"
                }
            ],
        }
        # format() returns a list of pairs here; only the document half is inspected.
        _, nitf_text = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        paragraph = etree.fromstring(nitf_text).find('body/body.content/p')
        self.assertEqual(paragraph.text, 'Tommi Mäkinen crashes a Škoda in Äppelbo')
Example #13
0
 def setUp(self):
     """Create the AAP NITF and base formatters used by the tests.

     The base class ``setUp`` is chained first so that whatever fixture
     state it prepares exists before ``self.app`` is used below.
     """
     super().setUp()
     self.formatter = AAPNITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
Example #14
0
class AAPNitfFormatterTest(TestCase):
    """Tests for the NITF documents produced by AAPNITFFormatter."""

    def setUp(self):
        """Create the formatter under test and a base Formatter, then call init_app on self.app."""
        self.formatter = AAPNITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def _format_and_parse(self, item):
        """Format item for the test subscriber; return the first result and its parsed XML tree."""
        result = self.formatter.format(item, {'name': 'Test Subscriber'})[0]
        return result, etree.fromstring(result['formatted_item'])

    def test_append_legal(self):
        """A slugline marked for legal gets the 'Legal: ' prefix, with and without truncate=True."""
        flagged = {'slugline': 'Obama Republican Healthc', 'flags': {'marked_for_legal': True}}

        self.assertEqual(self.base_formatter.append_legal(flagged), 'Legal: Obama Republican Healthc')
        self.assertEqual(self.base_formatter.append_legal(flagged, truncate=True), 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """With marked_for_legal set to False the slugline comes back as it was."""
        unflagged = {'slugline': 'Obama Republican Healthc', 'flags': {'marked_for_legal': False}}

        self.assertEqual(self.base_formatter.append_legal(unflagged), 'Obama Republican Healthc')

    def test_formatter(self):
        """Check the title, body, urgency and head/meta elements of a formatted text item."""
        story = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'slugline': 'keyword',
            'anpa_take_key': 'take-key',
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'original_source': 'EMAIL',
            'type': 'text',
            'priority': '9',
            'source': 'SUP',
            '_id': 'urn:localhost.abc',
            'urgency': 2,
            'place': [{'qcode': 'FED'}],
            'sign_off': 'me'
        }

        published, nitf = self._format_and_parse(story)

        def content_of(xpath):
            # value of the "content" attribute on the element matched by xpath
            return nitf.find(xpath).get('content')

        self.assertEqual(nitf.find('head/title').text, story['headline'])
        self.assertEqual(nitf.find('body/body.content/p').text, 'test body')
        self.assertEqual(nitf.find('head/docdata/urgency').get('ed-urg'), '2')
        self.assertEqual(content_of('head/meta[@name="aap-priority"]'), '9')
        self.assertEqual(content_of('head/meta[@name="anpa-sequence"]'), str(published['published_seq_num']))
        self.assertEqual(content_of('head/meta[@name="anpa-keyword"]'), 'keyword')
        self.assertEqual(content_of('head/meta[@name="anpa-takekey"]'), 'take-key')
        self.assertEqual(content_of('head/meta[@name="aap-source"]'), 'SUP')
        self.assertEqual(content_of('head/meta[@name="aap-original-source"]'), 'EMAIL')
        self.assertEqual(content_of('head/meta[@name="aap-place"]'), 'FED')
        self.assertEqual(content_of('head/meta[@name="aap-signoff"]'), 'me')
        self.assertEqual(content_of('head/meta[@name="anpa-category"]'), 'f')

    def test_company_codes(self):
        """The company code of the item must appear as an <org> element under body.head."""
        story = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001', 'name': 'international court or tribunal'},
                {'qcode': '02011002', 'name': 'extradition'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA'
                }
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [
                {
                    'qcode': 'Australia',
                    'name': 'Australia',
                    'state': '',
                    'country': 'Australia',
                    'world_region': 'Oceania',
                },
            ],
            'company_codes': [
                {'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'},
            ]
        }

        _, nitf = self._format_and_parse(story)
        org = nitf.find('body/body.head/org')
        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')

    def testDivContent(self):
        """For a body made of <div> elements, the sixth output paragraph starts 'Kathmandu considers'."""
        body = (
            '<div>Kathmandu Holdings has lodged a claim in the New Zealand High'
            'Court for the recovery of costs associated with last years takeover bid from Briscoe'
            'Group.</div><div>Kathmandu Holdings has lodged a claim in the New Zealand High Court for '
            'the recovery of costs associated with last years takeover bid from Briscoe Group.'
            '</div><div><br></div><div>Kathmandu incurred costs in relation to the takeover bid. '
            'After an initial request for payment on November 20, 2015 and subsequent correspondence, '
            'Briscoe made a payment of $637,711.65 on May 25, 2016 without prejudice to its position on '
            'what sum Kathmandu is entitled to recover.</div><div><br></div><div>Kathmandu considers the '
            'full amount claimed is recoverable and has issued legal proceedings for the balance of monies'
            ' owed.</div>'
        )
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': body,
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [{"package": "package", "package_type": "takes"}],
        }

        _, nitf = self._format_and_parse(story)
        paragraphs = nitf.findall('body/body.content/p')
        self.assertTrue(paragraphs[5].text.startswith('Kathmandu considers'))

    def testLFContent(self):
        """Line feeds inside a paragraph: the second output paragraph must contain 'from 74.41'."""
        body = (
            '<p><span style=\"background-color: transparent;\">The Australian dollar has tumbled'
            ' after&nbsp;</span>Standard &amp; Poor\'s warned the country\'s triple-A credit rating '
            'is at risk.<br></p><p>   At 1200 AEST on Thursday, the currency was trading at 74.98 US '
            'cents, up from\n\n 74.41\n\n cents on Wednesday, but down from a high of 75.38 on Thursday '
            'morning.</p><p>S&amp;P downgraded its outlook on Australia\'s credit rating from stable '
            'to negative, due to the prospect of ongoing budget deficits without substantial reforms '
            'being passed by parliament.</p><p>Westpac chief currency strategist&nbsp;Robert Rennie '
            'said the uncertain election outcome is likely to result in a longer run of budget'
            'deficits.</p><p>\"It was clearly a risk and the market has been living in its shadow since'
            'Monday morning,\" he said.</p><p>\"Gridlock or the inability to improve the fiscal situation '
            'over the forecast period is something I think a ratings agency ought to take into '
            'account.\"</p><p><span style=\"background-color: transparent;\">The currency had a sudden '
            'plunge to 74.67 US cents on the announcement from S&amp;P, before recovering some of that '
            'ground.</span></p><p><span style=\"background-color: transparent;\">Mr Rennie tipped the '
            'Australian dollar will slip further on Thursday.</span></p><p><span style=\"background-color:'
            'transparent;\">\"We should make fresh lows, we should be pushing down though 74 US cents '
            'and possibly lower,\" he said.</span></p><p><span style=\"background-color: '
            'transparent;\">KEY MOVEMENTS:</span></p><p><span style=\"background-color: transparent;\">One'
            'Australian dollar buys:</span><br></p><p>   * 74.98 US cents, from\n\n 74.41\n\ncents on '
            'Wednesday</p><p>   * 75.63 Japanese yen, from \n\n75.15\n\n yen</p><p>   * 67.64 euro cents, '
            'from \n\n67.24\n\n euro cents</p><p>   * 105.01 New Zealand cents, from \n\n104.85\n\n NZ '
            'cents</p><p>   * 57.96 British pence, from \n\n57.53\n\n pence</p><p>   Government bond '
            'yields:</p><p>   * CGS 5.25pct March 2019, 1.510pct, from \n\n1.513pct</p><p>   * CGS 4.25pct'
            'April 2026, 1.868pct, from \n\n1.862pct</p><p>   Sydney Futures Exchange prices:</p><p>   *'
            'September 2016 10-year bond futures contract, was at 98.125\n\n (1.875\n\n per cent), '
            'unchanged from Wednesday</p><p>   * September 2016 3-year bond futures contract, at 98.570 '
            '(1.430 per cent), up from \n\n98.550\n\n (1.450\n\n per cent)</p><p>   (*Currency closes '
            'taken at 1700 AEST previous local session, bond market closes taken at 1630 AEST previous '
            'local session)</p><p>   Source: IRESS</p>'
        )
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': body,
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [{"package": "package", "package_type": "takes"}],
        }

        _, nitf = self._format_and_parse(story)
        paragraphs = nitf.findall('body/body.content/p')
        self.assertTrue('from 74.41' in paragraphs[1].text)

    def testStraySpaceContent(self):
        """Two adjacent spans in the first paragraph must produce the text '"However'."""
        body = (
            '<p><span style=\"background-color: transparent;\">\"</span>'
            '<span style=\"background-color: transparent;\">However</span></p>'
            '<p>\"<span style=\"background-color: transparent;\">The proposed</p>'
        )
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': body,
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [{"package": "package", "package_type": "takes"}],
        }

        _, nitf = self._format_and_parse(story)
        self.assertEqual(nitf.find('body/body.content/p').text, '"However')

    def testNoneAsciNamesContent(self):
        """Accented letters in the body are expected to come out as their plain ASCII counterparts."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [{"package": "package", "package_type": "takes"}],
        }

        _, nitf = self._format_and_parse(story)
        self.assertEqual(nitf.find('body/body.content/p').text, 'Tommi Makinen crashes a Skoda in Appelbo')

    def testSpacesContent(self):
        """Mixed plain spaces, &nbsp; entities and \\xA0 must format to 'a b c d e  f g'."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>a b  c   d&nbsp;e&nbsp;&nbsp;f\xA0g</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [{"package": "package", "package_type": "takes"}],
        }

        _, nitf = self._format_and_parse(story)
        self.assertEqual(nitf.find('body/body.content/p').text, 'a b c d e  f g')

    def testControlCharsContent(self):
        """A body consisting of control characters and spaces must format to a single space."""
        body = (
            '<p><span style=\"background-color: transparent;\">\u0018\u0012\f \u000b\u0012\b</span>'
            '<span style=\"background-color: transparent;\">\u0005\f\u0006\b \u0006\f\u0019&nbsp;</span>'
            '</p>'
        )
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': body,
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [{"package": "package", "package_type": "takes"}],
        }

        _, nitf = self._format_and_parse(story)
        self.assertEqual(nitf.find('body/body.content/p').text, ' ')

    def testNullTakeKeyContent(self):
        """With anpa_take_key (and byline, abstract) set to None, no anpa-takekey meta is emitted."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': None,
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': None,
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>no body</p>',
            'word_count': '1',
            'priority': 1,
            'abstract': None,
            "linked_in_packages": [{"package": "package", "package_type": "takes"}],
        }

        _, nitf = self._format_and_parse(story)
        self.assertIsNone(nitf.find('head/meta[@name="anpa-takekey"]'))
 def setUp(self):
     """Create the IRESS NITF formatter and a base Formatter, call init_app on self.app, then run setUpData."""
     self.formatter = IRESSNITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
     self.setUpData()
class NitfFormatterTest(TestCase):
    """Tests for the NITF documents produced by NITFFormatter."""

    def setUp(self):
        """Run the parent setUp, create the formatters, then call init_app on self.app."""
        super().setUp()
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def _parse_formatted(self, item):
        """Format item for the test subscriber and return the first document parsed as XML."""
        # format() yields (sequence, document) pairs; the sequence number is not used by these tests
        _seq, xml_text = self.formatter.format(item, {'name': 'Test Subscriber'})[0]
        return etree.fromstring(xml_text)

    def test_append_legal(self):
        """A slugline marked for legal gets the 'Legal: ' prefix, with and without truncate=True."""
        flagged = {'slugline': 'Obama Republican Healthc', 'flags': {'marked_for_legal': True}}

        self.assertEqual(self.base_formatter.append_legal(flagged), 'Legal: Obama Republican Healthc')
        self.assertEqual(self.base_formatter.append_legal(flagged, truncate=True), 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """With marked_for_legal set to False the slugline comes back as it was."""
        unflagged = {'slugline': 'Obama Republican Healthc', 'flags': {'marked_for_legal': False}}

        self.assertEqual(self.base_formatter.append_legal(unflagged), 'Obama Republican Healthc')

    def test_formatter(self):
        """Check the title, first body paragraph and urgency of a minimal text item."""
        story = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2
        }

        nitf = self._parse_formatted(story)
        self.assertEqual(nitf.find('head/title').text, story['headline'])
        self.assertEqual(nitf.find('body/body.content/p').text, 'test body')
        self.assertEqual(nitf.find('head/docdata/urgency').get('ed-urg'), '2')

    def test_company_codes(self):
        """The company code of the item must appear as an <org> element under body.head."""
        story = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001', 'name': 'international court or tribunal'},
                        {'qcode': '02011002', 'name': 'extradition'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA'
                }
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{'qcode': 'Australia', 'name': 'Australia',
                       'state': '', 'country': 'Australia',
                       'world_region': 'Oceania'}],
            'company_codes': [{'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'}]
        }

        org = self._parse_formatted(story).find('body/body.head/org')
        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')

    def testNoneAsciNamesContent(self):
        """Check that accented characters in the body reach the first NITF paragraph unchanged."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [{"package": "package", "package_type": "takes"}],
        }

        first_paragraph = self._parse_formatted(story).find('body/body.content/p')
        self.assertEqual(first_paragraph.text, 'Tommi Mäkinen crashes a Škoda in Äppelbo')
 def setUp(self):
     """Create the IRESS NITF formatter and a base Formatter, call init_app on self.app, then run setUpData."""
     self.formatter = IRESSNITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
     self.setUpData()
class IRESSNITFFormatterTest(TestCase):
    # Line terminator as a str: the bytes 0x19, CR, LF decoded with the default codec.
    line_ender = b'\x19\x0D\x0A'.decode()
    # Readable stand-in that the tests substitute for line_ender before parsing the formatted item.
    line_feed = 'ZZZZYYYY\n'
    # Three spaces; presumably mirrors the indent the formatter puts before each body line - TODO confirm.
    line_prefix = '   '

    def setUp(self):
        """Create the IRESS NITF formatter and a base Formatter, call init_app on self.app, then run setUpData."""
        self.formatter = IRESSNITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)
        self.setUpData()

    def setUpData(self):
        """Insert a 'rightsinfo' vocabulary holding AAP, default and SUP copyright entries."""
        # (name, copyright holder) for each entry; all other fields are shared
        holders_by_name = (
            ("AAP", "Australian Associated Press"),
            ("default", "Australian Associated Press"),
            ("SUP", "Foo bar"),
        )
        rights_items = [
            {
                "usageTerms": "Usage terms goes here.",
                "copyrightHolder": holder,
                "is_active": True,
                "copyrightNotice": "Notice goes here.",
                "name": name,
            }
            for name, holder in holders_by_name
        ]
        rightsinfo = {
            "_id": "rightsinfo",
            "items": rights_items,
            "type": "manageable",
            "_etag": "init",
            "display_name": "Copyrights",
        }
        self.app.data.insert('vocabularies', [rightsinfo])

    def test_append_legal(self):
        """A slugline marked for legal gets the 'Legal: ' prefix, with and without truncate=True."""
        flagged = {'slugline': 'Obama Republican Healthc', 'flags': {'marked_for_legal': True}}

        self.assertEqual(self.base_formatter.append_legal(flagged), 'Legal: Obama Republican Healthc')
        self.assertEqual(self.base_formatter.append_legal(flagged, truncate=True), 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """With marked_for_legal set to False the slugline comes back as it was."""
        unflagged = {'slugline': 'Obama Republican Healthc', 'flags': {'marked_for_legal': False}}

        self.assertEqual(self.base_formatter.append_legal(unflagged), 'Obama Republican Healthc')

    def test_formatter(self):
        """Check the head metadata, docdata and <pre> body of an IRESS-formatted text item."""
        story = {
            'headline': 'test headline',
            'body_html': '<p>test body</p><p>test body</p>',
            'slugline': 'keyword',
            'anpa_take_key': 'take-key',
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'original_source': 'EMAIL',
            'type': 'text',
            'priority': '9',
            'source': 'SUP',
            '_id': 'urn:localhost.abc',
            'guid': 'urn:localhost.abc',
            'urgency': 2,
            'word_count': 99,
            'unique_id': 11,
            'place': [{'qcode': 'FED'}],
            'sign_off': 'me',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'byline': 'Bar Foo'
        }

        published = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        # swap the line terminator for a readable marker before handing the text to the XML parser
        readable = published['formatted_item'].replace(self.line_ender, self.line_feed)
        nitf = etree.fromstring(readable)

        def content_of(xpath):
            # value of the "content" attribute on the element matched by xpath
            return nitf.find(xpath).get('content')

        self.assertEqual(nitf.find('head/title').text, 'FED:test headline')
        self.assertEqual(nitf.find('body/body.content/pre').text,
                         '   Bar FooZZZZYYYY\n   test bodyZZZZYYYY\n   test bodyZZZZYYYY\n   SUP meZZZZYYYY\n')
        self.assertEqual(nitf.find('head/docdata/urgency').get('ed-urg'), '2')
        self.assertEqual(content_of('head/meta[@name="anpa-sequence"]'),
                         str(published['published_seq_num']).zfill(4))
        self.assertEqual(content_of('head/meta[@name="anpa-keyword"]'), 'keyword')
        self.assertEqual(content_of('head/meta[@name="anpa-takekey"]'), 'take-key')
        self.assertEqual(content_of('head/meta[@name="anpa-category"]'), 'f')
        self.assertEqual(content_of('head/meta[@name="anpa-wordcount"]'), '0099')
        self.assertEqual(nitf.find('body/body.head/hedline/hl1').text, 'FED:test headline')

        copyright_el = nitf.find('head/docdata/doc.copyright')
        self.assertEqual(copyright_el.get('year'), '2018')
        self.assertEqual(copyright_el.get('holder'), 'Foo bar')
        self.assertEqual(nitf.find('head/docdata/doc-id').get('id-string'), 'AAP.20180613.11')

        # both date elements are compared against the item's versioncreated, read back after formatting
        created_norm = story.get('versioncreated').strftime('%Y%m%dT%H%M%S')
        self.assertEqual(nitf.find('head/docdata/date.issue').get('norm'), created_norm)
        self.assertEqual(nitf.find('head/docdata/date.release').get('norm'), created_norm)
        self.assertEqual(len(nitf.findall('body/body.end')), 0)

    def test_company_codes(self):
        """Both company qcodes must appear, space separated, in the asx-codes meta element."""
        story = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001', 'name': 'international court or tribunal'},
                        {'qcode': '02011002', 'name': 'extradition'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA'
                }
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{'qcode': 'Australia', 'name': 'Australia',
                       'state': '', 'country': 'Australia',
                       'world_region': 'Oceania'}],
            'company_codes': [
                {'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'},
                {'name': 'BHP Billiton Limited', 'qcode': 'BHP', 'security_exchange': 'ASX'},
            ]
        }

        published = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        # swap the line terminator for a readable marker before handing the text to the XML parser
        readable = published['formatted_item'].replace(self.line_ender, self.line_feed)
        asx_meta = etree.fromstring(readable).find('head/meta[@name="asx-codes"]')
        self.assertEqual(asx_meta.get('content'), "YAL BHP")

    def testDivContent(self):
        """Format a body made of <div> elements and check the resulting <pre> text.

        The <pre> text must start with the byline line followed by the first
        paragraph, and must split into seven pieces on line feeds.
        """
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'type': 'text',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'body_html':
            '<div>Kathmandu Holdings has lodged a claim in the New Zealand High'
            'Court for the recovery of costs associated with last years takeover bid from Briscoe'
            'Group.</div><div>Kathmandu Holdings has lodged a claim in the New Zealand High Court for '
            'the recovery of costs associated with last years takeover bid from Briscoe Group.'
            '</div><div><br></div><div><br></div><div>Kathmandu incurred costs in relation to the takeover'
            'After an initial request for payment on November 20, 2015 and subsequent correspondence, '
            'Briscoe made a payment of $637,711.65 on May 25, 2016 without prejudice to its position on '
            'what sum Kathmandu is entitled to recover.</div><div><br></div><div>Kathmandu considers the '
            'full amount claimed is recoverable and has issued legal proceedings for the balance of monies'
            ' owed.</div>',
            'word_count': '1',
            'priority': 1
        }
        doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        # swap the line terminator for a readable marker before handing the text to the XML parser
        item = doc['formatted_item'].replace(self.line_ender, self.line_feed)
        nitf_xml = etree.fromstring(item)
        pre_text = nitf_xml.find('body/body.content/pre').text
        self.assertTrue(pre_text.startswith('   joeZZZZYYYY\n   Kathmandu Holdings'))
        # The previous form, assertTrue(pre_text.split('\n'), 7), passed 7 as the failure
        # message, so the number of lines was never actually compared.
        # NOTE(review): 7 is the count supplied by the original assertion and was never
        # enforced before - confirm it against the formatter output.
        self.assertEqual(len(pre_text.split('\n')), 7)

    def testLFContent(self):
        """Format a body containing embedded line feeds inside paragraphs.

        The body has figures surrounded by ``\\n\\n``; the test checks that the
        ``<pre>`` text contains ``'from 74.41'``.
        """
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'type': 'text',
            'body_html':
            '<p><span style=\"background-color: transparent;\">The Australian dollar has tumbled'
            ' after&nbsp;</span>Standard &amp; Poor\'s warned the country\'s triple-A credit rating '
            'is at risk.<br></p><p>   At 1200 AEST on Thursday, the currency was trading at 74.98 US '
            'cents, up from\n\n 74.41\n\n cents on Wednesday, but down from a high of 75.38 on Thursday '
            'morning.</p><p>S&amp;P downgraded its outlook on Australia\'s credit rating from stable '
            'to negative, due to the prospect of ongoing budget deficits without substantial reforms '
            'being passed by parliament.</p><p>Westpac chief currency strategist&nbsp;Robert Rennie '
            'said the uncertain election outcome is likely to result in a longer run of budget'
            'deficits.</p><p>\"It was clearly a risk and the market has been living in its shadow since'
            'Monday morning,\" he said.</p><p>\"Gridlock or the inability to improve the fiscal situation '
            'over the forecast period is something I think a ratings agency ought to take into '
            'account.\"</p><p><span style=\"background-color: transparent;\">The currency had a sudden '
            'plunge to 74.67 US cents on the announcement from S&amp;P, before recovering some of that '
            'ground.</span></p><p><span style=\"background-color: transparent;\">Mr Rennie tipped the '
            'Australian dollar will slip further on Thursday.</span></p><p><span style=\"background-color:'
            'transparent;\">\"We should make fresh lows, we should be pushing down though 74 US cents '
            'and possibly lower,\" he said.</span></p><p><span style=\"background-color: '
            'transparent;\">KEY MOVEMENTS:</span></p><p><span style=\"background-color: transparent;\">One'
            'Australian dollar buys:</span><br></p><p>   * 74.98 US cents, from\n\n 74.41\n\ncents on '
            'Wednesday</p><p>   * 75.63 Japanese yen, from \n\n75.15\n\n yen</p><p>   * 67.64 euro cents, '
            'from \n\n67.24\n\n euro cents</p><p>   * 105.01 New Zealand cents, from \n\n104.85\n\n NZ '
            'cents</p><p>   * 57.96 British pence, from \n\n57.53\n\n pence</p><p>   Government bond '
            'yields:</p><p>   * CGS 5.25pct March 2019, 1.510pct, from \n\n1.513pct</p><p>   * CGS 4.25pct'
            'April 2026, 1.868pct, from \n\n1.862pct</p><p>   Sydney Futures Exchange prices:</p><p>   *'
            'September 2016 10-year bond futures contract, was at 98.125\n\n (1.875\n\n per cent), '
            'unchanged from Wednesday</p><p>   * September 2016 3-year bond futures contract, at 98.570 '
            '(1.430 per cent), up from \n\n98.550\n\n (1.450\n\n per cent)</p><p>   (*Currency closes '
            'taken at 1700 AEST previous local session, bond market closes taken at 1630 AEST previous '
            'local session)</p><p>   Source: IRESS</p>',
            'word_count': '1',
            'priority': 1
        }
        doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        item = doc['formatted_item'].replace(self.line_ender, self.line_feed)
        nitf_xml = etree.fromstring(item)
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn('from 74.41', nitf_xml.find('body/body.content/pre').text)

    def testStraySpaceContent(self):
        """Format paragraphs whose quote marks sit next to ``<span>`` elements and compare the ``<pre>`` text."""
        span_open = '<span style=\"background-color: transparent;\">'
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'type': 'text',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'body_html': (
                '<p>' + span_open + '\"</span>' +
                span_open + 'However</span></p>' +
                '<p>\"' + span_open + 'The proposed</p>'
            ),
            'word_count': '1',
            'priority': 1
        }
        published = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        xml_text = published['formatted_item'].replace(self.line_ender, self.line_feed)
        root = etree.fromstring(xml_text)
        pre_text = root.find('body/body.content/pre').text
        self.assertEqual(
            pre_text,
            '\"However   \"The proposedZZZZYYYY\n   AAPZZZZYYYY\n')

    def testSpacesContent(self):
        """Format a paragraph mixing plain spaces, ``&nbsp;`` and ``\\xA0`` and compare the ``<pre>`` text."""
        subscriber = {'name': 'Test Subscriber'}
        article = dict([
            ('_id', '3'),
            ('source', 'AAP'),
            ('anpa_category', [{'qcode': 'a'}]),
            ('headline', 'This is a test headline'),
            ('slugline', 'slugline'),
            ('subject', [{'qcode': '02011001'}]),
            ('anpa_take_key', 'take_key'),
            ('unique_id', 1),
            ('type', 'text'),
            ('versioncreated', datetime(2018, 6, 13, 11, 45, 19, 0)),
            ('body_html', '<p>a b  c   d&nbsp;e&nbsp;&nbsp;f\xA0g</p>'),
            ('word_count', '1'),
            ('priority', 1),
        ])
        first_result = self.formatter.format(article, subscriber)[0]
        xml_text = first_result['formatted_item'].replace(self.line_ender, self.line_feed)
        pre_element = etree.fromstring(xml_text).find('body/body.content/pre')
        self.assertEqual(
            pre_element.text,
            '   a b c d e  f gZZZZYYYY\n   AAPZZZZYYYY\n')

    def testControlCharsContent(self):
        """Format a body made up of control characters inside spans and compare the ``<pre>`` text."""
        body_html = (
            '<p><span style=\"background-color: transparent;\">\u0018\u0012\f \u000b\u0012\b</span>'
            '<span style=\"background-color: transparent;\">\u0005\f\u0006\b \u0006\f\u0019&nbsp;</span>'
            '</p>'
        )
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'type': 'text',
            'body_html': body_html,
            'word_count': '1',
            'priority': 1
        }
        published = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        xml_text = published['formatted_item'].replace(self.line_ender, self.line_feed)
        root = etree.fromstring(xml_text)
        self.assertEqual(
            root.find('body/body.content/pre').text, '      AAPZZZZYYYY\n')

    def testNullTakeKeyContent(self):
        """Format an article whose take key, byline and abstract are ``None``.

        Checks that no ``anpa-takekey`` meta element is present in the head.
        """
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': None,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': None,
            'unique_id': 1,
            'type': 'text',
            'body_html': '<p>no body</p>',
            'word_count': '1',
            'priority': 1,
            'abstract': None
        }
        published = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        xml_text = published['formatted_item'].replace(self.line_ender, self.line_feed)
        root = etree.fromstring(xml_text)
        take_key_meta = root.find('head/meta[@name="anpa-takekey"]')
        self.assertIsNone(take_key_meta)

    def testLocator(self):
        """Check the prefix placed before the headline in the title and ``hl1`` elements.

        The article is formatted twice: once as given (expecting a ``VIC:``
        prefix) and once after switching category and subject to cricket
        (expecting ``CRIK:``).
        """
        article = {
            '_id': '4853',
            'slugline': 'Gangs',
            'byline': '',
            'anpa_category': [{
                'name': 'Australian General News',
                'qcode': 'a'
            }],
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'genre': [{
                'name': 'Article (news)',
                'qcode': 'Article'
            }],
            'priority': 6,
            'unique_id': 33423059,
            'format': 'HTML',
            'guid': '4853',
            'headline': 'Drop gang campaign, bishop tells Vic Libs',
            'source': 'TEST',
            'subject': [{
                'name': 'crime, law and justice',
                'qcode': '02000000'
            }],
            'flags': {
                'marked_for_sms': False,
                'marked_for_not_publication': False,
                'marked_archived_only': False,
                'marked_for_legal': False
            },
            'anpa_take_key': None,
            'pubstatus': 'usable',
            'schedule_settings': {
                'time_zone': None,
                'utc_embargo': None,
                'utc_publish_schedule': None
            },
            'urgency': 5,
            'word_count': 83,
            'type': 'text',
            'place': [{
                'name': 'VIC',
                'qcode': 'VIC',
                'world_region': 'Oceania',
                'state': 'Victoria',
                'country': 'Australia',
                'group': 'Australia'
            }],
            'state': 'corrected',
            'body_html': '<p>A Melbourne-based Anglican bishop is calling on the Liberal party</P',
            '_current_version': 3,
            'sign_off': 'MG',
            'ednote': 'In the story \'Gangs\' sent at: 16/07/2018 15:26\r\n\r\nThis is a corrected repeat.',
            'sms_message': ''
        }

        def check_headline(expected):
            # Format the current state of ``article`` and compare both headline spots.
            published = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
            xml_text = published['formatted_item'].replace(self.line_ender, self.line_feed)
            root = etree.fromstring(xml_text)
            self.assertEqual(root.find('head/title').text, expected)
            self.assertEqual(root.find('body/body.head/hedline/hl1').text, expected)

        check_headline('VIC:Drop gang campaign, bishop tells Vic Libs')

        article['anpa_category'] = [{'name': 'Domestic Sports', 'qcode': 't'}]
        article['subject'] = [{'name': 'Cricket', 'qcode': '15017000'}]
        check_headline('CRIK:Drop gang campaign, bishop tells Vic Libs')
 def setUp(self):
     """Run the parent setUp, create both formatters and call ``init_app`` on ``self.app``."""
     # NOTE(review): this method is indented by a single space, unlike the
     # four-space methods around it, and it repeats the setUp at the top of the
     # file. It looks like a stray copy -- confirm whether it should be removed
     # or re-indented into a class.
     super().setUp()
     self.formatter = NITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
class NitfFormatterTest(TestCase):
    def setUp(self):
        """Create the formatters used by the tests and call ``init_app`` on ``self.app``."""
        # Run the parent fixture first, as the other setUp definitions in this
        # file do, so base-class preparation happens before ``self.app`` is used.
        super().setUp()
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """A legal-flagged article gets a ``Legal: `` prefix, with and without ``truncate``."""
        article = dict(
            slugline='Obama Republican Healthc',
            flags={'marked_for_legal': True},
        )

        full = self.base_formatter.append_legal(article)
        self.assertEqual(full, 'Legal: Obama Republican Healthc')

        truncated = self.base_formatter.append_legal(article, truncate=True)
        self.assertEqual(truncated, 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """An article not flagged for legal keeps its slugline unchanged."""
        original_slugline = 'Obama Republican Healthc'
        article = dict(
            slugline=original_slugline,
            flags={'marked_for_legal': False},
        )

        result = self.base_formatter.append_legal(article)
        self.assertEqual(result, 'Obama Republican Healthc')

    def test_formatter(self):
        """Format a minimal text article and check the title, first paragraph and urgency."""
        article = dict([
            ('headline', 'test headline'),
            ('body_html', '<p>test body</p>'),
            ('type', 'text'),
            ('priority', '1'),
            ('_id', 'urn:localhost.abc'),
            ('urgency', 2),
        ])

        results = self.formatter.format(article, {'name': 'Test Subscriber'})
        _seq, formatted = results[0]
        root = etree.fromstring(formatted)

        title = root.find('head/title')
        self.assertEqual(title.text, article['headline'])
        paragraph = root.find('body/body.content/p')
        self.assertEqual(paragraph.text, 'test body')
        urgency = root.find('head/docdata/urgency')
        self.assertEqual(urgency.get('ed-urg'), '2')

    def test_html2nitf(self):
        """Convert a mixed HTML tree with ``html2nitf`` and compare it, whitespace-stripped, to the expected NITF."""

        def squash(text):
            # Drop every newline and space so only the markup and words are compared.
            return text.replace('\n', '').replace(' ', '')

        source_tree = etree.fromstring(dedent("""\
            <div>
                <unknown>
                    <p>
                        this should be still here
                    </p>
                </unknown>
                <p style="this='is';some='style'">
                    <strong>this text should be
                        <i>modified</i>
                    </strong>
                    so
                    <span>[this should not be removed]</span>
                    unkown
                    <em unknown_attribute="toto">elements</em>
                    and
                    <a bad_attribute="to_remove">attributes</a>
                    are
                    <h6>removed</h6>
                </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        expected = squash(dedent("""\
            <div>
                    <p>
                        this should be still here
                    </p>
                <p>
                    <em class="bold">this text should be
                        <em class="italic">modified</em>
                    </em>
                    so [this should not be removed] unkown
                    <em class="italic">elements</em>
                    and
                    <a>attributes</a>
                    are
                    <hl2>removed</hl2>
                </p>
            </div>"""))
        actual = squash(etree.tostring(converted, encoding='unicode'))
        self.assertEqual(actual, expected)

    def test_html2nitf_br(self):
        """Check that <br/> is kept if it is a child of an enrichedText parent element"""
        source_markup = dedent("""\
            <div>
                <br/>the previous tag should be removed (but not the text)
                    <p>
                        the following tag <br/> should still be here
                        and the next one <br/> too
                    </p>
            </div>
            """)
        source_tree = etree.fromstring(source_markup)

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        expected = dedent("""\
            <div>
                the previous tag should be removed (but not the text)
                    <p>
                        the following tag <br/> should still be here
                        and the next one <br/> too
                    </p>
            </div>""")
        serialized = etree.tostring(converted, encoding='unicode')
        self.assertEqual(serialized, expected)

    def test_body_content_br(self):
        """Format a long article whose paragraphs contain ``<br>`` tags.

        Checks the serialized first ``<p>`` of the body content and the start
        of the second paragraph's text.
        """
        article = {
            "_id": "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "guid": "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "state": "published",
            "body_html": "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it "
                         "may have been a 10-minute delay by police that cost two lives.</p><p>Shortly after 2am "
                         "on December 16, 2014, gunman Man Haron Monis fired at escaping hostages.</p><p>That first "
                         "shot by Monis&nbsp;should have satisfied a so-called \"secondary trigger\" prompting "
                         "police to storm the Martin Place stronghold with some element of surprise, NSW Coroner "
                         "Michael Barnes found on Wednesday.</p><p>\"(But) the 10 minutes that lapsed without "
                         "decisive action by police was too long,\" Mr Barnes&nbsp;said in Sydney.</p><p>By the "
                         "time police smashed their way into the cafe at 2.14am in a flurry of stun-grenade "
                         "explosions, manager Tori Johnson had been forced to his knees and shot in the head."
                         "</p><p>Barrister Katrina Dawson was killed by police bullet fragments.</p><p>New police "
                         "commissioner Mick Fuller - who was one of the first commanders at the scene - admitted on "
                         "Wednesday tactical officers should have gone in earlier.</p><p>Mr Fuller went further than "
                         "the coroner when he told the Ten Network: \"We probably should have gone in before the "
                         "first shot.\"</p><p>\"Early intervention for terrorist incidents is the way forward, "
                         "knowing what we know now,\" he said.</p><p>\"But nevertheless it's still a very "
                         "dangerous tactic and people will potentially still lose their lives.\"</p><p>Mr Barnes "
                         "has made 45 findings on everything from police negotiation strategies to greater legal "
                         "protections for frontline officers in terrorist situations.<br></p><p>He lay the blame "
                         "for the loss of life squarely at the feet of Monis, but other parties, including prosecution "
                         "solicitors and a consulting psychiatrist, copped some criticism.</p><p>The cafe "
                         "was \"transformed into a prison run by a vicious maniac\" that day, Mr Barnes "
                         "said.<br></p><p class=\"\">The traditional \"contain and negotiate\" strategy was "
                         "appropriate early on but police failed to adequately reassess taking into account the "
                         "headway they were making with Monis.<br></p><p class=\"\">\"Sadly, it failed,\" Mr Barnes "
                         "said on Wednesday.<br></p><p class=\"\">\"The 'contain and negotiate' approach may not be "
                         "the best ongoing response to a terrorist incident if the offenders believe, whether or not "
                         "they survive, their cause will benefit from the publicity generated by a protracted "
                         "siege.\"<br></p><p class=\"\">Mr Fuller said the containment strategy had saved countless "
                         "lives over the years and wouldn't be abandoned for non-terrorist incidents.</p>"
                         "<p class=\"\">Police mistakenly thought Monis' backpack could house explosives, "
                         "but Mr Barnes noted senior officers were \"unduly reluctant\" to initiate direct action "
                         "plans during siege situations.</p><p class=\"\">The \"primary trigger\" for such an "
                         "assault was identified as the death or serious injury of a hostage - evidence which "
                         "disgusted the families of victims and survivors.</p><p class=\"\">The police response was "
                         "at times hampered by mishaps.</p><p>Eight calls to a phone number hostages expected would "
                         "connect them with negotiators were missed, which was a \"significant failure\", Mr Barnes "
                         "said.</p><p>Commanders and tactical officers received specialist terrorist training but "
                         "negotiators received \"little, if any\", the coroner added.</p><p>The stand-off could not "
                         "have eventuated in the first place if Monis had not been granted bail on accessory to murder"
                         " and dozens of sexual assault offences.<br></p><p>Mr Barnes found the work of an Office of "
                         "the Director of Public Prosecutions solicitor in December 2013 was inadequate, "
                         "\"erroneously\" advising a court Monis didn't have to show exceptional circumstances "
                         "in arguing for bail.</p><p>Police also made a mistake by issuing Monis with a court "
                         "attendance notice for the sexual offences in October 2014 rather than arresting him"
                         ".</p><p>Monis was already on bail at the time for a commonwealth offence after he'd "
                         "written offensive letters to the families of Australian soldiers killed in the Middle"
                         " East.</p><p>States can find it difficult to access commonwealth records, Mr Barnes said,"
                         " and he called for that to be remedied.</p><p>Some of the coroner's harshest individual "
                         "criticism was reserved for the consultant psychiatrist who advised police.</p>"
                         "<p>His \"sub-optimal\" performance included a belief that Monis was merely grandstanding,"
                         " Mr Barnes found.</p><p>The doctor should not have been permitted to advise on negotiation "
                         "strategy and he made \"erroneous and unrealistic assessments\" of what was happening inside "
                         "the cafe.</p><p>The psychiatrist's advice was ambiguous and Islamic terrorism was beyond his"
                         " expertise.</p><p>\"The police commanders underestimated the threat Monis posed,\" Mr Barnes"
                         " said, in part blaming their reliance upon the psychiatrist's opinion.</p><p>He recommended "
                         "a more diverse panel of experts be used in the future.</p><p>The coroner stated police "
                         "snipers couldn't have ended the siege despite a 10-minute window where they had clear sight "
                         "of a head that could have been Monis.</p><p>Those gunmen were never confident in their legal "
                         "justification for a \"kill shot\" and Mr Barnes suggested their&nbsp;power to use force "
                         "should be more clearly defined.</p><p>The coroner did acknowledge that sending tactical "
                         "officers into the cafe after their hand was forced was a decision no commander would "
                         "ever want to face.</p><p>\"The bravery of these officers inspires awe,\" he said.<br></p>",
            "pubstatus": "usable",
            "type": "text",
            "abstract": "<p>The NSW coroner believes a 10-minute period of inaction by police before the bloody end "
                        "of the 2014 Lindt Cafe siege was \"too long\".</p>",
            "priority": 6,
            "unique_id": 12055427,
            "format": "HTML",
            "genre": [
                {
                    "qcode": "Wrap",
                    "name": "Wrap"
                }
            ],
            "word_count": 843,
            "source": "AAP",
            "urgency": 1,
            "subject": [
                {
                    "qcode": "16001000",
                    "parent": "16000000",
                    "name": "act of terror"
                },
                {
                    "qcode": "02001010",
                    "parent": "02001000",
                    "name": "terrorism"
                }
            ],
            "flags": {
                "marked_archived_only": False,
                "marked_for_legal": False,
                "marked_for_not_publication": False,
                "marked_for_sms": False
            },
            "headline": "'Ten minutes was too long': Lindt siege",
            "dateline": {
                "source": "AAP",
                "text": "SYDNEY, May 24 AAP -",
                "located": {
                    "city": "Sydney",
                    "country_code": "AU",
                    "country": "Australia",
                    "dateline": "city",
                    "state_code": "NSW",
                    "state": "New South Wales",
                    "alt_name": "",
                    "tz": "Australia/Sydney",
                    "city_code": "Sydney"
                }
            },
            "anpa_category": [
                {
                    "qcode": "a",
                    "name": "Australian General News"
                }
            ],
            "unique_name": "#12055427",
            "place": [
                {
                    "name": "NSW",
                    "qcode": "NSW",
                    "country": "Australia",
                    "world_region": "Oceania",
                    "group": "Australia",
                    "state": "New South Wales"
                }
            ],
            "sign_off": "SN/jmk/jcd/pmu",
            "anpa_take_key": "2nd Wrap (pix/video available)",
            "language": "en",
            "slugline": "Cafe",
            "byline": "Jamie McKinnell",
            "version": 2,
        }

        formatted = self.formatter.format(article, {})
        payload = formatted[0][1]
        root = etree.fromstring(payload)

        # The first paragraph is compared in serialized form, trailing whitespace included.
        first_paragraph = etree.tostring(root.find('body/body.content/p'), encoding="unicode")
        self.assertEqual(first_paragraph,
                         "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it may "
                         "have been a 10-minute delay by police that cost two lives.</p>\n      ")

        second_paragraph = root.findall('body/body.content/p')[1]
        self.assertTrue(second_paragraph.text.startswith('Shortly after 2am'))

    def test_html2nitf_br_last(self):
        """A trailing <br/> at the end of a <p> is dropped; <br/> tags elsewhere in a <p> stay."""
        source_markup = dedent("""\
            <div>
                    <p>
                        the following tag <br/> should still be here
                    </p>
                    <p>
                        and the next one <br/> too
                    </p>
                    <p>
                        but not the last one:<br/>
                    </p>
            </div>
            """)
        source_tree = etree.fromstring(source_markup)

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        expected = dedent("""\
            <div>
                    <p>
                        the following tag <br/> should still be here
                    </p>
                    <p>
                        and the next one <br/> too
                    </p>
                    <p>
                        but not the last one:
                    </p>
            </div>""")
        serialized = etree.tostring(converted, encoding='unicode')
        self.assertEqual(serialized, expected)

    def test_html2nitf_style_cleaning(self):
        """The <style> element and style attributes must be gone after ``html2nitf``."""

        def squash(text):
            # Compare with all newlines and spaces removed.
            return text.replace('\n', '').replace(' ', '')

        source_tree = etree.fromstring(dedent("""\
            <div>
                <style type="text/css">
                    p { margin-bottom: 0.25cm; line-height: 120%; }a:link {  }
                </style>
                <p style="margin-bottom: 0cm; line-height: 100%">Test bla bla bla</p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <br/>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <font face="DejaVu Sans, sans-serif">
                        <font style="font-size: 14pt" size="4">
                            <i>
                                <u>
                                    <b>test</b>
                                </u>
                            </i>
                        </font>
                    </font>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">toto</p>
                <p style="margin-bottom: 0cm; line-height: 100%">titi</p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        expected = squash(dedent("""\
            <div>
                <p>Test bla bla bla</p>
                <p>
                </p>
                <p>
                    <em class="italic">
                        <em class="underscore">
                            <em class="bold">test</em>
                        </em>
                    </em>
                </p>
                <p>toto</p>
                <p>titi</p>
            </div>"""))
        actual = squash(etree.tostring(converted, encoding='unicode'))
        self.assertEqual(actual, expected)

    def test_table(self):
        """A whitespace-free HTML table must serialize unchanged after ``html2nitf``."""
        pretty_markup = """
        <div>
        <table>
            <tbody>
                <tr>
                    <td>Table cell 1</td>
                    <td>Table cell 2</td>
                    <td>Table cell 3</td>
                </tr>
                <tr>
                    <td>Table cell 2.1</td>
                    <td>Table cell 2.2</td>
                    <td>Table cell 2.3</td>
                </tr>
                <tr>
                    <td>Table cell 3.1</td>
                    <td>Table cell 3.2</td>
                    <td>Table cell 3.3</td>
                </tr>
            </tbody>
        </table>
        </div>
        """
        # Strip every newline and space so input and output can be compared verbatim.
        html_raw = pretty_markup.replace('\n', '').replace(' ', '')
        source_tree = etree.fromstring(html_raw)
        converted = self.formatter.html2nitf(source_tree)
        serialized = etree.tostring(converted, encoding='unicode')
        self.assertEqual(serialized, html_raw)

    def test_company_codes(self):
        """A company code on the article shows up as an ``<org>`` element in the body head."""
        company_code = {'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'}
        dateline = {
            'source': 'AAP',
            'text': 'Los Angeles, Aug 11 AAP -',
            'located': {
                'alt_name': '',
                'state': 'California',
                'city_code': 'Los Angeles',
                'city': 'Los Angeles',
                'dateline': 'city',
                'country_code': 'US',
                'country': 'USA',
                'tz': 'America/Los_Angeles',
                'state_code': 'CA'
            }
        }
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001', 'name': 'international court or tribunal'},
                        {'qcode': '02011002', 'name': 'extradition'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': dateline,
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{'qcode': 'Australia', 'name': 'Australia',
                       'state': '', 'country': 'Australia',
                       'world_region': 'Oceania'}],
            'company_codes': [company_code]
        }

        _seq, formatted = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        root = etree.fromstring(formatted)
        org = root.find('body/body.head/org')
        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        org_attributes = org.attrib
        self.assertEqual(org_attributes.get('idsrc', ''), 'ASX')
        self.assertEqual(org_attributes.get('value', ''), 'YAL')

    def testNoneAsciNamesContent(self):
        """Non-ASCII characters in the body must come through unchanged in the first ``<p>``."""
        body_text = 'Tommi Mäkinen crashes a Škoda in Äppelbo'
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>' + body_text + '</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [
                {"package": "package", "package_type": "takes"}
            ],
        }
        results = self.formatter.format(article, {'name': 'Test Subscriber'})
        seq, doc = results[0]
        nitf_xml = etree.fromstring(doc)
        first_paragraph = nitf_xml.find('body/body.content/p')
        self.assertEqual(first_paragraph.text, 'Tommi Mäkinen crashes a Škoda in Äppelbo')