Python Formatter Examples

Programming Language: Python

Namespace/Package Name: superdesk.publish.formatters

Class/Type: Formatter

Examples at hotexamples.com: 20

Python Formatter - 20 examples found. These are the top-rated real-world Python examples of superdesk.publish.formatters.Formatter, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Formatter(6)

append_legal(5)

__init__(2)

Example #1

Show file

File: nitf_formatter_tests.py Project: ahilles107/superdesk-core

class NitfFormatterTest(TestCase):
    """Checks ``append_legal`` on a bare ``Formatter`` instance."""

    def setUp(self):
        """Run the parent set-up, build both formatters and call ``init_app`` on the app."""
        super().setUp()
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """An item marked for legal gets the 'Legal: ' prefix; ``truncate=True`` shortens it."""
        legal_item = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True},
        }

        self.assertEqual(
            self.base_formatter.append_legal(legal_item),
            'Legal: Obama Republican Healthc',
        )
        self.assertEqual(
            self.base_formatter.append_legal(legal_item, truncate=True),
            'Legal: Obama Republican ',
        )

    def test_append_legal_when_not_legal(self):
        """An item not marked for legal keeps its slugline unchanged."""
        plain_item = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': False},
        }

        self.assertEqual(
            self.base_formatter.append_legal(plain_item),
            'Obama Republican Healthc',
        )

Example #2

Show file

File: nitf_formatter.py Project: jerome-poisson/superdesk-core

    def __init__(self):
        """Initialise the base ``Formatter`` and build the ``HTML2NITF`` element map."""
        Formatter.__init__(self)

        # HTML2NITF maps an HTML tag name to a dict describing its conversion.
        # In each element's dict, the following keys can be used:
        # - 'nitf': new NITF compatible element to use (empty string to remove)
        # - 'attrib': new attributes to use (replace existing ones if set)
        # - 'filter': callback to use for complex changes (root_element and element as arguments)
        # NOTE(review): an empty dict presumably means "keep the element as is" -- confirm
        # against the code that consumes this map.
        self.HTML2NITF = {
            'p': {},
            'b': {
                'nitf': 'em',
                'attrib': {'class': 'bold'}},
            'br': {'filter': self.br_filter},
            'strong': {
                'nitf': 'em',
                'attrib': {'class': 'bold'}},
            'i': {
                'nitf': 'em',
                'attrib': {'class': 'italic'}},
            'em': {
                'nitf': 'em',
                'attrib': {'class': 'italic'}},
            'u': {
                'nitf': 'em',
                'attrib': {'class': 'underscore'}},
            'strike': {'nitf': 'em'},
            'sup': {},
            'sub': {},
            'a': {},
            'img': {'nitf': ''},  # <img> use <media> in nitf, so we remove element
            'blockquote': {'nitf': 'bq'},
            'pre': {},
            'ol': {},
            'ul': {},
            'li': {},
            # FIXME: hl1 is not used here as it can only appear in <hedline>
            # every HTML heading level therefore maps to the same <hl2> element
            'h1': {'nitf': 'hl2'},
            'h2': {'nitf': 'hl2'},
            'h3': {'nitf': 'hl2'},
            'h4': {'nitf': 'hl2'},
            'h5': {'nitf': 'hl2'},
            'h6': {'nitf': 'hl2'},
            # tables
            'table': {},
            'tbody': {},
            'tr': {},
            'td': {},
            'th': {},
            'style': {'nitf': EraseElement},  # <style> may be there in case of bad paste
        }

Example #3

Show file

    def __init__(self):
        """Initialise the base ``Formatter`` and build the ``HTML2NITF`` element map."""
        Formatter.__init__(self)

        # HTML2NITF maps an HTML tag name to a dict describing its conversion.
        # In each element's dict, the following keys can be used:
        # - 'nitf': new NITF compatible element to use (empty string to remove)
        # - 'attrib': new attributes to use (replace existing ones if set)
        # - 'filter': callback to use for complex changes (root_element and element as arguments)
        # NOTE(review): an empty dict presumably means "keep the element as is" -- confirm
        # against the code that consumes this map.
        self.HTML2NITF = {
            'p': {},
            'b': {
                'nitf': 'em',
                'attrib': {'class': 'bold'}},
            'br': {'filter': self.br_filter},
            'strong': {
                'nitf': 'em',
                'attrib': {'class': 'bold'}},
            'i': {
                'nitf': 'em',
                'attrib': {'class': 'italic'}},
            'em': {
                'nitf': 'em',
                'attrib': {'class': 'italic'}},
            'u': {
                'nitf': 'em',
                'attrib': {'class': 'underscore'}},
            'strike': {'nitf': 'em'},
            'sup': {},
            'sub': {},
            'a': {},
            'img': {'nitf': ''},  # <img> use <media> in nitf, so we remove element
            'blockquote': {'nitf': 'bq'},
            'pre': {},
            'ol': {},
            'ul': {},
            'li': {},
            # FIXME: hl1 is not used here as it can only appear in <hedline>
            # every HTML heading level therefore maps to the same <hl2> element
            'h1': {'nitf': 'hl2'},
            'h2': {'nitf': 'hl2'},
            'h3': {'nitf': 'hl2'},
            'h4': {'nitf': 'hl2'},
            'h5': {'nitf': 'hl2'},
            'h6': {'nitf': 'hl2'},
            # tables
            'table': {},
            'tbody': {},
            'tr': {},
            'td': {},
            'th': {},
            'style': {'nitf': EraseElement},  # <style> may be there in case of bad paste
        }

Example #4

Show file

File: ntb_nitf_legacy_test.py Project: tomaskikutis/superdesk-ntb

 def setUp(self):
     """Build the formatters and format the legacy article used by the tests.

     The article is a shallow copy of ``ARTICLE`` whose ``anpa_category`` is
     replaced by three services. It is passed through
     ``NTBNITFMultiFileFormatter.format`` and every ``encoded_item`` of the
     output is parsed with ``etree.fromstring``.
     """
     # Zero-argument super() runs the parent's setUp. The previous
     # super(TestCase, self) form started the MRO lookup *after* TestCase,
     # so TestCase.setUp itself was never executed.
     super().setUp()
     article_legacy = ARTICLE.copy()
     # only a top-level key is replaced, so the shallow copy leaves ARTICLE untouched
     article_legacy['anpa_category'] = [
         {'name': 'service1'},
         {'name': 'service2'},
         {'name': 'service3'},
     ]
     self.formatter = NTBNITFMultiFileFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
     self.tz = pytz.timezone(self.app.config['DEFAULT_TIMEZONE'])
     if self.article is None:
         # formatting is done once for all tests to save time
         # as long as used attributes are not modified, it's fine
         # NOTE(review): the results are stored on the instance; unittest
         # creates a new instance per test method, so unless `article` is
         # cached elsewhere this branch presumably runs every time -- confirm.
         self.article = article_legacy
         self.formatter_output = self.formatter.format(
             self.article, {'name': 'Test NTBNITF'})
         self.docs = [
             item['encoded_item']
             for item in self.formatter_output
         ]
         self.nitf_xmls = [etree.fromstring(doc) for doc in self.docs]
         self.nitf_xml = self.nitf_xmls[0]

Example #5

Show file

File: agenda_planning_formatter_test.py Project: araxiskeshju/superdesk-aap

 def setUp(self):
     """Create the formatters, initialise planning and agenda, then insert the fixture data."""
     self.formatter = AgendaPlanningFormatter()
     self.base_formatter = Formatter()
     planning_init_app(self.app)
     init_agenda(self.app)
     # (resource name, attribute of this test case holding the documents),
     # inserted in exactly this order
     fixtures = (
         ('locations', 'locations'),
         ('agenda_city_map', 'city_map'),
         ('agenda_iptc_map', 'iptc_map'),
         ('contacts', 'contacts'),
     )
     for resource, attribute in fixtures:
         self.app.data.insert(resource, getattr(self, attribute))

Example #6

Show file

File: nitf_formatter.py Project: superdesk/superdesk-core

    def __init__(self):
        """Initialise the base ``Formatter`` and build the ``HTML2NITF`` element map."""
        Formatter.__init__(self)

        # HTML2NITF maps an HTML tag name to a dict describing its conversion.
        # In each element's dict, the following keys can be used:
        # - 'nitf': new NITF compatible element to use (empty string to remove)
        # - 'attrib': new attributes to use (replace existing ones if set)
        # - 'filter': callback to use for complex changes (root_element and element as arguments)
        # NOTE(review): an empty dict presumably means "keep the element as is" -- confirm
        # against the code that consumes this map.
        self.HTML2NITF = {
            "p": {},
            "b": {"nitf": "em", "attrib": {"class": "bold"}},
            "br": {"filter": self.br_filter},
            "strong": {"nitf": "em", "attrib": {"class": "bold"}},
            "i": {"nitf": "em", "attrib": {"class": "italic"}},
            "em": {"nitf": "em", "attrib": {"class": "italic"}},
            "u": {"nitf": "em", "attrib": {"class": "underscore"}},
            "strike": {"nitf": "em"},
            "sup": {},
            "sub": {},
            "a": {},
            "img": {"nitf": ""},  # <img> use <media> in nitf, so we remove element
            "blockquote": {"nitf": "bq"},
            "pre": {},
            "ol": {},
            "ul": {},
            "li": {},
            # FIXME: hl1 is not used here as it can only appear in <hedline>
            # every HTML heading level therefore maps to the same <hl2> element
            "h1": {"nitf": "hl2"},
            "h2": {"nitf": "hl2"},
            "h3": {"nitf": "hl2"},
            "h4": {"nitf": "hl2"},
            "h5": {"nitf": "hl2"},
            "h6": {"nitf": "hl2"},
            # tables
            "table": {},
            "tbody": {},
            "tr": {},
            "td": {},
            "th": {},
            "style": {"nitf": EraseElement},  # <style> may be there in case of bad paste
        }

Example #7

Show file

File: ntb_nitf_test.py Project: ride90/superdesk-ntb

 def setUp(self):
     """Build the formatters and, if no article is set yet, format ``ARTICLE`` and parse it.

     After the parent set-up this stores the NTB NITF formatter, a bare
     ``Formatter`` and the configured default timezone. When
     ``self.article`` is still ``None`` it also stores the formatter output,
     the first ``encoded_item`` and its parsed XML tree.
     """
     super().setUp()
     self.formatter = NTBNITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
     self.tz = pytz.timezone(self.app.config['DEFAULT_TIMEZONE'])
     if self.article is not None:
         return
     # formatting is meant to happen once for all tests to save time;
     # that is fine as long as the attributes used are not modified
     subscriber = {'name': 'Test NTBNITF'}
     self.article = ARTICLE
     self.formatter_output = self.formatter.format(self.article, subscriber)
     first_item = self.formatter_output[0]
     self.doc = first_item['encoded_item']
     self.nitf_xml = etree.fromstring(self.doc)

Example #8

Show file

class NitfFormatterTest(TestCase):
    def setUp(self):
        """Create the NITF formatter and a bare ``Formatter``, then call ``init_app`` on the test app."""
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """An item marked for legal gets the 'Legal: ' prefix; ``truncate=True`` shortens it."""
        legal_item = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True},
        }

        self.assertEqual(
            self.base_formatter.append_legal(legal_item),
            'Legal: Obama Republican Healthc',
        )
        self.assertEqual(
            self.base_formatter.append_legal(legal_item, truncate=True),
            'Legal: Obama Republican ',
        )

    def test_append_legal_when_not_legal(self):
        """An item not marked for legal keeps its slugline unchanged."""
        plain_item = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': False},
        }

        self.assertEqual(
            self.base_formatter.append_legal(plain_item),
            'Obama Republican Healthc',
        )

    def test_formatter(self):
        """Format a minimal text item and check title, first paragraph and urgency in the NITF."""
        item = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2,
        }
        subscriber = {'name': 'Test Subscriber'}

        # the first result entry unpacks into exactly two values; only the document is used
        _seq, encoded = self.formatter.format(item, subscriber)[0]
        root = etree.fromstring(encoded)

        title = root.find('head/title')
        self.assertEqual(title.text, item['headline'])

        paragraph = root.find('body/body.content/p')
        self.assertEqual(paragraph.text, 'test body')

        urgency = root.find('head/docdata/urgency')
        self.assertEqual(urgency.get('ed-urg'), '2')

    def test_html2nitf(self):
        """Convert a mixed HTML fragment and compare the result with all whitespace removed.

        The expected output drops the <unknown> and <span> wrappers while
        keeping their content, removes the ``style`` and unrecognised
        attributes, and maps <strong>/<i>/<em>/<h6> to their NITF forms.
        """
        def squash(text):
            # the comparison ignores layout: drop every newline and every space
            return text.replace('\n', '').replace(' ', '')

        source = dedent("""\
            <div>
                <unknown>
                    <p>
                        this should be still here
                    </p>
                </unknown>
                <p style="this='is';some='style'">
                    <strong>this text should be
                        <i>modified</i>
                    </strong>
                    so
                    <span>[this should not be removed]</span>
                    unkown
                    <em unknown_attribute="toto">elements</em>
                    and
                    <a bad_attribute="to_remove">attributes</a>
                    are
                    <h6>removed</h6>
                </p>
            </div>
            """)
        html = etree.fromstring(source)

        nitf = self.formatter.html2nitf(html, attr_remove=['style'])

        expected = squash(dedent("""\
            <div>
                    <p>
                        this should be still here
                    </p>
                <p>
                    <em class="bold">this text should be
                        <em class="italic">modified</em>
                    </em>
                    so [this should not be removed] unkown
                    <em class="italic">elements</em>
                    and
                    <a>attributes</a>
                    are
                    <hl2>removed</hl2>
                </p>
            </div>"""))
        serialized = etree.tostring(nitf, encoding='unicode')
        self.assertEqual(squash(serialized), expected)

    def test_html2nitf_br(self):
        """Check that <br/> is kept when it is a child of an enrichedText parent element.

        The <br/> placed directly under <div> is expected to disappear while
        its trailing text stays; the two <br/> inside <p> are expected to remain.
        """
        source_tree = etree.fromstring(
            dedent("""\
            <div>
                <br/>the previous tag should be removed (but not the text)
                    <p>
                        the following tag <br/> should still be here
                        and the next one <br/> too
                    </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        wanted = dedent("""\
            <div>
                the previous tag should be removed (but not the text)
                    <p>
                        the following tag <br/> should still be here
                        and the next one <br/> too
                    </p>
            </div>""")
        # compared verbatim, so whitespace in the serialized tree matters here
        serialized = etree.tostring(converted, encoding='unicode')
        self.assertEqual(serialized, wanted)

    def test_body_content_br(self):
        """Format a long article whose body has <br> tags at the end of several paragraphs.

        Checks the serialized first <p> of ``body/body.content`` and the
        start of the text of the second one.
        """
        # fixture: a published text item; several <p> elements of body_html end with <br>
        article = {
            "_id":
            "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "guid":
            "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "state":
            "published",
            "body_html":
            "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it "
            "may have been a 10-minute delay by police that cost two lives.</p><p>Shortly after 2am "
            "on December 16, 2014, gunman Man Haron Monis fired at escaping hostages.</p><p>That first "
            "shot by Monis&nbsp;should have satisfied a so-called \"secondary trigger\" prompting "
            "police to storm the Martin Place stronghold with some element of surprise, NSW Coroner "
            "Michael Barnes found on Wednesday.</p><p>\"(But) the 10 minutes that lapsed without "
            "decisive action by police was too long,\" Mr Barnes&nbsp;said in Sydney.</p><p>By the "
            "time police smashed their way into the cafe at 2.14am in a flurry of stun-grenade "
            "explosions, manager Tori Johnson had been forced to his knees and shot in the head."
            "</p><p>Barrister Katrina Dawson was killed by police bullet fragments.</p><p>New police "
            "commissioner Mick Fuller - who was one of the first commanders at the scene - admitted on "
            "Wednesday tactical officers should have gone in earlier.</p><p>Mr Fuller went further than "
            "the coroner when he told the Ten Network: \"We probably should have gone in before the "
            "first shot.\"</p><p>\"Early intervention for terrorist incidents is the way forward, "
            "knowing what we know now,\" he said.</p><p>\"But nevertheless it's still a very "
            "dangerous tactic and people will potentially still lose their lives.\"</p><p>Mr Barnes "
            "has made 45 findings on everything from police negotiation strategies to greater legal "
            "protections for frontline officers in terrorist situations.<br></p><p>He lay the blame "
            "for the loss of life squarely at the feet of Monis, but other parties, including prosecution "
            "solicitors and a consulting psychiatrist, copped some criticism.</p><p>The cafe "
            "was \"transformed into a prison run by a vicious maniac\" that day, Mr Barnes "
            "said.<br></p><p class=\"\">The traditional \"contain and negotiate\" strategy was "
            "appropriate early on but police failed to adequately reassess taking into account the "
            "headway they were making with Monis.<br></p><p class=\"\">\"Sadly, it failed,\" Mr Barnes "
            "said on Wednesday.<br></p><p class=\"\">\"The 'contain and negotiate' approach may not be "
            "the best ongoing response to a terrorist incident if the offenders believe, whether or not "
            "they survive, their cause will benefit from the publicity generated by a protracted "
            "siege.\"<br></p><p class=\"\">Mr Fuller said the containment strategy had saved countless "
            "lives over the years and wouldn't be abandoned for non-terrorist incidents.</p>"
            "<p class=\"\">Police mistakenly thought Monis' backpack could house explosives, "
            "but Mr Barnes noted senior officers were \"unduly reluctant\" to initiate direct action "
            "plans during siege situations.</p><p class=\"\">The \"primary trigger\" for such an "
            "assault was identified as the death or serious injury of a hostage - evidence which "
            "disgusted the families of victims and survivors.</p><p class=\"\">The police response was "
            "at times hampered by mishaps.</p><p>Eight calls to a phone number hostages expected would "
            "connect them with negotiators were missed, which was a \"significant failure\", Mr Barnes "
            "said.</p><p>Commanders and tactical officers received specialist terrorist training but "
            "negotiators received \"little, if any\", the coroner added.</p><p>The stand-off could not "
            "have eventuated in the first place if Monis had not been granted bail on accessory to murder"
            " and dozens of sexual assault offences.<br></p><p>Mr Barnes found the work of an Office of "
            "the Director of Public Prosecutions solicitor in December 2013 was inadequate, "
            "\"erroneously\" advising a court Monis didn't have to show exceptional circumstances "
            "in arguing for bail.</p><p>Police also made a mistake by issuing Monis with a court "
            "attendance notice for the sexual offences in October 2014 rather than arresting him"
            ".</p><p>Monis was already on bail at the time for a commonwealth offence after he'd "
            "written offensive letters to the families of Australian soldiers killed in the Middle"
            " East.</p><p>States can find it difficult to access commonwealth records, Mr Barnes said,"
            " and he called for that to be remedied.</p><p>Some of the coroner's harshest individual "
            "criticism was reserved for the consultant psychiatrist who advised police.</p>"
            "<p>His \"sub-optimal\" performance included a belief that Monis was merely grandstanding,"
            " Mr Barnes found.</p><p>The doctor should not have been permitted to advise on negotiation "
            "strategy and he made \"erroneous and unrealistic assessments\" of what was happening inside "
            "the cafe.</p><p>The psychiatrist's advice was ambiguous and Islamic terrorism was beyond his"
            " expertise.</p><p>\"The police commanders underestimated the threat Monis posed,\" Mr Barnes"
            " said, in part blaming their reliance upon the psychiatrist's opinion.</p><p>He recommended "
            "a more diverse panel of experts be used in the future.</p><p>The coroner stated police "
            "snipers couldn't have ended the siege despite a 10-minute window where they had clear sight "
            "of a head that could have been Monis.</p><p>Those gunmen were never confident in their legal "
            "justification for a \"kill shot\" and Mr Barnes suggested their&nbsp;power to use force "
            "should be more clearly defined.</p><p>The coroner did acknowledge that sending tactical "
            "officers into the cafe after their hand was forced was a decision no commander would "
            "ever want to face.</p><p>\"The bravery of these officers inspires awe,\" he said.<br></p>",
            "pubstatus":
            "usable",
            "type":
            "text",
            "abstract":
            "<p>The NSW coroner believes a 10-minute period of inaction by police before the bloody end "
            "of the 2014 Lindt Cafe siege was \"too long\".</p>",
            "priority":
            6,
            "unique_id":
            12055427,
            "format":
            "HTML",
            "genre": [{
                "qcode": "Wrap",
                "name": "Wrap"
            }],
            "word_count":
            843,
            "source":
            "AAP",
            "urgency":
            1,
            "subject": [{
                "qcode": "16001000",
                "parent": "16000000",
                "name": "act of terror"
            }, {
                "qcode": "02001010",
                "parent": "02001000",
                "name": "terrorism"
            }],
            "flags": {
                "marked_archived_only": False,
                "marked_for_legal": False,
                "marked_for_not_publication": False,
                "marked_for_sms": False
            },
            "headline":
            "'Ten minutes was too long': Lindt siege",
            "dateline": {
                "source": "AAP",
                "text": "SYDNEY, May 24 AAP -",
                "located": {
                    "city": "Sydney",
                    "country_code": "AU",
                    "country": "Australia",
                    "dateline": "city",
                    "state_code": "NSW",
                    "state": "New South Wales",
                    "alt_name": "",
                    "tz": "Australia/Sydney",
                    "city_code": "Sydney"
                }
            },
            "anpa_category": [{
                "qcode": "a",
                "name": "Australian General News"
            }],
            "unique_name":
            "#12055427",
            "place": [{
                "name": "NSW",
                "qcode": "NSW",
                "country": "Australia",
                "world_region": "Oceania",
                "group": "Australia",
                "state": "New South Wales"
            }],
            "sign_off":
            "SN/jmk/jcd/pmu",
            "anpa_take_key":
            "2nd Wrap (pix/video available)",
            "language":
            "en",
            "slugline":
            "Cafe",
            "byline":
            "Jamie McKinnell",
            "version":
            2,
        }

        # formatted with an empty subscriber dict; [0][1] of the result is the
        # serialized document parsed below
        response = self.formatter.format(article, {})
        nitf_xml = etree.fromstring(response[0][1])
        # the expected string ends with the newline and indentation that follow the
        # first </p> in the serialized output
        self.assertEqual(
            etree.tostring(nitf_xml.find('body/body.content/p'),
                           encoding="unicode"),
            "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it may "
            "have been a 10-minute delay by police that cost two lives.</p>\n      "
        )
        self.assertTrue(
            nitf_xml.findall('body/body.content/p')[1].text.startswith(
                'Shortly after 2am'))

    def test_html2nitf_br_last(self):
        """Check that a <br/> that is the last child of a <p> element is removed.

        <br/> elements followed by text inside the same <p> are expected to stay.
        """
        source_tree = etree.fromstring(
            dedent("""\
            <div>
                    <p>
                        the following tag <br/> should still be here
                    </p>
                    <p>
                        and the next one <br/> too
                    </p>
                    <p>
                        but not the last one:<br/>
                    </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        wanted = dedent("""\
            <div>
                    <p>
                        the following tag <br/> should still be here
                    </p>
                    <p>
                        and the next one <br/> too
                    </p>
                    <p>
                        but not the last one:
                    </p>
            </div>""")
        # compared verbatim, so whitespace in the serialized tree matters here
        serialized = etree.tostring(converted, encoding='unicode')
        self.assertEqual(serialized, wanted)

    def test_html2nitf_style_cleaning(self):
        """Check that the <style> element and ``style`` attributes are removed from HTML.

        The expected output also shows the <font> wrappers gone and
        <i>/<u>/<b> turned into nested <em> elements. The comparison ignores
        newlines and spaces.
        """
        def squash(text):
            # layout is irrelevant for this check: drop every newline and every space
            return text.replace('\n', '').replace(' ', '')

        source = dedent("""\
            <div>
                <style type="text/css">
                    p { margin-bottom: 0.25cm; line-height: 120%; }a:link {  }
                </style>
                <p style="margin-bottom: 0cm; line-height: 100%">Test bla bla bla</p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <br/>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <font face="DejaVu Sans, sans-serif">
                        <font style="font-size: 14pt" size="4">
                            <i>
                                <u>
                                    <b>test</b>
                                </u>
                            </i>
                        </font>
                    </font>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">toto</p>
                <p style="margin-bottom: 0cm; line-height: 100%">titi</p>
            </div>
            """)
        html = etree.fromstring(source)

        nitf = self.formatter.html2nitf(html, attr_remove=['style'])

        expected = squash(dedent("""\
            <div>
                <p>Test bla bla bla</p>
                <p>
                </p>
                <p>
                    <em class="italic">
                        <em class="underscore">
                            <em class="bold">test</em>
                        </em>
                    </em>
                </p>
                <p>toto</p>
                <p>titi</p>
            </div>"""))
        serialized = etree.tostring(nitf, encoding='unicode')
        self.assertEqual(squash(serialized), expected)

    def test_table(self):
        """Check that a table made of <table>/<tbody>/<tr>/<td> passes through html2nitf unchanged.

        The markup is stripped of every newline and space before parsing, and
        the serialized result must equal that stripped string.
        """
        pretty_markup = """
        <div>
        <table>
            <tbody>
                <tr>
                    <td>Table cell 1</td>
                    <td>Table cell 2</td>
                    <td>Table cell 3</td>
                </tr>
                <tr>
                    <td>Table cell 2.1</td>
                    <td>Table cell 2.2</td>
                    <td>Table cell 2.3</td>
                </tr>
                <tr>
                    <td>Table cell 3.1</td>
                    <td>Table cell 3.2</td>
                    <td>Table cell 3.3</td>
                </tr>
            </tbody>
        </table>
        </div>
        """
        # note: this also removes the spaces inside the cell texts
        compact_markup = pretty_markup.replace('\n', '').replace(' ', '')
        table_tree = etree.fromstring(compact_markup)
        converted = self.formatter.html2nitf(table_tree)
        serialized = etree.tostring(converted, encoding='unicode')
        self.assertEqual(serialized, compact_markup)

    def test_company_codes(self):
        """Check that a company code is rendered as ``body/body.head/org``.

        The element text must be the company name, ``idsrc`` the security
        exchange and ``value`` the qcode.
        """
        located = {
            'alt_name': '',
            'state': 'California',
            'city_code': 'Los Angeles',
            'city': 'Los Angeles',
            'dateline': 'city',
            'country_code': 'US',
            'country': 'USA',
            'tz': 'America/Los_Angeles',
            'state_code': 'CA',
        }
        item = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001', 'name': 'international court or tribunal'},
                {'qcode': '02011002', 'name': 'extradition'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': located,
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{
                'qcode': 'Australia',
                'name': 'Australia',
                'state': '',
                'country': 'Australia',
                'world_region': 'Oceania',
            }],
            'company_codes': [{
                'name': 'YANCOAL AUSTRALIA LIMITED',
                'qcode': 'YAL',
                'security_exchange': 'ASX',
            }],
        }
        subscriber = {'name': 'Test Subscriber'}

        # the first result entry unpacks into exactly two values; only the document is used
        _seq, encoded = self.formatter.format(item, subscriber)[0]
        root = etree.fromstring(encoded)
        org = root.find('body/body.head/org')
        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')

    def testNoneAsciNamesContent(self):
        """Check that non-ASCII characters in ``body_html`` reach the NITF paragraph text intact."""
        item = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [{"package": "package", "package_type": "takes"}],
        }
        subscriber = {'name': 'Test Subscriber'}

        # the first result entry unpacks into exactly two values; only the document is used
        _seq, encoded = self.formatter.format(item, subscriber)[0]
        root = etree.fromstring(encoded)
        paragraph = root.find('body/body.content/p')
        self.assertEqual(paragraph.text, 'Tommi Mäkinen crashes a Škoda in Äppelbo')

Example #9

Show file

File: nitf_formatter_tests.py Project: MiczFlor/superdesk-core

 def setUp(self):
     """Run the parent set-up, create the NITF and base formatters, then call ``init_app`` on the test app."""
     super().setUp()
     self.formatter = NITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)

Example #10

Show file

File: nitf_formatter_tests.py Project: MiczFlor/superdesk-core

class NitfFormatterTest(TestCase):
    """Exercise ``NITFFormatter.format`` and ``Formatter.append_legal``."""

    def setUp(self):
        """Run the parent set-up, build the formatters and call ``init_app``."""
        super().setUp()
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """A legal-flagged slugline gets a ``Legal: `` prefix, cut short when truncating."""
        legal_article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True}
        }
        append_legal = self.base_formatter.append_legal

        self.assertEqual(append_legal(legal_article),
                         'Legal: Obama Republican Healthc')
        self.assertEqual(append_legal(legal_article, truncate=True),
                         'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """Without the legal flag the slugline comes back unchanged."""
        plain_article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': False}
        }

        self.assertEqual(self.base_formatter.append_legal(plain_article),
                         'Obama Republican Healthc')

    def test_formatter(self):
        """Headline, body paragraph and urgency show up in the formatted NITF."""
        article = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2
        }
        subscriber = {'name': 'Test Subscriber'}

        # Only the second member of the first formatted pair is inspected.
        _, doc = self.formatter.format(article, subscriber)[0]
        root = etree.fromstring(doc)

        title = root.find('head/title')
        self.assertEqual(title.text, article['headline'])
        paragraph = root.find('body/body.content/p')
        self.assertEqual(paragraph.text, 'test body')
        urgency = root.find('head/docdata/urgency')
        self.assertEqual(urgency.get('ed-urg'), '2')

    def test_company_codes(self):
        """A company code is rendered as an ``org`` element under ``body.head``."""
        located = {
            'alt_name': '',
            'state': 'California',
            'city_code': 'Los Angeles',
            'city': 'Los Angeles',
            'dateline': 'city',
            'country_code': 'US',
            'country': 'USA',
            'tz': 'America/Los_Angeles',
            'state_code': 'CA'
        }
        subjects = [
            {'qcode': '02011001', 'name': 'international court or tribunal'},
            {'qcode': '02011002', 'name': 'extradition'},
        ]
        places = [{
            'qcode': 'Australia',
            'name': 'Australia',
            'state': '',
            'country': 'Australia',
            'world_region': 'Oceania',
        }]
        companies = [{
            'name': 'YANCOAL AUSTRALIA LIMITED',
            'qcode': 'YAL',
            'security_exchange': 'ASX',
        }]
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': subjects,
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': located,
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': places,
            'company_codes': companies,
        }
        subscriber = {'name': 'Test Subscriber'}

        _, doc = self.formatter.format(article, subscriber)[0]
        org = etree.fromstring(doc).find('body/body.head/org')

        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        # Attribute checks run in the same order as before: idsrc, then value.
        for attribute, expected in (('idsrc', 'ASX'), ('value', 'YAL')):
            self.assertEqual(org.attrib.get(attribute, ''), expected)

Example #11

Show file

File: iress_nitf_formatter_test.py Project: mdhaman/superdesk-aap

class IRESSNITFFormatterTest(AAPTestCase):
    """Tests for the output produced by ``IRESSNITFFormatter.format``.

    Every formatting test goes through :meth:`_format_and_parse`, which
    formats an article, swaps the raw line ender for a printable marker and
    parses the result as XML.
    """

    # Sequence the tests look for in ``formatted_item`` ...
    line_ender = b'\x19\x0D\x0A'.decode()
    # ... and the marker it is replaced with before the document is parsed.
    line_feed = 'ZZZZYYYY\n'
    # Not referenced anywhere inside this class.
    line_prefix = '   '

    def setUp(self):
        """Create the formatters, call ``init_app`` and load the fixture data."""
        self.formatter = IRESSNITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)
        self.setUpData()

    def setUpData(self):
        """Insert a ``rightsinfo`` vocabulary with AAP, default and SUP entries."""
        copyrights = [{
            "_id": "rightsinfo",
            "items": [
                {
                    "usageTerms": "Usage terms goes here.",
                    "copyrightHolder": "Australian Associated Press",
                    "is_active": True,
                    "copyrightNotice": "Notice goes here.",
                    "name": "AAP"
                },
                {
                    "usageTerms": "Usage terms goes here.",
                    "copyrightHolder": "Australian Associated Press",
                    "is_active": True,
                    "copyrightNotice": "Notice goes here.",
                    "name": "default"
                },
                {
                    "usageTerms": "Usage terms goes here.",
                    "copyrightHolder": "Foo bar",
                    "is_active": True,
                    "copyrightNotice": "Notice goes here.",
                    "name": "SUP"
                }
            ],
            "type": "manageable",
            "_etag": "init",
            "display_name": "Copyrights",
        }]
        self.app.data.insert('vocabularies', copyrights)

    def _format_and_parse(self, article):
        """Format *article* for the test subscriber and parse the result.

        :param article: article dict handed to ``self.formatter.format``.
        :return: ``(doc, nitf_xml)`` where ``doc`` is the first entry returned
            by the formatter and ``nitf_xml`` is the parsed ``formatted_item``.
        """
        doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        # The raw line ender is swapped for a printable marker before parsing;
        # presumably the parser would reject the control character it
        # contains -- TODO confirm.
        item = doc['formatted_item'].replace(self.line_ender, self.line_feed)
        return doc, etree.fromstring(item)

    def test_append_legal(self):
        """A legal-flagged slugline gets a ``Legal: `` prefix, cut short when truncating."""
        article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True}
        }

        slugline = self.base_formatter.append_legal(article)
        self.assertEqual(slugline, 'Legal: Obama Republican Healthc')
        slugline = self.base_formatter.append_legal(article, truncate=True)
        self.assertEqual(slugline, 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """Without the legal flag the slugline comes back unchanged."""
        article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': False}
        }

        slugline = self.base_formatter.append_legal(article)
        self.assertEqual(slugline, 'Obama Republican Healthc')

    def test_formatter(self):
        """Check the head metadata, headline and ``pre`` body of a formatted article."""
        article = {
            'headline': 'test headline',
            'body_html': '<p>test body</p><p>test body</p>',
            'slugline': 'keyword',
            'anpa_take_key': 'take-key',
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'original_source': 'EMAIL',
            'type': 'text',
            'priority': '9',
            'source': 'SUP',
            '_id': 'urn:localhost.abc',
            'guid': 'urn:localhost.abc',
            'urgency': 2,
            'word_count': 99,
            'unique_id': 11,
            'place': [{'qcode': 'FED'}],
            'sign_off': 'me',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'byline': 'Bar Foo'
        }

        doc, nitf_xml = self._format_and_parse(article)
        self.assertEqual(nitf_xml.find('head/title').text, 'FED:test headline')
        self.assertEqual(nitf_xml.find('body/body.content/pre').text,
                         '   Bar FooZZZZYYYY\n   test bodyZZZZYYYY\n   test bodyZZZZYYYY\n   SUP meZZZZYYYY\n')
        self.assertEqual(nitf_xml.find('head/docdata/urgency').get('ed-urg'), '2')
        # The sequence meta is the published sequence number, zero-padded to 4.
        self.assertEqual(nitf_xml.find('head/meta[@name="anpa-sequence"]').get('content'),
                         str(doc['published_seq_num']).zfill(4))
        self.assertEqual(nitf_xml.find('head/meta[@name="anpa-keyword"]').get('content'), 'keyword')
        self.assertEqual(nitf_xml.find('head/meta[@name="anpa-takekey"]').get('content'), 'take-key')
        self.assertEqual(nitf_xml.find('head/meta[@name="anpa-category"]').get('content'), 'f')
        self.assertEqual(nitf_xml.find('head/meta[@name="anpa-wordcount"]').get('content'), '0099')
        self.assertEqual(nitf_xml.find('body/body.head/hedline/hl1').text, 'FED:test headline')
        self.assertEqual(nitf_xml.find('head/docdata/doc.copyright').get('year'), '2018')
        self.assertEqual(nitf_xml.find('head/docdata/doc.copyright').get('holder'), 'Foo bar')
        self.assertEqual(nitf_xml.find('head/docdata/doc-id').get('id-string'), 'AAP.20180613.11')
        self.assertEqual(nitf_xml.find('head/docdata/date.issue').get('norm'), '20180613T214519')
        self.assertEqual(nitf_xml.find('head/docdata/date.release').get('norm'), '20180613T214519')
        self.assertEqual(len(nitf_xml.findall('body/body.end')), 0)

    def test_company_codes(self):
        """Company qcodes are joined with a space in the ``asx-codes`` meta element."""
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001', 'name': 'international court or tribunal'},
                        {'qcode': '02011002', 'name': 'extradition'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA'
                }
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{'qcode': 'Australia', 'name': 'Australia',
                       'state': '', 'country': 'Australia',
                       'world_region': 'Oceania'}],
            'company_codes': [
                {'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'},
                {'name': 'BHP Billiton Limited', 'qcode': 'BHP', 'security_exchange': 'ASX'}
            ]
        }

        _, nitf_xml = self._format_and_parse(article)
        self.assertEqual(nitf_xml.find('head/meta[@name="asx-codes"]').get('content'), "YAL BHP")

    def testDivContent(self):
        """A body built from ``<div>`` elements starts with the byline and first div text."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'type': 'text',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'body_html': '<div>Kathmandu Holdings has lodged a claim in the New Zealand High'
                         'Court for the recovery of costs associated with last years takeover bid from Briscoe'
                         'Group.</div><div>Kathmandu Holdings has lodged a claim in the New Zealand High Court for '
                         'the recovery of costs associated with last years takeover bid from Briscoe Group.'
                         '</div><div><br></div><div><br></div><div>Kathmandu incurred costs in relation to the takeover'
                         'After an initial request for payment on November 20, 2015 and subsequent correspondence, '
                         'Briscoe made a payment of $637,711.65 on May 25, 2016 without prejudice to its position on '
                         'what sum Kathmandu is entitled to recover.</div><div><br></div><div>Kathmandu considers the '
                         'full amount claimed is recoverable and has issued legal proceedings for the balance of monies'
                         ' owed.</div>',
            'word_count': '1',
            'priority': 1
        }
        _, nitf_xml = self._format_and_parse(article)
        pre_text = nitf_xml.find('body/body.content/pre').text
        self.assertTrue(pre_text.startswith('   joeZZZZYYYY\n   Kathmandu Holdings'))
        # NOTE(review): the second positional argument of assertTrue is the
        # failure message, so this only checks that the split result is
        # non-empty, which is always the case. A line-count check was
        # presumably intended; confirm the real count produced by the
        # formatter before tightening this to assertEqual(len(...), <count>).
        self.assertTrue(pre_text.split('\n'), 7)

    def testLFContent(self):
        """Line feeds embedded inside a paragraph do not split 'from 74.41' apart."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'type': 'text',
            'body_html': '<p><span style=\"background-color: transparent;\">The Australian dollar has tumbled'
                         ' after&nbsp;</span>Standard &amp; Poor\'s warned the country\'s triple-A credit rating '
                         'is at risk.<br></p><p>   At 1200 AEST on Thursday, the currency was trading at 74.98 US '
                         'cents, up from\n\n 74.41\n\n cents on Wednesday, but down from a high of 75.38 on Thursday '
                         'morning.</p><p>S&amp;P downgraded its outlook on Australia\'s credit rating from stable '
                         'to negative, due to the prospect of ongoing budget deficits without substantial reforms '
                         'being passed by parliament.</p><p>Westpac chief currency strategist&nbsp;Robert Rennie '
                         'said the uncertain election outcome is likely to result in a longer run of budget'
                         'deficits.</p><p>\"It was clearly a risk and the market has been living in its shadow since'
                         'Monday morning,\" he said.</p><p>\"Gridlock or the inability to improve the fiscal situation '
                         'over the forecast period is something I think a ratings agency ought to take into '
                         'account.\"</p><p><span style=\"background-color: transparent;\">The currency had a sudden '
                         'plunge to 74.67 US cents on the announcement from S&amp;P, before recovering some of that '
                         'ground.</span></p><p><span style=\"background-color: transparent;\">Mr Rennie tipped the '
                         'Australian dollar will slip further on Thursday.</span></p><p><span style=\"background-color:'
                         'transparent;\">\"We should make fresh lows, we should be pushing down though 74 US cents '
                         'and possibly lower,\" he said.</span></p><p><span style=\"background-color: '
                         'transparent;\">KEY MOVEMENTS:</span></p><p><span style=\"background-color: transparent;\">One'
                         'Australian dollar buys:</span><br></p><p>   * 74.98 US cents, from\n\n 74.41\n\ncents on '
                         'Wednesday</p><p>   * 75.63 Japanese yen, from \n\n75.15\n\n yen</p><p>   * 67.64 euro cents, '
                         'from \n\n67.24\n\n euro cents</p><p>   * 105.01 New Zealand cents, from \n\n104.85\n\n NZ '
                         'cents</p><p>   * 57.96 British pence, from \n\n57.53\n\n pence</p><p>   Government bond '
                         'yields:</p><p>   * CGS 5.25pct March 2019, 1.510pct, from \n\n1.513pct</p><p>   * CGS 4.25pct'
                         'April 2026, 1.868pct, from \n\n1.862pct</p><p>   Sydney Futures Exchange prices:</p><p>   *'
                         'September 2016 10-year bond futures contract, was at 98.125\n\n (1.875\n\n per cent), '
                         'unchanged from Wednesday</p><p>   * September 2016 3-year bond futures contract, at 98.570 '
                         '(1.430 per cent), up from \n\n98.550\n\n (1.450\n\n per cent)</p><p>   (*Currency closes '
                         'taken at 1700 AEST previous local session, bond market closes taken at 1630 AEST previous '
                         'local session)</p><p>   Source: IRESS</p>',
            'word_count': '1',
            'priority': 1
        }
        _, nitf_xml = self._format_and_parse(article)
        self.assertTrue('from 74.41' in nitf_xml.find('body/body.content/pre').text)

    def testStraySpaceContent(self):
        """Pin the exact ``pre`` text for paragraphs that open with styled spans."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'type': 'text',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'body_html': '<p><span style=\"background-color: transparent;\">\"</span>'
                         '<span style=\"background-color: transparent;\">However</span></p>'
                         '<p>\"<span style=\"background-color: transparent;\">The proposed</p>',
            'word_count': '1',
            'priority': 1
        }
        _, nitf_xml = self._format_and_parse(article)
        self.assertEqual(nitf_xml.find('body/body.content/pre').text,
                         '\"However   \"The proposedZZZZYYYY\n   AAPZZZZYYYY\n')

    def testSpacesContent(self):
        """Runs of plain spaces collapse to one; each non-breaking space becomes a space."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'type': 'text',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'body_html': '<p>a b  c   d&nbsp;e&nbsp;&nbsp;f\xA0g</p>',
            'word_count': '1',
            'priority': 1
        }
        _, nitf_xml = self._format_and_parse(article)
        self.assertEqual(nitf_xml.find('body/body.content/pre').text, '   a b c d e  f gZZZZYYYY\n   AAPZZZZYYYY\n')

    def testControlCharsContent(self):
        """A body of control characters and spaces leaves only whitespace before the source line."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'type': 'text',
            'body_html': '<p><span style=\"background-color: transparent;\">\u0018\u0012\f \u000b\u0012\b</span>'
                         '<span style=\"background-color: transparent;\">\u0005\f\u0006\b \u0006\f\u0019&nbsp;</span>'
                         '</p>',
            'word_count': '1',
            'priority': 1
        }
        _, nitf_xml = self._format_and_parse(article)
        self.assertEqual(nitf_xml.find('body/body.content/pre').text, '      AAPZZZZYYYY\n')

    def testNullTakeKeyContent(self):
        """A ``None`` take key produces no ``anpa-takekey`` meta element."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': None,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': None,
            'unique_id': 1,
            'type': 'text',
            'body_html': '<p>no body</p>',
            'word_count': '1',
            'priority': 1,
            'abstract': None
        }
        _, nitf_xml = self._format_and_parse(article)
        self.assertIsNone(nitf_xml.find('head/meta[@name="anpa-takekey"]'))

    def testLocator(self):
        """The headline prefix is ``VIC:`` at first and ``CRIK:`` once category and subject change."""
        article = {
            '_id': '4853',
            'slugline': 'Gangs',
            'byline': '',
            'anpa_category': [
                {
                    'name': 'Australian General News',
                    'qcode': 'a'
                }
            ],
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'genre': [
                {
                    'name': 'Article (news)',
                    'qcode': 'Article'
                }
            ],
            'priority': 6,
            'unique_id': 33423059,
            'format': 'HTML',
            'guid': '4853',
            'headline': 'Drop gang campaign, bishop tells Vic Libs',
            'source': 'TEST',
            'subject': [
                {
                    'name': 'crime, law and justice',
                    'qcode': '02000000'
                }
            ],
            'flags': {
                'marked_for_sms': False,
                'marked_for_not_publication': False,
                'marked_archived_only': False,
                'marked_for_legal': False
            },
            'anpa_take_key': None,
            'pubstatus': 'usable',
            'schedule_settings': {
                'time_zone': None,
                'utc_embargo': None,
                'utc_publish_schedule': None
            },
            'urgency': 5,
            'word_count': 83,
            'type': 'text',
            'place': [
                {
                    'name': 'VIC',
                    'qcode': 'VIC',
                    'world_region': 'Oceania',
                    'state': 'Victoria',
                    'country': 'Australia',
                    'group': 'Australia'
                }
            ],
            'state': 'corrected',
            'body_html': '<p>A Melbourne-based Anglican bishop is calling on the Liberal party</P',
            '_current_version': 3,
            'sign_off': 'MG',
            'ednote': 'In the story \'Gangs\' sent at: 16/07/2018 15:26\r\n\r\nThis is a corrected repeat.',
            'sms_message': ''
        }

        _, nitf_xml = self._format_and_parse(article)
        self.assertEqual(nitf_xml.find('head/title').text, 'VIC:Drop gang campaign, bishop tells Vic Libs')
        self.assertEqual(nitf_xml.find('body/body.head/hedline/hl1').text,
                         'VIC:Drop gang campaign, bishop tells Vic Libs')

        # Same article again, now with a sports category and a cricket subject.
        article['anpa_category'] = [{'name': 'Domestic Sports', 'qcode': 't'}]
        article['subject'] = [{'name': 'Cricket', 'qcode': '15017000'}]
        _, nitf_xml = self._format_and_parse(article)
        self.assertEqual(nitf_xml.find('head/title').text, 'CRIK:Drop gang campaign, bishop tells Vic Libs')
        self.assertEqual(nitf_xml.find('body/body.head/hedline/hl1').text,
                         'CRIK:Drop gang campaign, bishop tells Vic Libs')

    def testSmartQuotes(self):
        """Curly quotes come out as straight quotes and the yen sign as ``Y=``."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': None,
            'format': 'HTML',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0, tzinfo=pytz.UTC),
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': None,
            'unique_id': 1,
            'type': 'text',
            'body_html': '<p>“Why is society being so childish.” and the Yen ¥</p>',
            'word_count': '1',
            'priority': 1,
            'abstract': None
        }
        _, nitf_xml = self._format_and_parse(article)
        self.assertEqual(nitf_xml.find('body/body.content/pre').text, '   "Why is society being so childish." and '
                                                                      'the Yen Y=ZZZZYYYY\n   AAPZZZZYYYY\n')

Example #12

Show file

File: nitf_formatter_tests.py Project: mscam/superdesk-core

class NitfFormatterTest(TestCase):
    """Tests covering ``NITFFormatter`` and ``Formatter.append_legal``."""

    def setUp(self):
        """Instantiate the formatters and pass the test app to ``init_app``."""
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def _format_to_xml(self, article):
        """Format *article* for the test subscriber and return the parsed document."""
        # Only the second member of the first formatted pair is needed.
        _, doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        return etree.fromstring(doc)

    def test_append_legal(self):
        """A legal-flagged slugline gets a ``Legal: `` prefix, cut short when truncating."""
        legal_article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True}
        }
        append_legal = self.base_formatter.append_legal

        self.assertEqual(append_legal(legal_article),
                         'Legal: Obama Republican Healthc')
        self.assertEqual(append_legal(legal_article, truncate=True),
                         'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """Without the legal flag the slugline comes back unchanged."""
        plain_article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': False}
        }

        self.assertEqual(self.base_formatter.append_legal(plain_article),
                         'Obama Republican Healthc')

    def test_formatter(self):
        """Headline, body paragraph and urgency show up in the formatted NITF."""
        article = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2
        }

        root = self._format_to_xml(article)

        title = root.find('head/title')
        self.assertEqual(title.text, article['headline'])
        paragraph = root.find('body/body.content/p')
        self.assertEqual(paragraph.text, 'test body')
        urgency = root.find('head/docdata/urgency')
        self.assertEqual(urgency.get('ed-urg'), '2')

    def test_html2nitf(self):
        """``html2nitf`` output matches the expected markup once whitespace is ignored."""

        def squash(markup):
            # The comparison drops every newline and space character.
            return markup.replace('\n', '').replace(' ', '')

        source = etree.fromstring(dedent("""\
            <div>
                <unknown>
                    <p>
                        this should be still here
                    </p>
                </unknown>
                <p style="this='is';some='style'">
                    <strong>this text should be
                        <i>modified</i>
                    </strong>
                    so
                    <span>[this should not be removed]</span>
                    unkown
                    <em unknown_attribute="toto">elements</em>
                    and
                    <a bad_attribute="to_remove">attributes</a>
                    are
                    <h6>removed</h6>
                </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source, attr_remove=['style'])

        expected = squash(dedent("""\
            <div>
                    <p>
                        this should be still here
                    </p>
                <p>
                    <em class="bold">this text should be
                        <em class="italic">modified</em>
                    </em>
                    so [this should not be removed] unkown
                    <em class="italic">elements</em>
                    and
                    <a>attributes</a>
                    are
                    <hl2>removed</hl2>
                </p>
            </div>"""))
        actual = squash(etree.tostring(converted, 'unicode'))
        self.assertEqual(actual, expected)

    def test_company_codes(self):
        """A company code is rendered as an ``org`` element under ``body.head``."""
        located = {
            'alt_name': '',
            'state': 'California',
            'city_code': 'Los Angeles',
            'city': 'Los Angeles',
            'dateline': 'city',
            'country_code': 'US',
            'country': 'USA',
            'tz': 'America/Los_Angeles',
            'state_code': 'CA'
        }
        subjects = [
            {'qcode': '02011001', 'name': 'international court or tribunal'},
            {'qcode': '02011002', 'name': 'extradition'},
        ]
        places = [{
            'qcode': 'Australia',
            'name': 'Australia',
            'state': '',
            'country': 'Australia',
            'world_region': 'Oceania',
        }]
        companies = [{
            'name': 'YANCOAL AUSTRALIA LIMITED',
            'qcode': 'YAL',
            'security_exchange': 'ASX',
        }]
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': subjects,
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': located,
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': places,
            'company_codes': companies,
        }

        org = self._format_to_xml(article).find('body/body.head/org')

        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        # Attribute checks run in the same order as before: idsrc, then value.
        for attribute, expected in (('idsrc', 'ASX'), ('value', 'YAL')):
            self.assertEqual(org.attrib.get(attribute, ''), expected)

    def testNoneAsciNamesContent(self):
        """Non-ASCII characters in ``body_html`` reach the first body paragraph unchanged."""
        packages = [{"package": "package", "package_type": "takes"}]
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": packages,
        }

        first_paragraph = self._format_to_xml(article).find('body/body.content/p')
        self.assertEqual(first_paragraph.text,
                         'Tommi Mäkinen crashes a Škoda in Äppelbo')

Example #13

Show file

 def setUp(self):
     """Create the formatters under test and pass the test app to ``init_app``."""
     # Formatter whose output the tests inspect.
     self.formatter = AAPNITFFormatter()
     # Plain base-class instance, kept separately from the AAP NITF formatter.
     self.base_formatter = Formatter()
     init_app(self.app)

Example #14

Show file

class AAPNitfFormatterTest(TestCase):
    def setUp(self):
        """Build the fixtures shared by every test in this case."""
        # formatter under test
        self.formatter = AAPNITFFormatter()
        # plain base formatter, exercised by the append_legal tests
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """A story marked for legal gets a ``Legal: `` slugline prefix; ``truncate=True`` shortens the result."""
        legal_item = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True},
        }
        append_legal = self.base_formatter.append_legal

        self.assertEqual(append_legal(legal_item), 'Legal: Obama Republican Healthc')
        self.assertEqual(append_legal(legal_item, truncate=True), 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """A story not marked for legal keeps its slugline untouched."""
        plain_item = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': False},
        }

        self.assertEqual(self.base_formatter.append_legal(plain_item), 'Obama Republican Healthc')

    def test_formatter(self):
        """Format a minimal text story and check the title, body, urgency and ``head/meta`` values."""
        story = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'slugline': 'keyword',
            'anpa_take_key': 'take-key',
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'original_source': 'EMAIL',
            'type': 'text',
            'priority': '9',
            'source': 'SUP',
            '_id': 'urn:localhost.abc',
            'urgency': 2,
            'place': [{'qcode': 'FED'}],
            'sign_off': 'me',
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        root = etree.fromstring(result['formatted_item'])

        def meta_content(name):
            # ``content`` attribute of the ``head/meta`` element carrying the given ``name``
            return root.find('head/meta[@name="%s"]' % name).get('content')

        self.assertEqual(root.find('head/title').text, story['headline'])
        self.assertEqual(root.find('body/body.content/p').text, 'test body')
        self.assertEqual(root.find('head/docdata/urgency').get('ed-urg'), '2')
        self.assertEqual(meta_content('aap-priority'), '9')
        self.assertEqual(meta_content('anpa-sequence'), str(result['published_seq_num']))
        self.assertEqual(meta_content('anpa-keyword'), 'keyword')
        self.assertEqual(meta_content('anpa-takekey'), 'take-key')
        self.assertEqual(meta_content('aap-source'), 'SUP')
        self.assertEqual(meta_content('aap-original-source'), 'EMAIL')
        self.assertEqual(meta_content('aap-place'), 'FED')
        self.assertEqual(meta_content('aap-signoff'), 'me')
        self.assertEqual(meta_content('anpa-category'), 'f')

    def test_company_codes(self):
        """A company code is rendered as ``body/body.head/org`` with its name, exchange and code."""
        located = {
            'alt_name': '',
            'state': 'California',
            'city_code': 'Los Angeles',
            'city': 'Los Angeles',
            'dateline': 'city',
            'country_code': 'US',
            'country': 'USA',
            'tz': 'America/Los_Angeles',
            'state_code': 'CA',
        }
        story = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001', 'name': 'international court or tribunal'},
                {'qcode': '02011002', 'name': 'extradition'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': located,
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [
                {
                    'qcode': 'Australia',
                    'name': 'Australia',
                    'state': '',
                    'country': 'Australia',
                    'world_region': 'Oceania',
                },
            ],
            'company_codes': [
                {'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'},
            ],
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        org = etree.fromstring(result['formatted_item']).find('body/body.head/org')
        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        # the exchange goes into ``idsrc`` and the company code into ``value``
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')

    def testDivContent(self):
        """A body built from ``div`` elements is emitted as ``p`` elements; the sixth holds the last div."""
        div_body = ('<div>Kathmandu Holdings has lodged a claim in the New Zealand High'
                    'Court for the recovery of costs associated with last years takeover bid from Briscoe'
                    'Group.</div><div>Kathmandu Holdings has lodged a claim in the New Zealand High Court for '
                    'the recovery of costs associated with last years takeover bid from Briscoe Group.'
                    '</div><div><br></div><div>Kathmandu incurred costs in relation to the takeover bid. '
                    'After an initial request for payment on November 20, 2015 and subsequent correspondence, '
                    'Briscoe made a payment of $637,711.65 on May 25, 2016 without prejudice to its position on '
                    'what sum Kathmandu is entitled to recover.</div><div><br></div><div>Kathmandu considers the '
                    'full amount claimed is recoverable and has issued legal proceedings for the balance of monies'
                    ' owed.</div>')
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': div_body,
            'word_count': '1',
            'priority': 1,
            'linked_in_packages': [{'package': 'package', 'package_type': 'takes'}],
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        paragraphs = etree.fromstring(result['formatted_item']).findall('body/body.content/p')
        sixth_text = paragraphs[5].text
        self.assertTrue(sixth_text.startswith('Kathmandu considers'))

    def testLFContent(self):
        """Line feeds embedded in paragraph text are collapsed by the formatter.

        The second source paragraph contains ``'from\\n\\n 74.41'``; the second ``p`` element of
        the formatted output is expected to contain ``'from 74.41'``.
        """
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p><span style=\"background-color: transparent;\">The Australian dollar has tumbled'
                         ' after&nbsp;</span>Standard &amp; Poor\'s warned the country\'s triple-A credit rating '
                         'is at risk.<br></p><p>   At 1200 AEST on Thursday, the currency was trading at 74.98 US '
                         'cents, up from\n\n 74.41\n\n cents on Wednesday, but down from a high of 75.38 on Thursday '
                         'morning.</p><p>S&amp;P downgraded its outlook on Australia\'s credit rating from stable '
                         'to negative, due to the prospect of ongoing budget deficits without substantial reforms '
                         'being passed by parliament.</p><p>Westpac chief currency strategist&nbsp;Robert Rennie '
                         'said the uncertain election outcome is likely to result in a longer run of budget'
                         'deficits.</p><p>\"It was clearly a risk and the market has been living in its shadow since'
                         'Monday morning,\" he said.</p><p>\"Gridlock or the inability to improve the fiscal situation '
                         'over the forecast period is something I think a ratings agency ought to take into '
                         'account.\"</p><p><span style=\"background-color: transparent;\">The currency had a sudden '
                         'plunge to 74.67 US cents on the announcement from S&amp;P, before recovering some of that '
                         'ground.</span></p><p><span style=\"background-color: transparent;\">Mr Rennie tipped the '
                         'Australian dollar will slip further on Thursday.</span></p><p><span style=\"background-color:'
                         'transparent;\">\"We should make fresh lows, we should be pushing down though 74 US cents '
                         'and possibly lower,\" he said.</span></p><p><span style=\"background-color: '
                         'transparent;\">KEY MOVEMENTS:</span></p><p><span style=\"background-color: transparent;\">One'
                         'Australian dollar buys:</span><br></p><p>   * 74.98 US cents, from\n\n 74.41\n\ncents on '
                         'Wednesday</p><p>   * 75.63 Japanese yen, from \n\n75.15\n\n yen</p><p>   * 67.64 euro cents, '
                         'from \n\n67.24\n\n euro cents</p><p>   * 105.01 New Zealand cents, from \n\n104.85\n\n NZ '
                         'cents</p><p>   * 57.96 British pence, from \n\n57.53\n\n pence</p><p>   Government bond '
                         'yields:</p><p>   * CGS 5.25pct March 2019, 1.510pct, from \n\n1.513pct</p><p>   * CGS 4.25pct'
                         'April 2026, 1.868pct, from \n\n1.862pct</p><p>   Sydney Futures Exchange prices:</p><p>   *'
                         'September 2016 10-year bond futures contract, was at 98.125\n\n (1.875\n\n per cent), '
                         'unchanged from Wednesday</p><p>   * September 2016 3-year bond futures contract, at 98.570 '
                         '(1.430 per cent), up from \n\n98.550\n\n (1.450\n\n per cent)</p><p>   (*Currency closes '
                         'taken at 1700 AEST previous local session, bond market closes taken at 1630 AEST previous '
                         'local session)</p><p>   Source: IRESS</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [
                {
                    "package": "package",
                    "package_type": "takes"
                }
            ],
        }
        doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        nitf_xml = etree.fromstring(doc['formatted_item'])
        second_paragraph = nitf_xml.findall('body/body.content/p')[1].text
        # assertIn reports the actual paragraph text on failure, unlike assertTrue(a in b)
        self.assertIn('from 74.41', second_paragraph)

    def testStraySpaceContent(self):
        """An opening quote held in its own ``span`` is joined to the following text without a space."""
        span_body = ('<p><span style=\"background-color: transparent;\">\"</span>'
                     '<span style=\"background-color: transparent;\">However</span></p>'
                     '<p>\"<span style=\"background-color: transparent;\">The proposed</p>')
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': span_body,
            'word_count': '1',
            'priority': 1,
            'linked_in_packages': [{'package': 'package', 'package_type': 'takes'}],
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        first_paragraph = etree.fromstring(result['formatted_item']).find('body/body.content/p')
        self.assertEqual(first_paragraph.text, '"However')

    def testNoneAsciNamesContent(self):
        """Accented letters in the body are replaced by their unaccented ASCII counterparts."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            'linked_in_packages': [{'package': 'package', 'package_type': 'takes'}],
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        first_paragraph = etree.fromstring(result['formatted_item']).find('body/body.content/p')
        self.assertEqual(first_paragraph.text, 'Tommi Makinen crashes a Skoda in Appelbo')

    def testSpacesContent(self):
        """Runs of plain spaces collapse to one, while each non-breaking space becomes a single space."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            # mixes multiple plain spaces, ``&nbsp;`` entities and a literal U+00A0
            'body_html': '<p>a b  c   d&nbsp;e&nbsp;&nbsp;f\xA0g</p>',
            'word_count': '1',
            'priority': 1,
            'linked_in_packages': [{'package': 'package', 'package_type': 'takes'}],
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        first_paragraph = etree.fromstring(result['formatted_item']).find('body/body.content/p')
        self.assertEqual(first_paragraph.text, 'a b c d e  f g')

    def testControlCharsContent(self):
        """A body made only of control characters, spaces and ``&nbsp;`` reduces to a single space."""
        control_body = ('<p><span style=\"background-color: transparent;\">\u0018\u0012\f \u000b\u0012\b</span>'
                        '<span style=\"background-color: transparent;\">\u0005\f\u0006\b \u0006\f\u0019&nbsp;</span>'
                        '</p>')
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': control_body,
            'word_count': '1',
            'priority': 1,
            'linked_in_packages': [{'package': 'package', 'package_type': 'takes'}],
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        first_paragraph = etree.fromstring(result['formatted_item']).find('body/body.content/p')
        self.assertEqual(first_paragraph.text, ' ')

    def testNullTakeKeyContent(self):
        """With ``anpa_take_key`` set to ``None`` no ``anpa-takekey`` meta element is written."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': None,
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': None,
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>no body</p>',
            'word_count': '1',
            'priority': 1,
            'abstract': None,
            'linked_in_packages': [{'package': 'package', 'package_type': 'takes'}],
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        root = etree.fromstring(result['formatted_item'])
        take_key_meta = root.find('head/meta[@name="anpa-takekey"]')
        self.assertIsNone(take_key_meta)

Example #15

Show file

File: iress_nitf_formatter_test.py Project: mdhaman/superdesk-aap

 def setUp(self):
     """Create the IRESS NITF formatter and a base ``Formatter``, init the app, then load test data."""
     self.formatter = IRESSNITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
     self.setUpData()

Example #16

Show file

File: nitf_formatter_tests.py Project: lnogues/superdesk-core

class NitfFormatterTest(TestCase):
    def setUp(self):
        """Run the base-class set-up, then build the fixtures shared by every test in this case."""
        super().setUp()
        # formatter under test
        self.formatter = NITFFormatter()
        # plain base formatter, exercised by the append_legal tests
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """A story marked for legal gets a ``Legal: `` slugline prefix; ``truncate=True`` shortens the result."""
        legal_item = {'slugline': 'Obama Republican Healthc', 'flags': {'marked_for_legal': True}}
        append_legal = self.base_formatter.append_legal

        self.assertEqual(append_legal(legal_item), 'Legal: Obama Republican Healthc')
        self.assertEqual(append_legal(legal_item, truncate=True), 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """A story not marked for legal keeps its slugline untouched."""
        plain_item = {'slugline': 'Obama Republican Healthc', 'flags': {'marked_for_legal': False}}

        self.assertEqual(self.base_formatter.append_legal(plain_item), 'Obama Republican Healthc')

    def test_formatter(self):
        """Format a minimal text story and check the title, first paragraph and urgency."""
        story = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2,
        }

        # format() returns a list; its first entry is a (sequence, document) pair
        _, payload = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        root = etree.fromstring(payload)
        self.assertEqual(root.find('head/title').text, story['headline'])
        self.assertEqual(root.find('body/body.content/p').text, 'test body')
        self.assertEqual(root.find('head/docdata/urgency').get('ed-urg'), '2')

    def test_company_codes(self):
        """A company code is rendered as ``body/body.head/org`` with its name, exchange and code."""
        story = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001', 'name': 'international court or tribunal'},
                {'qcode': '02011002', 'name': 'extradition'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA',
                },
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{'qcode': 'Australia', 'name': 'Australia',
                       'state': '', 'country': 'Australia',
                       'world_region': 'Oceania'}],
            'company_codes': [{'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'}],
        }

        # format() returns a list; its first entry is a (sequence, document) pair
        _, payload = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        org = etree.fromstring(payload).find('body/body.head/org')
        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        # the exchange goes into ``idsrc`` and the company code into ``value``
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')

    def testNoneAsciNamesContent(self):
        """Accented characters in the body come out of the formatter unchanged."""
        story = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            'linked_in_packages': [{'package': 'package', 'package_type': 'takes'}],
        }

        # format() returns a list; its first entry is a (sequence, document) pair
        _, payload = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        first_paragraph = etree.fromstring(payload).find('body/body.content/p')
        self.assertEqual(first_paragraph.text, 'Tommi Mäkinen crashes a Škoda in Äppelbo')

Example #17

Show file

File: iress_nitf_formatter_test.py Project: jerome-poisson/superdesk-aap

 def setUp(self):
     """Create the IRESS NITF formatter and a base ``Formatter``, init the app, then load test data."""
     self.formatter = IRESSNITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)
     self.setUpData()

Example #18

Show file

File: iress_nitf_formatter_test.py Project: jerome-poisson/superdesk-aap

class IRESSNITFFormatterTest(TestCase):
    # three-character line terminator: U+0019 followed by CR LF
    line_ender = b'\x19\x0D\x0A'.decode()
    # readable marker the tests substitute for line_ender before parsing the formatted item
    line_feed = 'ZZZZYYYY\n'
    # three spaces; NOTE(review): presumably the indent the formatter puts before each body line -- confirm
    line_prefix = '   '

    def setUp(self):
        """Build the fixtures shared by every test in this case and load the vocabulary data."""
        # formatter under test
        self.formatter = IRESSNITFFormatter()
        # plain base formatter, exercised by the append_legal tests
        self.base_formatter = Formatter()
        init_app(self.app)
        self.setUpData()

    def setUpData(self):
        """Insert the ``rightsinfo`` vocabulary with copyright entries named AAP, default and SUP."""

        def rights_entry(name, holder):
            # all entries share the same terms and notice; only the name and holder differ
            return {
                'usageTerms': 'Usage terms goes here.',
                'copyrightHolder': holder,
                'is_active': True,
                'copyrightNotice': 'Notice goes here.',
                'name': name,
            }

        rights_vocabulary = {
            '_id': 'rightsinfo',
            'items': [
                rights_entry('AAP', 'Australian Associated Press'),
                rights_entry('default', 'Australian Associated Press'),
                rights_entry('SUP', 'Foo bar'),
            ],
            'type': 'manageable',
            '_etag': 'init',
            'display_name': 'Copyrights',
        }
        self.app.data.insert('vocabularies', [rights_vocabulary])

    def test_append_legal(self):
        """A story marked for legal gets a ``Legal: `` slugline prefix; ``truncate=True`` shortens the result."""
        legal_item = {'slugline': 'Obama Republican Healthc', 'flags': {'marked_for_legal': True}}
        append_legal = self.base_formatter.append_legal

        self.assertEqual(append_legal(legal_item), 'Legal: Obama Republican Healthc')
        self.assertEqual(append_legal(legal_item, truncate=True), 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """A story not marked for legal keeps its slugline untouched."""
        plain_item = {'slugline': 'Obama Republican Healthc', 'flags': {'marked_for_legal': False}}

        self.assertEqual(self.base_formatter.append_legal(plain_item), 'Obama Republican Healthc')

    def test_formatter(self):
        """Format a two-paragraph story and check the head metadata, docdata and ``pre`` body text."""
        story = {
            'headline': 'test headline',
            'body_html': '<p>test body</p><p>test body</p>',
            'slugline': 'keyword',
            'anpa_take_key': 'take-key',
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'original_source': 'EMAIL',
            'type': 'text',
            'priority': '9',
            'source': 'SUP',
            '_id': 'urn:localhost.abc',
            'guid': 'urn:localhost.abc',
            'urgency': 2,
            'word_count': 99,
            'unique_id': 11,
            'place': [{'qcode': 'FED'}],
            'sign_off': 'me',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'byline': 'Bar Foo',
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        # swap every line ender for the readable marker before parsing
        xml_text = result['formatted_item'].replace(self.line_ender, self.line_feed)
        root = etree.fromstring(xml_text)

        def meta_content(name):
            # ``content`` attribute of the ``head/meta`` element carrying the given ``name``
            return root.find('head/meta[@name="%s"]' % name).get('content')

        def docdata(tag):
            # child element of ``head/docdata`` with the given tag
            return root.find('head/docdata/' + tag)

        self.assertEqual(root.find('head/title').text, 'FED:test headline')
        self.assertEqual(
            root.find('body/body.content/pre').text,
            '   Bar FooZZZZYYYY\n   test bodyZZZZYYYY\n   test bodyZZZZYYYY\n   SUP meZZZZYYYY\n')
        self.assertEqual(docdata('urgency').get('ed-urg'), '2')
        self.assertEqual(meta_content('anpa-sequence'), str(result['published_seq_num']).zfill(4))
        self.assertEqual(meta_content('anpa-keyword'), 'keyword')
        self.assertEqual(meta_content('anpa-takekey'), 'take-key')
        self.assertEqual(meta_content('anpa-category'), 'f')
        self.assertEqual(meta_content('anpa-wordcount'), '0099')
        self.assertEqual(root.find('body/body.head/hedline/hl1').text, 'FED:test headline')
        self.assertEqual(docdata('doc.copyright').get('year'), '2018')
        self.assertEqual(docdata('doc.copyright').get('holder'), 'Foo bar')
        self.assertEqual(docdata('doc-id').get('id-string'), 'AAP.20180613.11')
        # issue and release dates are both expected to equal the version-created timestamp
        self.assertEqual(docdata('date.issue').get('norm'),
                         story.get('versioncreated').strftime('%Y%m%dT%H%M%S'))
        self.assertEqual(docdata('date.release').get('norm'),
                         story.get('versioncreated').strftime('%Y%m%dT%H%M%S'))
        self.assertEqual(len(root.findall('body/body.end')), 0)

    def test_company_codes(self):
        """The codes of all company entries are joined with a space in the ``asx-codes`` meta element."""
        story = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001', 'name': 'international court or tribunal'},
                {'qcode': '02011002', 'name': 'extradition'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA',
                },
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{'qcode': 'Australia', 'name': 'Australia',
                       'state': '', 'country': 'Australia',
                       'world_region': 'Oceania'}],
            'company_codes': [
                {'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'},
                {'name': 'BHP Billiton Limited', 'qcode': 'BHP', 'security_exchange': 'ASX'},
            ],
        }

        result = self.formatter.format(story, {'name': 'Test Subscriber'})[0]
        # swap every line ender for the readable marker before parsing
        xml_text = result['formatted_item'].replace(self.line_ender, self.line_feed)
        asx_meta = etree.fromstring(xml_text).find('head/meta[@name="asx-codes"]')
        self.assertEqual(asx_meta.get('content'), "YAL BHP")

    def testDivContent(self):
        """Format a body made of ``div`` elements and check how the generated ``pre`` text starts."""
        article = {
            '_id':
            '3',
            'source':
            'AAP',
            'anpa_category': [{
                'qcode': 'a'
            }],
            'headline':
            'This is a test headline',
            'byline':
            'joe',
            'slugline':
            'slugline',
            'subject': [{
                'qcode': '02011001'
            }],
            'anpa_take_key':
            'take_key',
            'unique_id':
            1,
            'type':
            'text',
            'versioncreated':
            datetime(2018, 6, 13, 11, 45, 19, 0),
            'body_html':
            '<div>Kathmandu Holdings has lodged a claim in the New Zealand High'
            'Court for the recovery of costs associated with last years takeover bid from Briscoe'
            'Group.</div><div>Kathmandu Holdings has lodged a claim in the New Zealand High Court for '
            'the recovery of costs associated with last years takeover bid from Briscoe Group.'
            '</div><div><br></div><div><br></div><div>Kathmandu incurred costs in relation to the takeover'
            'After an initial request for payment on November 20, 2015 and subsequent correspondence, '
            'Briscoe made a payment of $637,711.65 on May 25, 2016 without prejudice to its position on '
            'what sum Kathmandu is entitled to recover.</div><div><br></div><div>Kathmandu considers the '
            'full amount claimed is recoverable and has issued legal proceedings for the balance of monies'
            ' owed.</div>',
            'word_count':
            '1',
            'priority':
            1
        }
        doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        # swap every line ender for the readable marker before parsing
        item = doc['formatted_item'].replace(self.line_ender, self.line_feed)
        nitf_xml = etree.fromstring(item)
        # the byline line comes first, followed by the text of the first div
        self.assertTrue(
            nitf_xml.find('body/body.content/pre').text.startswith(
                '   joeZZZZYYYY\n   Kathmandu Holdings'))
        # NOTE(review): assertTrue() takes its second argument as the failure message, so the 7 is
        # never compared; this only checks that split('\n') returns a non-empty list. Presumably a
        # line count was meant (e.g. assertEqual(len(...), 7)) -- confirm the expected count first.
        self.assertTrue(
            nitf_xml.find('body/body.content/pre').text.split('\n'), 7)

    def testLFContent(self):
        """Line feeds embedded inside a paragraph must not split 'from' and '74.41' in the ``<pre>`` text."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'type': 'text',
            'body_html': (
                '<p><span style=\"background-color: transparent;\">The Australian dollar has tumbled'
                ' after&nbsp;</span>Standard &amp; Poor\'s warned the country\'s triple-A credit rating '
                'is at risk.<br></p><p>   At 1200 AEST on Thursday, the currency was trading at 74.98 US '
                'cents, up from\n\n 74.41\n\n cents on Wednesday, but down from a high of 75.38 on Thursday '
                'morning.</p><p>S&amp;P downgraded its outlook on Australia\'s credit rating from stable '
                'to negative, due to the prospect of ongoing budget deficits without substantial reforms '
                'being passed by parliament.</p><p>Westpac chief currency strategist&nbsp;Robert Rennie '
                'said the uncertain election outcome is likely to result in a longer run of budget'
                'deficits.</p><p>\"It was clearly a risk and the market has been living in its shadow since'
                'Monday morning,\" he said.</p><p>\"Gridlock or the inability to improve the fiscal situation '
                'over the forecast period is something I think a ratings agency ought to take into '
                'account.\"</p><p><span style=\"background-color: transparent;\">The currency had a sudden '
                'plunge to 74.67 US cents on the announcement from S&amp;P, before recovering some of that '
                'ground.</span></p><p><span style=\"background-color: transparent;\">Mr Rennie tipped the '
                'Australian dollar will slip further on Thursday.</span></p><p><span style=\"background-color:'
                'transparent;\">\"We should make fresh lows, we should be pushing down though 74 US cents '
                'and possibly lower,\" he said.</span></p><p><span style=\"background-color: '
                'transparent;\">KEY MOVEMENTS:</span></p><p><span style=\"background-color: transparent;\">One'
                'Australian dollar buys:</span><br></p><p>   * 74.98 US cents, from\n\n 74.41\n\ncents on '
                'Wednesday</p><p>   * 75.63 Japanese yen, from \n\n75.15\n\n yen</p><p>   * 67.64 euro cents, '
                'from \n\n67.24\n\n euro cents</p><p>   * 105.01 New Zealand cents, from \n\n104.85\n\n NZ '
                'cents</p><p>   * 57.96 British pence, from \n\n57.53\n\n pence</p><p>   Government bond '
                'yields:</p><p>   * CGS 5.25pct March 2019, 1.510pct, from \n\n1.513pct</p><p>   * CGS 4.25pct'
                'April 2026, 1.868pct, from \n\n1.862pct</p><p>   Sydney Futures Exchange prices:</p><p>   *'
                'September 2016 10-year bond futures contract, was at 98.125\n\n (1.875\n\n per cent), '
                'unchanged from Wednesday</p><p>   * September 2016 3-year bond futures contract, at 98.570 '
                '(1.430 per cent), up from \n\n98.550\n\n (1.450\n\n per cent)</p><p>   (*Currency closes '
                'taken at 1700 AEST previous local session, bond market closes taken at 1630 AEST previous '
                'local session)</p><p>   Source: IRESS</p>'
            ),
            'word_count': '1',
            'priority': 1,
        }

        subscriber = {'name': 'Test Subscriber'}
        formatted = self.formatter.format(article, subscriber)[0]['formatted_item']
        nitf_xml = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))
        pre_text = nitf_xml.find('body/body.content/pre').text

        # The source has 'from\n\n 74.41'; the output must read as one phrase.
        self.assertTrue('from 74.41' in pre_text)

    def testStraySpaceContent(self):
        """Pin the exact ``<pre>`` text produced when paragraphs open with a quote next to ``<span>`` markup."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'type': 'text',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'body_html': (
                '<p><span style=\"background-color: transparent;\">\"</span>'
                '<span style=\"background-color: transparent;\">However</span></p>'
                '<p>\"<span style=\"background-color: transparent;\">The proposed</p>'
            ),
            'word_count': '1',
            'priority': 1,
        }

        subscriber = {'name': 'Test Subscriber'}
        formatted = self.formatter.format(article, subscriber)[0]['formatted_item']
        nitf_xml = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))
        pre_text = nitf_xml.find('body/body.content/pre').text

        self.assertEqual(pre_text, '\"However   \"The proposedZZZZYYYY\n   AAPZZZZYYYY\n')

    def testSpacesContent(self):
        """Runs of plain spaces collapse to one; each non-breaking space becomes a single ordinary space."""
        body = '<p>a b  c   d&nbsp;e&nbsp;&nbsp;f\xA0g</p>'
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'type': 'text',
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'body_html': body,
            'word_count': '1',
            'priority': 1,
        }

        subscriber = {'name': 'Test Subscriber'}
        formatted = self.formatter.format(article, subscriber)[0]['formatted_item']
        nitf_xml = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))
        pre_text = nitf_xml.find('body/body.content/pre').text

        self.assertEqual(pre_text, '   a b c d e  f gZZZZYYYY\n   AAPZZZZYYYY\n')

    def testControlCharsContent(self):
        """A body made only of control characters and spaces leaves just spaces before the 'AAP' line."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': 1,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'type': 'text',
            'body_html': (
                '<p><span style=\"background-color: transparent;\">\u0018\u0012\f \u000b\u0012\b</span>'
                '<span style=\"background-color: transparent;\">\u0005\f\u0006\b \u0006\f\u0019&nbsp;</span>'
                '</p>'
            ),
            'word_count': '1',
            'priority': 1,
        }

        subscriber = {'name': 'Test Subscriber'}
        formatted = self.formatter.format(article, subscriber)[0]['formatted_item']
        nitf_xml = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))
        pre_text = nitf_xml.find('body/body.content/pre').text

        self.assertEqual(pre_text, '      AAPZZZZYYYY\n')

    def testNullTakeKeyContent(self):
        """With ``anpa_take_key`` (and byline, abstract) set to None, no anpa-takekey meta is emitted."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': None,
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': None,
            'unique_id': 1,
            'type': 'text',
            'body_html': '<p>no body</p>',
            'word_count': '1',
            'priority': 1,
            'abstract': None,
        }

        subscriber = {'name': 'Test Subscriber'}
        formatted = self.formatter.format(article, subscriber)[0]['formatted_item']
        nitf_xml = etree.fromstring(formatted.replace(self.line_ender, self.line_feed))

        takekey_meta = nitf_xml.find('head/meta[@name="anpa-takekey"]')
        self.assertIsNone(takekey_meta)

    def testLocator(self):
        """Check the prefix put in front of the headline in ``head/title`` and ``hl1``.

        The story is first formatted as general news placed in VIC (expects
        ``VIC:``), then re-formatted as domestic sport with a cricket subject
        (expects ``CRIK:``).
        """
        article = {
            '_id': '4853',
            'slugline': 'Gangs',
            'byline': '',
            'anpa_category': [{'name': 'Australian General News', 'qcode': 'a'}],
            'versioncreated': datetime(2018, 6, 13, 11, 45, 19, 0),
            'genre': [{'name': 'Article (news)', 'qcode': 'Article'}],
            'priority': 6,
            'unique_id': 33423059,
            'format': 'HTML',
            'guid': '4853',
            'headline': 'Drop gang campaign, bishop tells Vic Libs',
            'source': 'TEST',
            'subject': [{'name': 'crime, law and justice', 'qcode': '02000000'}],
            'flags': {
                'marked_for_sms': False,
                'marked_for_not_publication': False,
                'marked_archived_only': False,
                'marked_for_legal': False,
            },
            'anpa_take_key': None,
            'pubstatus': 'usable',
            'schedule_settings': {
                'time_zone': None,
                'utc_embargo': None,
                'utc_publish_schedule': None,
            },
            'urgency': 5,
            'word_count': 83,
            'type': 'text',
            'place': [{
                'name': 'VIC',
                'qcode': 'VIC',
                'world_region': 'Oceania',
                'state': 'Victoria',
                'country': 'Australia',
                'group': 'Australia',
            }],
            'state': 'corrected',
            'body_html': '<p>A Melbourne-based Anglican bishop is calling on the Liberal party</P',
            '_current_version': 3,
            'sign_off': 'MG',
            'ednote': 'In the story \'Gangs\' sent at: 16/07/2018 15:26\r\n\r\nThis is a corrected repeat.',
            'sms_message': '',
        }
        headline_paths = ('head/title', 'body/body.head/hedline/hl1')

        def render():
            # Format the article in its current state and parse the resulting NITF.
            doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
            return etree.fromstring(doc['formatted_item'].replace(self.line_ender, self.line_feed))

        nitf_xml = render()
        for path in headline_paths:
            self.assertEqual(nitf_xml.find(path).text, 'VIC:Drop gang campaign, bishop tells Vic Libs')

        # Switch the same story to a sports category and subject, then format again.
        article['anpa_category'] = [{'name': 'Domestic Sports', 'qcode': 't'}]
        article['subject'] = [{'name': 'Cricket', 'qcode': '15017000'}]

        nitf_xml = render()
        for path in headline_paths:
            self.assertEqual(nitf_xml.find(path).text, 'CRIK:Drop gang campaign, bishop tells Vic Libs')

Example #19

Show file

File: nitf_formatter_tests.py Project: lnogues/superdesk-core

 def setUp(self):
     """Run the parent fixture, create the formatters under test and call ``init_app`` with the test app."""
     super().setUp()
     # NITF-specific formatter and the generic base formatter, both used by the tests.
     self.formatter = NITFFormatter()
     self.base_formatter = Formatter()
     init_app(self.app)

Example #20

Show file

File: nitf_formatter_tests.py Project: jerome-poisson/superdesk-core

class NitfFormatterTest(TestCase):
    def setUp(self):
        """Create the formatters under test and call ``init_app`` with the test app.

        The parent ``setUp`` is invoked first so that whatever the base
        ``TestCase`` prepares is in place before ``self.app`` is read below.
        """
        super().setUp()
        # NITF-specific formatter and the generic base formatter, both used by the tests.
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        """A story flagged for legal gets a 'Legal: ' slugline prefix; ``truncate=True`` gives the shorter form."""
        flagged = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True},
        }

        self.assertEqual(
            self.base_formatter.append_legal(flagged),
            'Legal: Obama Republican Healthc')
        self.assertEqual(
            self.base_formatter.append_legal(flagged, truncate=True),
            'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """A story not flagged for legal keeps its slugline unchanged."""
        unflagged = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': False},
        }

        result = self.base_formatter.append_legal(unflagged)

        self.assertEqual(result, 'Obama Republican Healthc')

    def test_formatter(self):
        """Format a minimal text article and check the title, body paragraph and urgency."""
        article = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2,
        }

        # format() yields (sequence, document) pairs; only the document is inspected.
        _seq, nitf_str = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        root = etree.fromstring(nitf_str)

        title = root.find('head/title')
        self.assertEqual(title.text, article['headline'])
        paragraph = root.find('body/body.content/p')
        self.assertEqual(paragraph.text, 'test body')
        urgency = root.find('head/docdata/urgency')
        self.assertEqual(urgency.get('ed-urg'), '2')

    def test_html2nitf(self):
        """Convert mixed HTML and compare with the expected NITF, ignoring spaces and newlines.

        The expected tree shows: ``<unknown>`` and ``<span>`` wrappers dropped with
        their content kept, ``<strong>``/``<i>``/``<em>`` mapped to ``<em class=...>``,
        ``<h6>`` mapped to ``<hl2>``, and the unrecognised or ``style`` attributes gone.
        """
        def squash(markup):
            # Layout is irrelevant to this comparison, so drop it on both sides.
            return markup.replace('\n', '').replace(' ', '')

        source_tree = etree.fromstring(dedent("""\
            <div>
                <unknown>
                    <p>
                        this should be still here
                    </p>
                </unknown>
                <p style="this='is';some='style'">
                    <strong>this text should be
                        <i>modified</i>
                    </strong>
                    so
                    <span>[this should not be removed]</span>
                    unkown
                    <em unknown_attribute="toto">elements</em>
                    and
                    <a bad_attribute="to_remove">attributes</a>
                    are
                    <h6>removed</h6>
                </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        wanted = squash(dedent("""\
            <div>
                    <p>
                        this should be still here
                    </p>
                <p>
                    <em class="bold">this text should be
                        <em class="italic">modified</em>
                    </em>
                    so [this should not be removed] unkown
                    <em class="italic">elements</em>
                    and
                    <a>attributes</a>
                    are
                    <hl2>removed</hl2>
                </p>
            </div>"""))
        self.assertEqual(squash(etree.tostring(converted, encoding='unicode')), wanted)

    def test_html2nitf_br(self):
        """A ``<br/>`` directly under ``<div>`` is dropped (its text kept); those inside ``<p>`` are kept."""
        source_tree = etree.fromstring(dedent("""\
            <div>
                <br/>the previous tag should be removed (but not the text)
                    <p>
                        the following tag <br/> should still be here
                        and the next one <br/> too
                    </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])
        serialized = etree.tostring(converted, encoding='unicode')

        # Compared verbatim, so the layout of this literal is significant.
        wanted = dedent("""\
            <div>
                the previous tag should be removed (but not the text)
                    <p>
                        the following tag <br/> should still be here
                        and the next one <br/> too
                    </p>
            </div>""")
        self.assertEqual(serialized, wanted)

    def test_body_content_br(self):
        """Format a long story containing paragraphs that end in ``<br>`` and check the first two ``<p>``."""
        article = {
            "_id": "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "guid": "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "state": "published",
            "body_html": "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it "
                         "may have been a 10-minute delay by police that cost two lives.</p><p>Shortly after 2am "
                         "on December 16, 2014, gunman Man Haron Monis fired at escaping hostages.</p><p>That first "
                         "shot by Monis&nbsp;should have satisfied a so-called \"secondary trigger\" prompting "
                         "police to storm the Martin Place stronghold with some element of surprise, NSW Coroner "
                         "Michael Barnes found on Wednesday.</p><p>\"(But) the 10 minutes that lapsed without "
                         "decisive action by police was too long,\" Mr Barnes&nbsp;said in Sydney.</p><p>By the "
                         "time police smashed their way into the cafe at 2.14am in a flurry of stun-grenade "
                         "explosions, manager Tori Johnson had been forced to his knees and shot in the head."
                         "</p><p>Barrister Katrina Dawson was killed by police bullet fragments.</p><p>New police "
                         "commissioner Mick Fuller - who was one of the first commanders at the scene - admitted on "
                         "Wednesday tactical officers should have gone in earlier.</p><p>Mr Fuller went further than "
                         "the coroner when he told the Ten Network: \"We probably should have gone in before the "
                         "first shot.\"</p><p>\"Early intervention for terrorist incidents is the way forward, "
                         "knowing what we know now,\" he said.</p><p>\"But nevertheless it's still a very "
                         "dangerous tactic and people will potentially still lose their lives.\"</p><p>Mr Barnes "
                         "has made 45 findings on everything from police negotiation strategies to greater legal "
                         "protections for frontline officers in terrorist situations.<br></p><p>He lay the blame "
                         "for the loss of life squarely at the feet of Monis, but other parties, including prosecution "
                         "solicitors and a consulting psychiatrist, copped some criticism.</p><p>The cafe "
                         "was \"transformed into a prison run by a vicious maniac\" that day, Mr Barnes "
                         "said.<br></p><p class=\"\">The traditional \"contain and negotiate\" strategy was "
                         "appropriate early on but police failed to adequately reassess taking into account the "
                         "headway they were making with Monis.<br></p><p class=\"\">\"Sadly, it failed,\" Mr Barnes "
                         "said on Wednesday.<br></p><p class=\"\">\"The 'contain and negotiate' approach may not be "
                         "the best ongoing response to a terrorist incident if the offenders believe, whether or not "
                         "they survive, their cause will benefit from the publicity generated by a protracted "
                         "siege.\"<br></p><p class=\"\">Mr Fuller said the containment strategy had saved countless "
                         "lives over the years and wouldn't be abandoned for non-terrorist incidents.</p>"
                         "<p class=\"\">Police mistakenly thought Monis' backpack could house explosives, "
                         "but Mr Barnes noted senior officers were \"unduly reluctant\" to initiate direct action "
                         "plans during siege situations.</p><p class=\"\">The \"primary trigger\" for such an "
                         "assault was identified as the death or serious injury of a hostage - evidence which "
                         "disgusted the families of victims and survivors.</p><p class=\"\">The police response was "
                         "at times hampered by mishaps.</p><p>Eight calls to a phone number hostages expected would "
                         "connect them with negotiators were missed, which was a \"significant failure\", Mr Barnes "
                         "said.</p><p>Commanders and tactical officers received specialist terrorist training but "
                         "negotiators received \"little, if any\", the coroner added.</p><p>The stand-off could not "
                         "have eventuated in the first place if Monis had not been granted bail on accessory to murder"
                         " and dozens of sexual assault offences.<br></p><p>Mr Barnes found the work of an Office of "
                         "the Director of Public Prosecutions solicitor in December 2013 was inadequate, "
                         "\"erroneously\" advising a court Monis didn't have to show exceptional circumstances "
                         "in arguing for bail.</p><p>Police also made a mistake by issuing Monis with a court "
                         "attendance notice for the sexual offences in October 2014 rather than arresting him"
                         ".</p><p>Monis was already on bail at the time for a commonwealth offence after he'd "
                         "written offensive letters to the families of Australian soldiers killed in the Middle"
                         " East.</p><p>States can find it difficult to access commonwealth records, Mr Barnes said,"
                         " and he called for that to be remedied.</p><p>Some of the coroner's harshest individual "
                         "criticism was reserved for the consultant psychiatrist who advised police.</p>"
                         "<p>His \"sub-optimal\" performance included a belief that Monis was merely grandstanding,"
                         " Mr Barnes found.</p><p>The doctor should not have been permitted to advise on negotiation "
                         "strategy and he made \"erroneous and unrealistic assessments\" of what was happening inside "
                         "the cafe.</p><p>The psychiatrist's advice was ambiguous and Islamic terrorism was beyond his"
                         " expertise.</p><p>\"The police commanders underestimated the threat Monis posed,\" Mr Barnes"
                         " said, in part blaming their reliance upon the psychiatrist's opinion.</p><p>He recommended "
                         "a more diverse panel of experts be used in the future.</p><p>The coroner stated police "
                         "snipers couldn't have ended the siege despite a 10-minute window where they had clear sight "
                         "of a head that could have been Monis.</p><p>Those gunmen were never confident in their legal "
                         "justification for a \"kill shot\" and Mr Barnes suggested their&nbsp;power to use force "
                         "should be more clearly defined.</p><p>The coroner did acknowledge that sending tactical "
                         "officers into the cafe after their hand was forced was a decision no commander would "
                         "ever want to face.</p><p>\"The bravery of these officers inspires awe,\" he said.<br></p>",
            "pubstatus": "usable",
            "type": "text",
            "abstract": "<p>The NSW coroner believes a 10-minute period of inaction by police before the bloody end "
                        "of the 2014 Lindt Cafe siege was \"too long\".</p>",
            "priority": 6,
            "unique_id": 12055427,
            "format": "HTML",
            "genre": [
                {
                    "qcode": "Wrap",
                    "name": "Wrap"
                }
            ],
            "word_count": 843,
            "source": "AAP",
            "urgency": 1,
            "subject": [
                {
                    "qcode": "16001000",
                    "parent": "16000000",
                    "name": "act of terror"
                },
                {
                    "qcode": "02001010",
                    "parent": "02001000",
                    "name": "terrorism"
                }
            ],
            "flags": {
                "marked_archived_only": False,
                "marked_for_legal": False,
                "marked_for_not_publication": False,
                "marked_for_sms": False
            },
            "headline": "'Ten minutes was too long': Lindt siege",
            "dateline": {
                "source": "AAP",
                "text": "SYDNEY, May 24 AAP -",
                "located": {
                    "city": "Sydney",
                    "country_code": "AU",
                    "country": "Australia",
                    "dateline": "city",
                    "state_code": "NSW",
                    "state": "New South Wales",
                    "alt_name": "",
                    "tz": "Australia/Sydney",
                    "city_code": "Sydney"
                }
            },
            "anpa_category": [
                {
                    "qcode": "a",
                    "name": "Australian General News"
                }
            ],
            "unique_name": "#12055427",
            "place": [
                {
                    "name": "NSW",
                    "qcode": "NSW",
                    "country": "Australia",
                    "world_region": "Oceania",
                    "group": "Australia",
                    "state": "New South Wales"
                }
            ],
            "sign_off": "SN/jmk/jcd/pmu",
            "anpa_take_key": "2nd Wrap (pix/video available)",
            "language": "en",
            "slugline": "Cafe",
            "byline": "Jamie McKinnell",
            "version": 2,
        }

        formatted = self.formatter.format(article, {})
        # Each result is indexed as (sequence, document); parse the first document.
        nitf_xml = etree.fromstring(formatted[0][1])

        # Serialising the element includes its tail, hence the trailing newline and indent.
        first_paragraph = etree.tostring(nitf_xml.find('body/body.content/p'), encoding="unicode")
        self.assertEqual(first_paragraph,
                         "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it may "
                         "have been a 10-minute delay by police that cost two lives.</p>\n      ")

        second_paragraph = nitf_xml.findall('body/body.content/p')[1]
        self.assertTrue(second_paragraph.text.startswith('Shortly after 2am'))

    def test_html2nitf_br_last(self):
        """A ``<br/>`` that closes a ``<p>`` is removed, while one in the middle of the text is kept."""
        source_tree = etree.fromstring(dedent("""\
            <div>
                    <p>
                        the following tag <br/> should still be here
                    </p>
                    <p>
                        and the next one <br/> too
                    </p>
                    <p>
                        but not the last one:<br/>
                    </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])
        serialized = etree.tostring(converted, encoding='unicode')

        # Compared verbatim, so the layout of this literal is significant.
        wanted = dedent("""\
            <div>
                    <p>
                        the following tag <br/> should still be here
                    </p>
                    <p>
                        and the next one <br/> too
                    </p>
                    <p>
                        but not the last one:
                    </p>
            </div>""")
        self.assertEqual(serialized, wanted)

    def test_html2nitf_style_cleaning(self):
        """``<style>`` elements, ``style`` attributes and ``<font>`` wrappers must not survive conversion.

        The comparison ignores spaces and newlines.
        """
        def squash(markup):
            # Layout is irrelevant to this comparison, so drop it on both sides.
            return markup.replace('\n', '').replace(' ', '')

        source_tree = etree.fromstring(dedent("""\
            <div>
                <style type="text/css">
                    p { margin-bottom: 0.25cm; line-height: 120%; }a:link {  }
                </style>
                <p style="margin-bottom: 0cm; line-height: 100%">Test bla bla bla</p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <br/>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <font face="DejaVu Sans, sans-serif">
                        <font style="font-size: 14pt" size="4">
                            <i>
                                <u>
                                    <b>test</b>
                                </u>
                            </i>
                        </font>
                    </font>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">toto</p>
                <p style="margin-bottom: 0cm; line-height: 100%">titi</p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        wanted = squash(dedent("""\
            <div>
                <p>Test bla bla bla</p>
                <p>
                </p>
                <p>
                    <em class="italic">
                        <em class="underscore">
                            <em class="bold">test</em>
                        </em>
                    </em>
                </p>
                <p>toto</p>
                <p>titi</p>
            </div>"""))
        self.assertEqual(squash(etree.tostring(converted, encoding='unicode')), wanted)

    def test_table(self):
        """A plain table (with all spaces and newlines stripped) must pass through ``html2nitf`` unchanged."""
        # All whitespace is removed, including the spaces inside the cell texts.
        compact_html = """
        <div>
        <table>
            <tbody>
                <tr>
                    <td>Table cell 1</td>
                    <td>Table cell 2</td>
                    <td>Table cell 3</td>
                </tr>
                <tr>
                    <td>Table cell 2.1</td>
                    <td>Table cell 2.2</td>
                    <td>Table cell 2.3</td>
                </tr>
                <tr>
                    <td>Table cell 3.1</td>
                    <td>Table cell 3.2</td>
                    <td>Table cell 3.3</td>
                </tr>
            </tbody>
        </table>
        </div>
        """.replace('\n', '').replace(' ', '')

        converted = self.formatter.html2nitf(etree.fromstring(compact_html))
        serialized = etree.tostring(converted, encoding='unicode')

        self.assertEqual(serialized, compact_html)

    def test_company_codes(self):
        """A company code is emitted as ``body.head/org``: name as text, exchange as ``idsrc``, code as ``value``."""
        dateline = {
            'source': 'AAP',
            'text': 'Los Angeles, Aug 11 AAP -',
            'located': {
                'alt_name': '',
                'state': 'California',
                'city_code': 'Los Angeles',
                'city': 'Los Angeles',
                'dateline': 'city',
                'country_code': 'US',
                'country': 'USA',
                'tz': 'America/Los_Angeles',
                'state_code': 'CA',
            },
        }
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001', 'name': 'international court or tribunal'},
                {'qcode': '02011002', 'name': 'extradition'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': dateline,
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{
                'qcode': 'Australia',
                'name': 'Australia',
                'state': '',
                'country': 'Australia',
                'world_region': 'Oceania',
            }],
            'company_codes': [{
                'name': 'YANCOAL AUSTRALIA LIMITED',
                'qcode': 'YAL',
                'security_exchange': 'ASX',
            }],
        }

        # format() yields (sequence, document) pairs; only the document is inspected.
        _seq, nitf_str = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        org = etree.fromstring(nitf_str).find('body/body.head/org')

        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')

    def testNoneAsciNamesContent(self):
        """Check that non-ASCII characters in the body survive formatting.

        The first ``body/body.content/p`` element of the formatted document
        must carry the paragraph text with its accented letters intact.
        """
        takes_package = {'package': 'package', 'package_type': 'takes'}
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            'linked_in_packages': [takes_package],
        }
        subscriber = {'name': 'Test Subscriber'}

        # Only the first (sequence, document) pair of the result is inspected.
        formatted = self.formatter.format(article, subscriber)
        seq, doc = formatted[0]
        nitf_xml = etree.fromstring(doc)
        paragraph = nitf_xml.find('body/body.content/p')
        self.assertEqual(paragraph.text, 'Tommi Mäkinen crashes a Škoda in Äppelbo')