def format(self, article, subscriber, codes=None):
    """Build the NewsML G2 ``newsMessage`` document for an article.

    :param dict article: item to format
    :param dict subscriber: subscriber the document is generated for; also stored on ``self.subscriber``
    :param list codes: selector codes (not read by this method)
    :return [(int, str)]: a one-element list holding a tuple of the publish sequence number
        and the serialized document
    :raises FormatterError: if anything fails while formatting the article
    """
    try:
        self.subscriber = subscriber
        subscribers_service = superdesk.get_resource_service('subscribers')
        sequence_number = subscribers_service.generate_sequence_number(subscriber)
        package = self._is_package(article)

        message = etree.Element(
            'newsMessage',
            attrib=self._debug_message_extra,
            nsmap=self._message_nsmap,
        )
        self._format_header(article, message, sequence_number)
        item_set = self._format_item(message)

        if package:
            # packages are described by a packageItem carrying a group set
            package_item = self._format_item_set(article, item_set, 'packageItem')
            self._format_groupset(article, package_item)
        elif article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
            # picture/audio/video items get a content set
            media_item = self._format_item_set(article, item_set, 'newsItem')
            self._format_contentset(article, media_item)
        else:
            # everything else embeds the NITF rendering of the article
            nitf_tree = NITFFormatter().get_nitf(article, subscriber, sequence_number)
            text_item = self._format_item_set(article, item_set, 'newsItem')
            self._format_content(article, text_item, nitf_tree)

        sd_etree.fix_html_void_elements(message)
        serialized = etree.tostring(message, pretty_print=True).decode('utf-8')
        return [(sequence_number, self.XML_ROOT + serialized)]
    except Exception as ex:
        raise FormatterError.newmsmlG2FormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Create article in NewsML G2 format.

    :param dict article: item to format
    :param dict subscriber: subscriber the item is formatted for
    :param list codes: selector codes (not read by this method)
    :return [(int, str)]: return a List of tuples. A tuple consist of
        publish sequence number and formatted article string.
    :raises FormatterError: if the formatter fails to format an article
    """
    try:
        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
        is_package = self._is_package(article)

        # Merge the debug extras into a private copy: updating
        # ``self._message_attrib`` in place would permanently alter a mapping
        # that outlives this call (and may be shared between instances).
        message_attrib = dict(self._message_attrib)
        message_attrib.update(self._debug_message_extra)
        news_message = etree.Element('newsMessage', attrib=message_attrib)

        self._format_header(article, news_message, pub_seq_num)
        item_set = self._format_item(news_message)
        if is_package:
            item = self._format_item_set(article, item_set, 'packageItem')
            self._format_groupset(article, item)
        elif article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
            item = self._format_item_set(article, item_set, 'newsItem')
            self._format_contentset(article, item)
        else:
            # any other item type embeds the NITF rendering of the article
            nitf_formatter = NITFFormatter()
            nitf = nitf_formatter.get_nitf(article, subscriber, pub_seq_num)
            news_item = self._format_item_set(article, item_set, 'newsItem')
            self._format_content(article, news_item, nitf)
        return [(pub_seq_num, self.XML_ROOT + etree.tostring(news_message).decode('utf-8'))]
    except Exception as ex:
        raise FormatterError.newmsmlG2FormatterError(ex, subscriber)
class NewsMLG2Formatter(BaseFormatter):
    """Serialize an item as a NewsML G2 ``newsMessage`` using ``NewsroomFormatter`` helpers."""

    MIMETYPE = 'application/vnd.iptc.g2.newsitem+xml'
    FILE_EXTENSION = 'xml'

    encoding = 'utf-8'
    formatter = NewsroomFormatter()
    nitf_formatter = NITFFormatter()

    def format_item(self, item, item_type='items'):
        """Return the NewsML G2 document for ``item`` as encoded bytes.

        :param dict item: item to serialize; a shallow copy is used so the defaults
            filled in below do not touch the caller's dict
        :param item_type: not read by this method
        :return: pretty-printed XML with declaration, encoded with ``self.encoding``
        """
        prepared = item.copy()
        # fill the keys the G2 helpers are given, falling back to the item's own values
        prepared.setdefault('guid', prepared['_id'])
        prepared.setdefault('_current_version', prepared['version'])
        prepared.setdefault('state', '')

        message = etree.Element(
            'newsMessage',
            attrib=self.formatter._debug_message_extra,
            nsmap=self.formatter._message_nsmap,
        )
        self.formatter._format_header(prepared, message, '')
        item_set = self.formatter._format_item(message)
        g2_item = self.formatter._format_item_set(prepared, item_set, 'newsItem')
        nitf_tree = self.nitf_formatter.get_nitf(prepared, {}, '')
        self.formatter._format_content(prepared, g2_item, nitf_tree)

        return etree.tostring(
            message,
            xml_declaration=True,
            pretty_print=True,
            encoding=self.encoding,
        )
class NewsMLG2Formatter:
    """Serialize an item as a NewsML G2 ``newsMessage`` using ``NewsroomFormatter`` helpers."""

    encoding = 'utf-8'
    formatter = NewsroomFormatter()
    nitf_formatter = NITFFormatter()

    def format_filename(self, item):
        """Return the file name for ``item``: its ``_id`` followed by ``.xml``."""
        item_id = item['_id']
        return '{}.xml'.format(item_id)

    def format_item(self, item):
        """Return the NewsML G2 document for ``item`` as encoded bytes.

        :param dict item: item to serialize; a shallow copy is used so the defaults
            filled in below do not touch the caller's dict
        :return: pretty-printed XML with declaration, encoded with ``self.encoding``
        """
        prepared = item.copy()
        # fill the keys the G2 helpers are given, falling back to the item's own values
        prepared.setdefault('guid', prepared['_id'])
        prepared.setdefault('_current_version', prepared['version'])
        prepared.setdefault('state', '')

        message = etree.Element(
            'newsMessage',
            attrib=self.formatter._debug_message_extra,
            nsmap=self.formatter._message_nsmap,
        )
        self.formatter._format_header(prepared, message, '')
        item_set = self.formatter._format_item(message)
        g2_item = self.formatter._format_item_set(prepared, item_set, 'newsItem')
        nitf_tree = self.nitf_formatter.get_nitf(prepared, {}, '')
        self.formatter._format_content(prepared, g2_item, nitf_tree)

        return etree.tostring(
            message,
            xml_declaration=True,
            pretty_print=True,
            encoding=self.encoding,
        )
class NITFFormatter:
    """Serialize an item as NITF XML by delegating to a wrapped formatter instance."""

    encoding = 'utf-8'
    # NOTE(review): while this class body executes, ``NITFFormatter`` must already be
    # bound to another object (presumably an imported formatter class) that this
    # class then shadows - confirm against the file's imports.
    formatter = NITFFormatter()

    def format_filename(self, item):
        """Return the file name for ``item``: its ``_id`` followed by ``.xml``."""
        item_id = item['_id']
        return '{}.xml'.format(item_id)

    def format_item(self, item):
        """Return the NITF document for ``item`` as encoded bytes.

        :param dict item: item to serialize
        :return: pretty-printed XML with declaration, encoded with ``self.encoding``
        """
        # an empty dict is passed as the second ``get_nitf`` argument, '' as the third
        nitf_tree = self.formatter.get_nitf(item, {}, '')
        return etree.tostring(
            nitf_tree,
            xml_declaration=True,
            pretty_print=True,
            encoding=self.encoding,
        )
def __init__(self):
    """Initialise the base ``NITFFormatter`` and install ``self.p_filter`` for ``<p>`` elements.

    The ``'filter'`` entry is written into per-instance copies of ``HTML2NITF`` and of its
    ``'p'`` entry. Writing ``self.HTML2NITF['p']['filter']`` directly would modify the
    existing nested mapping in place; if that mapping is shared (e.g. defined at class
    level), every other formatter would end up with this instance's bound filter.
    """
    NITFFormatter.__init__(self)
    html2nitf = self.HTML2NITF.copy()
    p_rules = html2nitf['p'].copy()
    p_rules['filter'] = self.p_filter
    html2nitf['p'] = p_rules
    # rebind on the instance so lookups through ``self`` see the customised mapping
    self.HTML2NITF = html2nitf
def setUp(self):
    """Run the parent set-up, create the formatters under test and call ``init_app``."""
    super().setUp()
    # formatter exercised by the tests, plus a plain base Formatter
    self.formatter = NITFFormatter()
    self.base_formatter = Formatter()
    init_app(self.app)
class NitfFormatterTest(TestCase):
    """Tests for ``NITFFormatter.format`` output and ``Formatter.append_legal``."""

    def setUp(self):
        super().setUp()
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        # an article flagged for legal gets the 'Legal: ' prefix, optionally truncated
        legal_article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True},
        }
        self.assertEqual(
            self.base_formatter.append_legal(legal_article),
            'Legal: Obama Republican Healthc',
        )
        self.assertEqual(
            self.base_formatter.append_legal(legal_article, truncate=True),
            'Legal: Obama Republican ',
        )

    def test_append_legal_when_not_legal(self):
        # without the legal flag the slugline comes back unchanged
        plain_article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': False},
        }
        self.assertEqual(
            self.base_formatter.append_legal(plain_article),
            'Obama Republican Healthc',
        )

    def test_formatter(self):
        article = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2,
        }
        _seq, xml_text = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        root = etree.fromstring(xml_text)
        self.assertEqual(root.find('head/title').text, article['headline'])
        self.assertEqual(root.find('body/body.content/p').text, 'test body')
        self.assertEqual(root.find('head/docdata/urgency').get('ed-urg'), '2')

    def test_company_codes(self):
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001', 'name': 'international court or tribunal'},
                {'qcode': '02011002', 'name': 'extradition'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA',
                },
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{
                'qcode': 'Australia',
                'name': 'Australia',
                'state': '',
                'country': 'Australia',
                'world_region': 'Oceania',
            }],
            'company_codes': [{
                'name': 'YANCOAL AUSTRALIA LIMITED',
                'qcode': 'YAL',
                'security_exchange': 'ASX',
            }],
        }
        _seq, xml_text = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        root = etree.fromstring(xml_text)
        # the company code is looked up as an <org> element in the body head
        org = root.find('body/body.head/org')
        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')

    def testNoneAsciNamesContent(self):
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [{"package": "package", "package_type": "takes"}],
        }
        _seq, xml_text = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        root = etree.fromstring(xml_text)
        # non-ASCII characters must survive formatting unchanged
        self.assertEqual(
            root.find('body/body.content/p').text,
            'Tommi Mäkinen crashes a Škoda in Äppelbo',
        )
class NitfFormatterTest(TestCase):
    # Tests for NITFFormatter (format / html2nitf) and Formatter.append_legal.
    #
    # NOTE(review): the view this block was reconstructed from had its whitespace
    # collapsed; line breaks and indentation inside the multi-line string literals
    # below are a reconstruction - confirm the tests that compare output exactly.

    def setUp(self):
        # NOTE(review): sibling variants of this class call super().setUp() first;
        # this one relies on self.app being available without it - confirm.
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        # an article flagged for legal gets the 'Legal: ' prefix, optionally truncated
        article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {
                'marked_for_legal': True
            }
        }

        slugline = self.base_formatter.append_legal(article)
        self.assertEqual(slugline, 'Legal: Obama Republican Healthc')
        slugline = self.base_formatter.append_legal(article, truncate=True)
        self.assertEqual(slugline, 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        # without the legal flag the slugline comes back unchanged
        article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {
                'marked_for_legal': False
            }
        }

        slugline = self.base_formatter.append_legal(article)
        self.assertEqual(slugline, 'Obama Republican Healthc')

    def test_formatter(self):
        article = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2
        }

        seq, doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        nitf_xml = etree.fromstring(doc)
        self.assertEqual(nitf_xml.find('head/title').text, article['headline'])
        self.assertEqual(
            nitf_xml.find('body/body.content/p').text, 'test body')
        self.assertEqual(
            nitf_xml.find('head/docdata/urgency').get('ed-urg'), '2')

    def test_html2nitf(self):
        html = etree.fromstring(
            dedent("""\
            <div>
                <unknown>
                    <p>
                        this should be still here
                    </p>
                </unknown>
                <p style="this='is';some='style'">
                    <strong>this text should be
                        <i>modified</i>
                    </strong>
                    so <span>[this should not be removed]</span> unkown
                    <em unknown_attribute="toto">elements</em> and
                    <a bad_attribute="to_remove">attributes</a>
                    are <h6>removed</h6>
                </p>
            </div>
            """))

        nitf = self.formatter.html2nitf(html, attr_remove=['style'])

        # newlines and spaces are stripped on both sides, so only the markup is compared
        expected = dedent("""\
            <div>
                <p>
                    this should be still here
                </p>
                <p>
                    <em class="bold">this text should be
                        <em class="italic">modified</em>
                    </em>
                    so [this should not be removed] unkown
                    <em class="italic">elements</em> and
                    <a>attributes</a>
                    are <hl2>removed</hl2>
                </p>
            </div>""").replace('\n', '').replace(' ', '')
        self.assertEqual(
            etree.tostring(nitf, encoding='unicode').replace('\n', '').replace(' ', ''), expected)

    def test_html2nitf_br(self):
        """Check that <br/> is kept if it is a child of and enrichedText parent element"""
        html = etree.fromstring(
            dedent("""\
            <div>
                <br/>the previous tag should be removed (but not the text)
                <p>
                    the following tag <br/> should still be here
                    and the next one <br/> too
                </p>
            </div>
            """))

        nitf = self.formatter.html2nitf(html, attr_remove=['style'])

        # exact comparison: whitespace in `expected` must mirror the input above
        expected = dedent("""\
            <div>
                the previous tag should be removed (but not the text)
                <p>
                    the following tag <br/> should still be here
                    and the next one <br/> too
                </p>
            </div>""")
        self.assertEqual(etree.tostring(nitf, encoding='unicode'), expected)

    def test_body_content_br(self):
        article = {
            "_id": "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "guid": "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "state": "published",
            "body_html": "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it "
            "may have been a 10-minute delay by police that cost two lives.</p><p>Shortly after 2am "
            "on December 16, 2014, gunman Man Haron Monis fired at escaping hostages.</p><p>That first "
            "shot by Monis should have satisfied a so-called \"secondary trigger\" prompting "
            "police to storm the Martin Place stronghold with some element of surprise, NSW Coroner "
            "Michael Barnes found on Wednesday.</p><p>\"(But) the 10 minutes that lapsed without "
            "decisive action by police was too long,\" Mr Barnes said in Sydney.</p><p>By the "
            "time police smashed their way into the cafe at 2.14am in a flurry of stun-grenade "
            "explosions, manager Tori Johnson had been forced to his knees and shot in the head."
            "</p><p>Barrister Katrina Dawson was killed by police bullet fragments.</p><p>New police "
            "commissioner Mick Fuller - who was one of the first commanders at the scene - admitted on "
            "Wednesday tactical officers should have gone in earlier.</p><p>Mr Fuller went further than "
            "the coroner when he told the Ten Network: \"We probably should have gone in before the "
            "first shot.\"</p><p>\"Early intervention for terrorist incidents is the way forward, "
            "knowing what we know now,\" he said.</p><p>\"But nevertheless it's still a very "
            "dangerous tactic and people will potentially still lose their lives.\"</p><p>Mr Barnes "
            "has made 45 findings on everything from police negotiation strategies to greater legal "
            "protections for frontline officers in terrorist situations.<br></p><p>He lay the blame "
            "for the loss of life squarely at the feet of Monis, but other parties, including prosecution "
            "solicitors and a consulting psychiatrist, copped some criticism.</p><p>The cafe "
            "was \"transformed into a prison run by a vicious maniac\" that day, Mr Barnes "
            "said.<br></p><p class=\"\">The traditional \"contain and negotiate\" strategy was "
            "appropriate early on but police failed to adequately reassess taking into account the "
            "headway they were making with Monis.<br></p><p class=\"\">\"Sadly, it failed,\" Mr Barnes "
            "said on Wednesday.<br></p><p class=\"\">\"The 'contain and negotiate' approach may not be "
            "the best ongoing response to a terrorist incident if the offenders believe, whether or not "
            "they survive, their cause will benefit from the publicity generated by a protracted "
            "siege.\"<br></p><p class=\"\">Mr Fuller said the containment strategy had saved countless "
            "lives over the years and wouldn't be abandoned for non-terrorist incidents.</p>"
            "<p class=\"\">Police mistakenly thought Monis' backpack could house explosives, "
            "but Mr Barnes noted senior officers were \"unduly reluctant\" to initiate direct action "
            "plans during siege situations.</p><p class=\"\">The \"primary trigger\" for such an "
            "assault was identified as the death or serious injury of a hostage - evidence which "
            "disgusted the families of victims and survivors.</p><p class=\"\">The police response was "
            "at times hampered by mishaps.</p><p>Eight calls to a phone number hostages expected would "
            "connect them with negotiators were missed, which was a \"significant failure\", Mr Barnes "
            "said.</p><p>Commanders and tactical officers received specialist terrorist training but "
            "negotiators received \"little, if any\", the coroner added.</p><p>The stand-off could not "
            "have eventuated in the first place if Monis had not been granted bail on accessory to murder"
            " and dozens of sexual assault offences.<br></p><p>Mr Barnes found the work of an Office of "
            "the Director of Public Prosecutions solicitor in December 2013 was inadequate, "
            "\"erroneously\" advising a court Monis didn't have to show exceptional circumstances "
            "in arguing for bail.</p><p>Police also made a mistake by issuing Monis with a court "
            "attendance notice for the sexual offences in October 2014 rather than arresting him"
            ".</p><p>Monis was already on bail at the time for a commonwealth offence after he'd "
            "written offensive letters to the families of Australian soldiers killed in the Middle"
            " East.</p><p>States can find it difficult to access commonwealth records, Mr Barnes said,"
            " and he called for that to be remedied.</p><p>Some of the coroner's harshest individual "
            "criticism was reserved for the consultant psychiatrist who advised police.</p>"
            "<p>His \"sub-optimal\" performance included a belief that Monis was merely grandstanding,"
            " Mr Barnes found.</p><p>The doctor should not have been permitted to advise on negotiation "
            "strategy and he made \"erroneous and unrealistic assessments\" of what was happening inside "
            "the cafe.</p><p>The psychiatrist's advice was ambiguous and Islamic terrorism was beyond his"
            " expertise.</p><p>\"The police commanders underestimated the threat Monis posed,\" Mr Barnes"
            " said, in part blaming their reliance upon the psychiatrist's opinion.</p><p>He recommended "
            "a more diverse panel of experts be used in the future.</p><p>The coroner stated police "
            "snipers couldn't have ended the siege despite a 10-minute window where they had clear sight "
            "of a head that could have been Monis.</p><p>Those gunmen were never confident in their legal "
            "justification for a \"kill shot\" and Mr Barnes suggested their power to use force "
            "should be more clearly defined.</p><p>The coroner did acknowledge that sending tactical "
            "officers into the cafe after their hand was forced was a decision no commander would "
            "ever want to face.</p><p>\"The bravery of these officers inspires awe,\" he said.<br></p>",
            "pubstatus": "usable",
            "type": "text",
            "abstract": "<p>The NSW coroner believes a 10-minute period of inaction by police before the bloody end "
            "of the 2014 Lindt Cafe siege was \"too long\".</p>",
            "priority": 6,
            "unique_id": 12055427,
            "format": "HTML",
            "genre": [{
                "qcode": "Wrap",
                "name": "Wrap"
            }],
            "word_count": 843,
            "source": "AAP",
            "urgency": 1,
            "subject": [{
                "qcode": "16001000",
                "parent": "16000000",
                "name": "act of terror"
            }, {
                "qcode": "02001010",
                "parent": "02001000",
                "name": "terrorism"
            }],
            "flags": {
                "marked_archived_only": False,
                "marked_for_legal": False,
                "marked_for_not_publication": False,
                "marked_for_sms": False
            },
            "headline": "'Ten minutes was too long': Lindt siege",
            "dateline": {
                "source": "AAP",
                "text": "SYDNEY, May 24 AAP -",
                "located": {
                    "city": "Sydney",
                    "country_code": "AU",
                    "country": "Australia",
                    "dateline": "city",
                    "state_code": "NSW",
                    "state": "New South Wales",
                    "alt_name": "",
                    "tz": "Australia/Sydney",
                    "city_code": "Sydney"
                }
            },
            "anpa_category": [{
                "qcode": "a",
                "name": "Australian General News"
            }],
            "unique_name": "#12055427",
            "place": [{
                "name": "NSW",
                "qcode": "NSW",
                "country": "Australia",
                "world_region": "Oceania",
                "group": "Australia",
                "state": "New South Wales"
            }],
            "sign_off": "SN/jmk/jcd/pmu",
            "anpa_take_key": "2nd Wrap (pix/video available)",
            "language": "en",
            "slugline": "Cafe",
            "byline": "Jamie McKinnell",
            "version": 2,
        }

        response = self.formatter.format(article, {})
        nitf_xml = etree.fromstring(response[0][1])
        # NOTE(review): the expected tail after "</p>" is whitespace-sensitive; the
        # number of spaces following "\n" may have been collapsed in this view - confirm.
        self.assertEqual(
            etree.tostring(nitf_xml.find('body/body.content/p'), encoding="unicode"),
            "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it may "
            "have been a 10-minute delay by police that cost two lives.</p>\n "
        )
        self.assertTrue(
            nitf_xml.findall('body/body.content/p')[1].text.startswith(
                'Shortly after 2am'))

    def test_html2nitf_br_last(self):
        """Check that last <br/> in a <p> element is removed"""
        html = etree.fromstring(
            dedent("""\
            <div>
                <p>
                    the following tag <br/> should still be here
                </p>
                <p>
                    and the next one <br/> too
                </p>
                <p>
                    but not the last one:<br/>
                </p>
            </div>
            """))

        nitf = self.formatter.html2nitf(html, attr_remove=['style'])

        # exact comparison: whitespace in `expected` must mirror the input above
        expected = dedent("""\
            <div>
                <p>
                    the following tag <br/> should still be here
                </p>
                <p>
                    and the next one <br/> too
                </p>
                <p>
                    but not the last one:
                </p>
            </div>""")
        self.assertEqual(etree.tostring(nitf, encoding='unicode'), expected)

    def test_html2nitf_style_cleaning(self):
        """Check that <style> element and style attribute are removed from HTML"""
        html = etree.fromstring(
            dedent("""\
            <div>
                <style type="text/css">
                    p { margin-bottom: 0.25cm; line-height: 120%; }a:link { }
                </style>
                <p style="margin-bottom: 0cm; line-height: 100%">Test bla bla bla</p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <br/>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <font face="DejaVu Sans, sans-serif">
                        <font style="font-size: 14pt" size="4">
                            <i>
                                <u>
                                    <b>test</b>
                                </u>
                            </i>
                        </font>
                    </font>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">toto</p>
                <p style="margin-bottom: 0cm; line-height: 100%">titi</p>
            </div>
            """))

        nitf = self.formatter.html2nitf(html, attr_remove=['style'])

        # newlines and spaces are stripped on both sides, so only the markup is compared
        expected = dedent("""\
            <div>
                <p>Test bla bla bla</p>
                <p>
                </p>
                <p>
                    <em class="italic">
                        <em class="underscore">
                            <em class="bold">test</em>
                        </em>
                    </em>
                </p>
                <p>toto</p>
                <p>titi</p>
            </div>""").replace('\n', '').replace(' ', '')
        self.assertEqual(
            etree.tostring(nitf, encoding='unicode').replace('\n', '').replace(' ', ''), expected)

    def test_table(self):
        # the table markup is expected to come back from html2nitf unchanged
        html_raw = """
        <div>
            <table>
                <tbody>
                    <tr>
                        <td>Table cell 1</td>
                        <td>Table cell 2</td>
                        <td>Table cell 3</td>
                    </tr>
                    <tr>
                        <td>Table cell 2.1</td>
                        <td>Table cell 2.2</td>
                        <td>Table cell 2.3</td>
                    </tr>
                    <tr>
                        <td>Table cell 3.1</td>
                        <td>Table cell 3.2</td>
                        <td>Table cell 3.3</td>
                    </tr>
                </tbody>
            </table>
        </div>
        """.replace('\n', '').replace(' ', '')
        html = etree.fromstring(html_raw)
        nitf = self.formatter.html2nitf(html)
        self.assertEqual(etree.tostring(nitf, encoding='unicode'), html_raw)

    def test_company_codes(self):
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{
                'qcode': 'f',
                'name': 'Finance'
            }],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{
                'qcode': '02011001',
                'name': 'international court or tribunal'
            }, {
                'qcode': '02011002',
                'name': 'extradition'
            }],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA'
                }
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{
                'qcode': 'Australia',
                'name': 'Australia',
                'state': '',
                'country': 'Australia',
                'world_region': 'Oceania'
            }],
            'company_codes': [{
                'name': 'YANCOAL AUSTRALIA LIMITED',
                'qcode': 'YAL',
                'security_exchange': 'ASX'
            }]
        }

        seq, doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        nitf_xml = etree.fromstring(doc)
        # the company code is looked up as an <org> element in the body head
        company = nitf_xml.find('body/body.head/org')
        self.assertEqual(company.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(company.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(company.attrib.get('value', ''), 'YAL')

    def testNoneAsciNamesContent(self):
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{
                'qcode': 'a'
            }],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{
                'qcode': '02011001'
            }],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [{
                "package": "package",
                "package_type": "takes"
            }],
        }
        seq, doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        nitf_xml = etree.fromstring(doc)
        # non-ASCII characters must survive formatting unchanged
        self.assertEqual(
            nitf_xml.find('body/body.content/p').text, 'Tommi Mäkinen crashes a Škoda in Äppelbo')
class NitfFormatterTest(TestCase):
    """Tests for ``NITFFormatter.format`` output and ``Formatter.append_legal``."""

    def setUp(self):
        super().setUp()
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        # an article flagged for legal gets the 'Legal: ' prefix, optionally truncated
        legal_article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {
                'marked_for_legal': True,
            },
        }
        prefixed = self.base_formatter.append_legal(legal_article)
        self.assertEqual(prefixed, 'Legal: Obama Republican Healthc')
        truncated = self.base_formatter.append_legal(legal_article, truncate=True)
        self.assertEqual(truncated, 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        # without the legal flag the slugline comes back unchanged
        plain_article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {
                'marked_for_legal': False,
            },
        }
        unchanged = self.base_formatter.append_legal(plain_article)
        self.assertEqual(unchanged, 'Obama Republican Healthc')

    def test_formatter(self):
        article = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2,
        }
        formatted = self.formatter.format(article, {'name': 'Test Subscriber'})
        _seq, xml_text = formatted[0]
        root = etree.fromstring(xml_text)

        title = root.find('head/title')
        self.assertEqual(title.text, article['headline'])
        paragraph = root.find('body/body.content/p')
        self.assertEqual(paragraph.text, 'test body')
        urgency = root.find('head/docdata/urgency')
        self.assertEqual(urgency.get('ed-urg'), '2')

    def test_company_codes(self):
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [
                {'qcode': 'f', 'name': 'Finance'},
            ],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001', 'name': 'international court or tribunal'},
                {'qcode': '02011002', 'name': 'extradition'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA',
                },
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [
                {
                    'qcode': 'Australia',
                    'name': 'Australia',
                    'state': '',
                    'country': 'Australia',
                    'world_region': 'Oceania',
                },
            ],
            'company_codes': [
                {
                    'name': 'YANCOAL AUSTRALIA LIMITED',
                    'qcode': 'YAL',
                    'security_exchange': 'ASX',
                },
            ],
        }
        formatted = self.formatter.format(article, {'name': 'Test Subscriber'})
        _seq, xml_text = formatted[0]
        root = etree.fromstring(xml_text)

        # the company code is looked up as an <org> element in the body head
        org = root.find('body/body.head/org')
        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')
class NitfFormatterTest(TestCase):
    """Tests for ``NITFFormatter`` (``format`` / ``html2nitf``) and ``Formatter.append_legal``."""

    def setUp(self):
        # NOTE(review): sibling variants of this class call super().setUp() first;
        # this one relies on self.app being available without it - confirm.
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    def test_append_legal(self):
        # an article flagged for legal gets the 'Legal: ' prefix, optionally truncated
        article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True}
        }

        slugline = self.base_formatter.append_legal(article)
        self.assertEqual(slugline, 'Legal: Obama Republican Healthc')
        slugline = self.base_formatter.append_legal(article, truncate=True)
        self.assertEqual(slugline, 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        # without the legal flag the slugline comes back unchanged
        article = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': False}
        }

        slugline = self.base_formatter.append_legal(article)
        self.assertEqual(slugline, 'Obama Republican Healthc')

    def test_formatter(self):
        article = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2
        }

        seq, doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        nitf_xml = etree.fromstring(doc)
        self.assertEqual(nitf_xml.find('head/title').text, article['headline'])
        self.assertEqual(nitf_xml.find('body/body.content/p').text, 'test body')
        self.assertEqual(nitf_xml.find('head/docdata/urgency').get('ed-urg'), '2')

    def test_html2nitf(self):
        html = etree.fromstring(dedent("""\
            <div>
                <unknown>
                    <p>
                        this should be still here
                    </p>
                </unknown>
                <p style="this='is';some='style'">
                    <strong>this text should be
                        <i>modified</i>
                    </strong>
                    so <span>[this should not be removed]</span> unkown
                    <em unknown_attribute="toto">elements</em> and
                    <a bad_attribute="to_remove">attributes</a>
                    are <h6>removed</h6>
                </p>
            </div>
            """))

        nitf = self.formatter.html2nitf(html, attr_remove=['style'])

        # newlines and spaces are stripped on both sides, so only the markup is compared
        expected = dedent("""\
            <div>
                <p>
                    this should be still here
                </p>
                <p>
                    <em class="bold">this text should be
                        <em class="italic">modified</em>
                    </em>
                    so [this should not be removed] unkown
                    <em class="italic">elements</em> and
                    <a>attributes</a>
                    are <hl2>removed</hl2>
                </p>
            </div>""").replace('\n', '').replace(' ', '')
        # ``encoding`` is passed by keyword: lxml documents it as keyword-only, and
        # the keyword form is also accepted by xml.etree.ElementTree.tostring.
        self.assertEqual(etree.tostring(nitf, encoding='unicode').replace('\n', '').replace(' ', ''), expected)

    def test_company_codes(self):
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001', 'name': 'international court or tribunal'},
                        {'qcode': '02011002', 'name': 'extradition'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA'
                }
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [{'qcode': 'Australia', 'name': 'Australia',
                       'state': '', 'country': 'Australia',
                       'world_region': 'Oceania'}],
            'company_codes': [{'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'}]
        }

        seq, doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        nitf_xml = etree.fromstring(doc)
        # the company code is looked up as an <org> element in the body head
        company = nitf_xml.find('body/body.head/org')
        self.assertEqual(company.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(company.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(company.attrib.get('value', ''), 'YAL')

    def testNoneAsciNamesContent(self):
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [
                {
                    "package": "package",
                    "package_type": "takes"
                }
            ],
        }
        seq, doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        nitf_xml = etree.fromstring(doc)
        # non-ASCII characters must survive formatting unchanged
        self.assertEqual(nitf_xml.find('body/body.content/p').text, 'Tommi Mäkinen crashes a Škoda in Äppelbo')
class NitfFormatterTest(TestCase):
    """Tests for ``NITFFormatter`` output and for ``Formatter.append_legal``."""

    def setUp(self):
        """Create the two formatters used by the tests and run ``init_app`` on the test app."""
        self.formatter = NITFFormatter()
        self.base_formatter = Formatter()
        init_app(self.app)

    @staticmethod
    def _strip_layout(markup):
        """Return ``markup`` with every newline and every space removed."""
        return markup.replace('\n', '').replace(' ', '')

    def _format_for_test_subscriber(self, article):
        """Format ``article`` for the 'Test Subscriber' subscriber and return the parsed XML root."""
        _sequence, document = self.formatter.format(article, {'name': 'Test Subscriber'})[0]
        return etree.fromstring(document)

    def test_append_legal(self):
        """A slugline flagged for legal gets the 'Legal: ' prefix, with and without truncation."""
        legal_item = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': True},
        }
        self.assertEqual(self.base_formatter.append_legal(legal_item), 'Legal: Obama Republican Healthc')
        self.assertEqual(self.base_formatter.append_legal(legal_item, truncate=True), 'Legal: Obama Republican ')

    def test_append_legal_when_not_legal(self):
        """A slugline that is not flagged for legal is returned unchanged."""
        plain_item = {
            'slugline': 'Obama Republican Healthc',
            'flags': {'marked_for_legal': False},
        }
        self.assertEqual(self.base_formatter.append_legal(plain_item), 'Obama Republican Healthc')

    def test_formatter(self):
        """Headline, body paragraph and urgency of a minimal text article appear in the output."""
        article = {
            'headline': 'test headline',
            'body_html': '<p>test body</p>',
            'type': 'text',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'urgency': 2,
        }
        root = self._format_for_test_subscriber(article)
        self.assertEqual(root.find('head/title').text, article['headline'])
        self.assertEqual(root.find('body/body.content/p').text, 'test body')
        self.assertEqual(root.find('head/docdata/urgency').get('ed-urg'), '2')

    def test_html2nitf(self):
        """Unknown elements and attributes are dropped and inline markup is mapped to NITF.

        The comparison ignores newlines and spaces on both sides.
        """
        source_tree = etree.fromstring(dedent("""\
            <div>
                <unknown>
                    <p>
                        this should be still here
                    </p>
                </unknown>
                <p style="this='is';some='style'">
                    <strong>this text should be
                        <i>modified</i>
                    </strong>
                    so <span>[this should not be removed]</span> unkown
                    <em unknown_attribute="toto">elements</em> and <a bad_attribute="to_remove">attributes</a>
                    are <h6>removed</h6>
                </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        wanted = self._strip_layout(dedent("""\
            <div>
                <p>
                    this should be still here
                </p>
                <p>
                    <em class="bold">this text should be
                        <em class="italic">modified</em>
                    </em>
                    so [this should not be removed] unkown
                    <em class="italic">elements</em> and <a>attributes</a>
                    are <hl2>removed</hl2>
                </p>
            </div>"""))
        self.assertEqual(self._strip_layout(etree.tostring(converted, encoding='unicode')), wanted)

    def test_html2nitf_br(self):
        """A <br/> directly under <div> is dropped (its text is kept); <br/> inside <p> is kept."""
        source_tree = etree.fromstring(dedent("""\
            <div>
                <br/>the previous tag should be removed (but not the text)
                <p>
                    the following tag <br/> should still be here
                    and the next one <br/> too
                </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        # Compared verbatim, so the layout here mirrors the input fixture above.
        wanted = dedent("""\
            <div>
                the previous tag should be removed (but not the text)
                <p>
                    the following tag <br/> should still be here
                    and the next one <br/> too
                </p>
            </div>""")
        self.assertEqual(etree.tostring(converted, encoding='unicode'), wanted)

    def test_body_content_br(self):
        """Format a long article whose paragraphs end in <br> and check the first two paragraphs."""
        article = {
            "_id": "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "guid": "urn:newsml:localhost:2017-05-24T16:56:29.742769:3d1faf62-6f70-4b28-9222-93ec603b7af0",
            "state": "published",
            "body_html": "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it "
            "may have been a 10-minute delay by police that cost two lives.</p><p>Shortly after 2am "
            "on December 16, 2014, gunman Man Haron Monis fired at escaping hostages.</p><p>That first "
            "shot by Monis should have satisfied a so-called \"secondary trigger\" prompting "
            "police to storm the Martin Place stronghold with some element of surprise, NSW Coroner "
            "Michael Barnes found on Wednesday.</p><p>\"(But) the 10 minutes that lapsed without "
            "decisive action by police was too long,\" Mr Barnes said in Sydney.</p><p>By the "
            "time police smashed their way into the cafe at 2.14am in a flurry of stun-grenade "
            "explosions, manager Tori Johnson had been forced to his knees and shot in the head."
            "</p><p>Barrister Katrina Dawson was killed by police bullet fragments.</p><p>New police "
            "commissioner Mick Fuller - who was one of the first commanders at the scene - admitted on "
            "Wednesday tactical officers should have gone in earlier.</p><p>Mr Fuller went further than "
            "the coroner when he told the Ten Network: \"We probably should have gone in before the "
            "first shot.\"</p><p>\"Early intervention for terrorist incidents is the way forward, "
            "knowing what we know now,\" he said.</p><p>\"But nevertheless it's still a very "
            "dangerous tactic and people will potentially still lose their lives.\"</p><p>Mr Barnes "
            "has made 45 findings on everything from police negotiation strategies to greater legal "
            "protections for frontline officers in terrorist situations.<br></p><p>He lay the blame "
            "for the loss of life squarely at the feet of Monis, but other parties, including prosecution "
            "solicitors and a consulting psychiatrist, copped some criticism.</p><p>The cafe "
            "was \"transformed into a prison run by a vicious maniac\" that day, Mr Barnes "
            "said.<br></p><p class=\"\">The traditional \"contain and negotiate\" strategy was "
            "appropriate early on but police failed to adequately reassess taking into account the "
            "headway they were making with Monis.<br></p><p class=\"\">\"Sadly, it failed,\" Mr Barnes "
            "said on Wednesday.<br></p><p class=\"\">\"The 'contain and negotiate' approach may not be "
            "the best ongoing response to a terrorist incident if the offenders believe, whether or not "
            "they survive, their cause will benefit from the publicity generated by a protracted "
            "siege.\"<br></p><p class=\"\">Mr Fuller said the containment strategy had saved countless "
            "lives over the years and wouldn't be abandoned for non-terrorist incidents.</p>"
            "<p class=\"\">Police mistakenly thought Monis' backpack could house explosives, "
            "but Mr Barnes noted senior officers were \"unduly reluctant\" to initiate direct action "
            "plans during siege situations.</p><p class=\"\">The \"primary trigger\" for such an "
            "assault was identified as the death or serious injury of a hostage - evidence which "
            "disgusted the families of victims and survivors.</p><p class=\"\">The police response was "
            "at times hampered by mishaps.</p><p>Eight calls to a phone number hostages expected would "
            "connect them with negotiators were missed, which was a \"significant failure\", Mr Barnes "
            "said.</p><p>Commanders and tactical officers received specialist terrorist training but "
            "negotiators received \"little, if any\", the coroner added.</p><p>The stand-off could not "
            "have eventuated in the first place if Monis had not been granted bail on accessory to murder"
            " and dozens of sexual assault offences.<br></p><p>Mr Barnes found the work of an Office of "
            "the Director of Public Prosecutions solicitor in December 2013 was inadequate, "
            "\"erroneously\" advising a court Monis didn't have to show exceptional circumstances "
            "in arguing for bail.</p><p>Police also made a mistake by issuing Monis with a court "
            "attendance notice for the sexual offences in October 2014 rather than arresting him"
            ".</p><p>Monis was already on bail at the time for a commonwealth offence after he'd "
            "written offensive letters to the families of Australian soldiers killed in the Middle"
            " East.</p><p>States can find it difficult to access commonwealth records, Mr Barnes said,"
            " and he called for that to be remedied.</p><p>Some of the coroner's harshest individual "
            "criticism was reserved for the consultant psychiatrist who advised police.</p>"
            "<p>His \"sub-optimal\" performance included a belief that Monis was merely grandstanding,"
            " Mr Barnes found.</p><p>The doctor should not have been permitted to advise on negotiation "
            "strategy and he made \"erroneous and unrealistic assessments\" of what was happening inside "
            "the cafe.</p><p>The psychiatrist's advice was ambiguous and Islamic terrorism was beyond his"
            " expertise.</p><p>\"The police commanders underestimated the threat Monis posed,\" Mr Barnes"
            " said, in part blaming their reliance upon the psychiatrist's opinion.</p><p>He recommended "
            "a more diverse panel of experts be used in the future.</p><p>The coroner stated police "
            "snipers couldn't have ended the siege despite a 10-minute window where they had clear sight "
            "of a head that could have been Monis.</p><p>Those gunmen were never confident in their legal "
            "justification for a \"kill shot\" and Mr Barnes suggested their power to use force "
            "should be more clearly defined.</p><p>The coroner did acknowledge that sending tactical "
            "officers into the cafe after their hand was forced was a decision no commander would "
            "ever want to face.</p><p>\"The bravery of these officers inspires awe,\" he said.<br></p>",
            "pubstatus": "usable",
            "type": "text",
            "abstract": "<p>The NSW coroner believes a 10-minute period of inaction by police before the bloody end "
            "of the 2014 Lindt Cafe siege was \"too long\".</p>",
            "priority": 6,
            "unique_id": 12055427,
            "format": "HTML",
            "genre": [
                {"qcode": "Wrap", "name": "Wrap"},
            ],
            "word_count": 843,
            "source": "AAP",
            "urgency": 1,
            "subject": [
                {"qcode": "16001000", "parent": "16000000", "name": "act of terror"},
                {"qcode": "02001010", "parent": "02001000", "name": "terrorism"},
            ],
            "flags": {
                "marked_archived_only": False,
                "marked_for_legal": False,
                "marked_for_not_publication": False,
                "marked_for_sms": False,
            },
            "headline": "'Ten minutes was too long': Lindt siege",
            "dateline": {
                "source": "AAP",
                "text": "SYDNEY, May 24 AAP -",
                "located": {
                    "city": "Sydney",
                    "country_code": "AU",
                    "country": "Australia",
                    "dateline": "city",
                    "state_code": "NSW",
                    "state": "New South Wales",
                    "alt_name": "",
                    "tz": "Australia/Sydney",
                    "city_code": "Sydney",
                },
            },
            "anpa_category": [
                {"qcode": "a", "name": "Australian General News"},
            ],
            "unique_name": "#12055427",
            "place": [
                {
                    "name": "NSW",
                    "qcode": "NSW",
                    "country": "Australia",
                    "world_region": "Oceania",
                    "group": "Australia",
                    "state": "New South Wales",
                },
            ],
            "sign_off": "SN/jmk/jcd/pmu",
            "anpa_take_key": "2nd Wrap (pix/video available)",
            "language": "en",
            "slugline": "Cafe",
            "byline": "Jamie McKinnell",
            "version": 2,
        }

        formatted = self.formatter.format(article, {})
        root = etree.fromstring(formatted[0][1])

        # The serialised element is compared together with the whitespace that follows it.
        first_paragraph = etree.tostring(root.find('body/body.content/p'), encoding="unicode")
        self.assertEqual(
            first_paragraph,
            "<p>Sydney's Lindt Cafe siege hostages endured 17-hours of torture but, in the end, it may "
            "have been a 10-minute delay by police that cost two lives.</p>\n ")

        second_paragraph = root.findall('body/body.content/p')[1]
        self.assertTrue(second_paragraph.text.startswith('Shortly after 2am'))

    def test_html2nitf_br_last(self):
        """A <br/> that is the last thing in a <p> is removed; earlier ones are kept."""
        source_tree = etree.fromstring(dedent("""\
            <div>
                <p>
                    the following tag <br/> should still be here
                </p>
                <p>
                    and the next one <br/> too
                </p>
                <p>
                    but not the last one:<br/>
                </p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        # Compared verbatim, so the layout here mirrors the input fixture above.
        wanted = dedent("""\
            <div>
                <p>
                    the following tag <br/> should still be here
                </p>
                <p>
                    and the next one <br/> too
                </p>
                <p>
                    but not the last one:
                </p>
            </div>""")
        self.assertEqual(etree.tostring(converted, encoding='unicode'), wanted)

    def test_html2nitf_style_cleaning(self):
        """The <style> element and ``style`` attributes are stripped from the HTML.

        The comparison ignores newlines and spaces on both sides.
        """
        source_tree = etree.fromstring(dedent("""\
            <div>
                <style type="text/css">
                    p { margin-bottom: 0.25cm; line-height: 120%; }a:link { }
                </style>
                <p style="margin-bottom: 0cm; line-height: 100%">Test bla bla bla</p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <br/>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">
                    <font face="DejaVu Sans, sans-serif">
                        <font style="font-size: 14pt" size="4">
                            <i>
                                <u>
                                    <b>test</b>
                                </u>
                            </i>
                        </font>
                    </font>
                </p>
                <p style="margin-bottom: 0cm; line-height: 100%">toto</p>
                <p style="margin-bottom: 0cm; line-height: 100%">titi</p>
            </div>
            """))

        converted = self.formatter.html2nitf(source_tree, attr_remove=['style'])

        wanted = self._strip_layout(dedent("""\
            <div>
                <p>Test bla bla bla</p>
                <p>
                </p>
                <p>
                    <em class="italic">
                        <em class="underscore">
                            <em class="bold">test</em>
                        </em>
                    </em>
                </p>
                <p>toto</p>
                <p>titi</p>
            </div>"""))
        self.assertEqual(self._strip_layout(etree.tostring(converted, encoding='unicode')), wanted)

    def test_table(self):
        """A plain table passes through ``html2nitf`` unchanged."""
        # All spaces and newlines are stripped before parsing, so the cell texts lose theirs too.
        table_markup = self._strip_layout("""
        <div>
            <table>
                <tbody>
                    <tr>
                        <td>Table cell 1</td>
                        <td>Table cell 2</td>
                        <td>Table cell 3</td>
                    </tr>
                    <tr>
                        <td>Table cell 2.1</td>
                        <td>Table cell 2.2</td>
                        <td>Table cell 2.3</td>
                    </tr>
                    <tr>
                        <td>Table cell 3.1</td>
                        <td>Table cell 3.2</td>
                        <td>Table cell 3.3</td>
                    </tr>
                </tbody>
            </table>
        </div>
        """)
        converted = self.formatter.html2nitf(etree.fromstring(table_markup))
        self.assertEqual(etree.tostring(converted, encoding='unicode'), table_markup)

    def test_company_codes(self):
        """A company code becomes ``body/body.head/org`` with name text, ``idsrc`` and ``value``."""
        article = {
            'guid': 'tag:aap.com.au:20150613:12345',
            '_current_version': 1,
            'anpa_category': [{'qcode': 'f', 'name': 'Finance'}],
            'source': 'AAP',
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [
                {'qcode': '02011001', 'name': 'international court or tribunal'},
                {'qcode': '02011002', 'name': 'extradition'},
            ],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'body_html': 'The story body',
            'type': 'text',
            'word_count': '1',
            'priority': '1',
            '_id': 'urn:localhost.abc',
            'state': 'published',
            'urgency': 2,
            'pubstatus': 'usable',
            'dateline': {
                'source': 'AAP',
                'text': 'Los Angeles, Aug 11 AAP -',
                'located': {
                    'alt_name': '',
                    'state': 'California',
                    'city_code': 'Los Angeles',
                    'city': 'Los Angeles',
                    'dateline': 'city',
                    'country_code': 'US',
                    'country': 'USA',
                    'tz': 'America/Los_Angeles',
                    'state_code': 'CA',
                },
            },
            'creditline': 'sample creditline',
            'keywords': ['traffic'],
            'abstract': 'sample abstract',
            'place': [
                {
                    'qcode': 'Australia',
                    'name': 'Australia',
                    'state': '',
                    'country': 'Australia',
                    'world_region': 'Oceania',
                },
            ],
            'company_codes': [
                {'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX'},
            ],
        }

        org = self._format_for_test_subscriber(article).find('body/body.head/org')
        self.assertEqual(org.text, 'YANCOAL AUSTRALIA LIMITED')
        self.assertEqual(org.attrib.get('idsrc', ''), 'ASX')
        self.assertEqual(org.attrib.get('value', ''), 'YAL')

    def testNoneAsciNamesContent(self):
        """Non-ASCII characters in ``body_html`` reach the output paragraph unchanged."""
        article = {
            '_id': '3',
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'body_html': '<p>Tommi Mäkinen crashes a Škoda in Äppelbo</p>',
            'word_count': '1',
            'priority': 1,
            "linked_in_packages": [
                {"package": "package", "package_type": "takes"},
            ],
        }
        root = self._format_for_test_subscriber(article)
        self.assertEqual(root.find('body/body.content/p').text, 'Tommi Mäkinen crashes a Škoda in Äppelbo')