예제 #1
0
    def test_fetch_to_jimi(self, update_renditions_mock):
        """Fetch a picture via the Orangelogic provider and verify its Jimi XML.

        HTTP traffic is answered by the ``auth_ok``/``fetch_ok`` handlers and
        ``update_renditions_mock`` is the injected mock (its patch target is
        not visible from this method). The fetched item is then formatted
        with ``JimiFormatter`` and the output is checked field by field.
        """
        service = OrangelogicSearchProvider(self.provider)

        # Mock only honours ``side_effect`` (singular). The former
        # ``side_effects`` spelling merely created an unused attribute, so
        # ``set_rendition`` was never invoked.
        update_renditions_mock.side_effect = set_rendition

        self.app.media.get.return_value = io.BytesIO(
            read_fixture('9e627f74b97841b3b8562b6547ada9c7-d1538139479c43e88021152.jpg', 'rb')
        )

        with HTTMock(auth_ok, fetch_ok):
            with patch.dict(superdesk.resources, resources):
                fetched = service.fetch({})
            update_renditions_mock.assert_called_once_with(
                fetched,
                'https://example.com/htm/GetDocumentAPI.aspx?F=TRX&DocID=2RLQZBCB4R4R4&token=token.foo',
                None,
            )

        self.assertEqual('picture', fetched['type'])
        self.assertIsInstance(fetched['firstcreated'], datetime)

        # populate ids the formatter reads but the fetch does not provide
        fetched['family_id'] = fetched['guid']
        fetched['unique_id'] = 1

        with patch.dict(superdesk.resources, resources):
            formatter = JimiFormatter()
            # format() yields a list of tuples; the XML is the second member
            xml = formatter.format(fetched, {})[0][1]

        root = etree.fromstring(xml.encode(formatter.ENCODING))

        self.assertEqual('Pictures', root.find('Services').text)

        item = root.find('ContentItem')

        self.assertEqual('Zhang Yuwei', item.find('Byline').text)
        self.assertEqual('I', item.find('Category').text)
        self.assertEqual('News - Optional', item.find('Ranking').text)
        self.assertEqual('5', item.find('RankingValue').text)
        self.assertEqual('THE ASSOCIATED PRESS', item.find('Credit').text)
        self.assertEqual('Virus Outbreak China Vaccine', item.find('SlugProper').text)
        self.assertEqual('Unknown AP', item.find('Source').text)
        self.assertEqual('Beijing', item.find('City').text)
        self.assertEqual('China', item.find('Country').text)
        self.assertEqual('Beijing;;China', item.find('Placeline').text)
        # NOTE(review): this assertion is disabled in the original; reason unknown.
        # self.assertEqual('XIN902', item.find('OrigTransRef').text)
        self.assertEqual('SUB', item.find('BylineTitle').text)
        self.assertEqual('NHG', item.find('CaptionWriter').text)
        self.assertEqual('Xinhua', item.find('Copyright').text)
        self.assertIn("In this April 10, 2020, photo released by Xinhua News Agency, a staff",
                      item.find('EnglishCaption').text)
        self.assertEqual('2020-04-12T00:09:37', item.find('DateTaken').text)
        self.assertEqual('NO SALES, PHOTO RELEASED BY XINHUA NEWS AGENCY APRIL 10, 2020 PHOTO',
                         item.find('SpecialInstructions').text)
        self.assertEqual('Unknown AP', item.find('ArchiveSources').text)
        self.assertEqual('9e627f74b97841b3b8562b6547ada9c7', item.find('CustomField1').text)
        self.assertEqual('Xinhua', item.find('CustomField6').text)
        self.assertEqual('9e627f74b97841b3b8562b6547ada9c7', item.find('SystemSlug').text)
예제 #2
0
    def test_fr(self):
        """The French fixture parses as ``fr`` and formats with the ``Écrit`` service."""
        parsed = self.parse("fr.xml")
        self.assertIsNotNone(parsed)
        self.assertEqual("fr", parsed["language"])
        self.assertEqual("Communiqué", parsed["description_text"])

        # the formatter needs a unique id on the item
        parsed["unique_id"] = 1
        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            formatted = JimiFormatter().format(parsed, {}, None)
            _, xml = formatted[0]
        self.assertIn("<Services>Écrit</Services>", xml)
예제 #3
0
import superdesk
import requests_mock
import settings

from lxml import etree
from flask import json
from unittest.mock import MagicMock, patch

from tests.mock import SEQUENCE_NUMBER, resources
from tests.ingest.parser import get_fixture_path

from cp.ingest import CP_APMediaFeedParser
from cp.output.formatter.jimi import JimiFormatter

# Module-level parser and formatter instances, created once at import time.
parser = CP_APMediaFeedParser()
formatter = JimiFormatter()


def fixture(filename):
    """Return the path of *filename* inside the ``fixtures`` directory next to this module."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "fixtures", filename)


class AP2JimiTestCase(unittest.TestCase):
    """Test case owning a Flask app with mocked locators and the AP tags mapping."""

    app = flask.Flask(__name__)
    app.locators = MagicMock()
    app.config["AP_TAGS_MAPPING"] = settings.AP_TAGS_MAPPING
예제 #4
0
class JimiFormatterTestCase(BaseXmlFormatterTestCase):
    """Tests for the XML produced by ``JimiFormatter``."""

    formatter = JimiFormatter()

    # Baseline text item used by the tests.
    # NOTE(review): presumably merged with per-test ``updates`` by the base
    # class ``format`` helper - confirm against BaseXmlFormatterTestCase.
    article = {
        "_id": "id",
        "guid": "id",
        "family_id": "famid",
        "type": "text",
        "headline": "Headline",
        "slugline": "slug",
        "creditline": "Credit",
        "source": "Source",
        "ednote": "Ednote",
        "word_count": 123,
        "abstract": "<p>Abstract</p>",
        "body_html": "<p>Body HTML<br>test <b>bold</b> and <i>idiom</i></p>",
        "keywords": ["Foo bar", "baz"],
        "anpa_category": [
            {"name": "National", "qcode": "n"},
        ],
        "subject": [
            {"name": "health", "qcode": "07000000", "scheme": "subject_custom"},
            {"name": "citizens", "qcode": "20000575", "scheme": "subject_custom"},
            {"name": "Foo", "qcode": "1231245", "scheme": "foo"},
            {"name": "Print", "qcode": "Print", "scheme": cp.DISTRIBUTION},
            {
                "name": "The Associated Press",
                "qcode": "ap---",
                "scheme": cp.DESTINATIONS,
            },
        ],
        "urgency": 2,
        "language": "en-CA",
        "unique_id": 123,
        "firstcreated": datetime(2020, 4, 1, 11, 13, 12, 25, tzinfo=UTC),
        "versioncreated": datetime(2020, 4, 1, 11, 23, 12, 25, tzinfo=UTC),
        "firstpublished": datetime(2020, 4, 1, 11, 33, 12, 25, tzinfo=UTC),
        "genre": [
            {"name": "NewsAlert", "qcode": "NewsAlert"},
        ],
        "extra": {
            cp.HEADLINE2: "headline2",
            cp.FILENAME: "filename",
        },
    }

    def format_item(self, updates=None, return_root=False):
        """Format the article with ``updates`` and return the parsed XML.

        Returns the root element when ``return_root`` is truthy, otherwise
        the root's ``ContentItem`` child.
        """
        parsed_root = self.parse(self.format(updates))
        return parsed_root if return_root else parsed_root.find("ContentItem")

    def test_can_format(self):
        """The formatter reports that it can handle the ``jimi`` format type."""
        accepted = self.formatter.can_format("jimi", {})
        self.assertTrue(accepted)

    def test_format(self):
        """Check the raw output and every expected element for the baseline article."""
        output = self.format()
        self.assertIn("<?xml version='1.0' encoding='utf-8'?>", output)
        self.assertIn("<ContentText>&lt;p&gt;Body HTML&lt;br /&gt;test", output)

        publish = self.parse(output)
        self.assertEqual("Publish", publish.tag)

        # (tag, expected text) pairs on the root element
        root_expectations = (
            ("Reschedule", "false"),
            ("IsRegional", "false"),
            ("CanAutoRoute", "true"),
            ("PublishID", str(SEQUENCE_NUMBER)),
            ("Services", "Print"),
            ("Username", None),
            ("UseLocalsOut", "false"),
            ("PscCodes", "ap---"),
            ("PublishDateTime", "2020-04-01T11:33:12"),
        )
        for tag, expected in root_expectations:
            self.assertEqual(expected, publish.find(tag).text)

        content = publish.find("ContentItem")
        source = self.article

        # (tag, expected text) pairs on ContentItem, in the original order
        content_expectations = (
            ("Name", None),
            ("Cachable", "false"),
            # ids
            ("ContentItemID", "00000100"),
            ("NewsCompID", "00000123"),
            ("SystemSlug", source["guid"]),
            ("FileName", source["guid"]),
            ("OrigTransRef", source["extra"][cp.FILENAME]),
            # obvious
            ("ContentType", "Text"),
            # SDCP-309
            ("Headline2", source["headline"]),
            ("Headline", "headline2"),
            ("Credit", source["creditline"]),
            ("SlugProper", source["slugline"]),
            ("Source", source["source"]),
            ("EditorNote", source["ednote"]),
            ("WordCount", "6"),
            ("BreakWordCount", "6"),
            ("Length", "6"),
            ("DirectoryText", "Body HTMLtest bold and idiom"),
            (
                "ContentText",
                "<p>Body HTML<br />test <strong>bold</strong> and <em>idiom</em></p>",
            ),
            ("Placeline", None),
            ("WritethruValue", "0"),
            ("Keyword", "Foo bar,baz"),
            ("Category", "National"),
            ("IndexCode", "National,Health,Politics"),
            ("RankingValue", str(source["urgency"])),
            ("Ranking", "News - Need to Know"),
            ("Language", "1"),
            # timestamps
            ("EmbargoTime", "0001-01-01T00:00:00"),
            ("CreatedDateTime", "2020-04-01T11:33:12"),
            ("UpdatedDateTime", "2020-04-01T07:23:12-04:00"),
            # etc
            ("VersionType", "NewsAlert"),
        )
        for tag, expected in content_expectations:
            self.assertEqual(expected, content.find(tag).text)

    def test_writethru(self):
        """Each rewrite sequence yields its English ordinal and the ``Writethru`` type.

        Every sequence value runs in its own ``subTest`` so that a failure
        names the offending value and the remaining values are still checked.
        """
        expected_ordinals = {
            1: "1st",
            2: "2nd",
            3: "3rd",
            4: "4th",
            5: "5th",
            10: "10th",
            100: "100th",
            101: "101st",
        }

        for sequence, ordinal in expected_ordinals.items():
            with self.subTest(rewrite_sequence=sequence):
                item = self.format_item({"rewrite_sequence": sequence})
                self.assertEqual(ordinal, item.find("WritethruNum").text)
                self.assertEqual(str(sequence), item.find("WritethruValue").text)
                self.assertEqual("Writethru", item.find("WriteThruType").text)

    def test_dateline(self):
        """A located dateline fills the city, province, country, placeline and coordinates."""
        located = {
            "alt_name": "",
            "state": "California",
            "city_code": "Los Angeles",
            "city": "Los Angeles",
            "dateline": "city",
            "country_code": "US",
            "country": "USA",
            "tz": "America/Los_Angeles",
            "state_code": "CA",
            "location": {"lat": 34.0522, "lon": -118.2347},
        }
        dateline = {
            "source": "AAP",
            "text": "sample dateline",
            "located": located,
        }

        content = self.format_item({"dateline": dateline})

        for tag, expected in (
            ("City", "Los Angeles"),
            ("Province", "California"),
            ("Country", "USA"),
            ("Placeline", "Los Angeles;California;USA"),
            ("Latitude", "34.0522"),
            ("Longitude", "-118.2347"),
        ):
            self.assertEqual(expected, content.find(tag).text)

    def test_globenewswire(self):
        """A GlobeNewswire item produces two outputs; the first is checked in detail."""
        services = [
            {"name": code, "qcode": code, "scheme": cp.SERVICE}
            for code in ("FOO", "BAR")
        ]
        updates = {
            "source": globenewswire.SOURCE,
            "headline": "Foo",
            "keywords": ["TSX VENTURE:AXL", "OTC:NTGSF"],
            "anpa_category": [
                {
                    "name": globenewswire.DESCRIPTION["en"],
                    "qcode": "p",
                },
            ],
            "subject": services,
            "extra": {},
        }

        results = self.format(updates, _all=True)
        self.assertEqual(2, len(results))

        # each result is a tuple whose second member is the XML
        publish = self.parse(results[0][1])
        content = publish.find("ContentItem")

        self.assertEqual("Print", publish.find("Services").text)
        self.assertEqual("FOO", publish.find("PscCodes").text)

        for tag, expected in (
            ("Category", "Press Release"),
            ("IndexCode", "Press Release"),
            ("Note", "FOO,BAR"),
            ("Stocks", "TSX VENTURE:AXL,OTC:NTGSF"),
            ("Headline", "Foo"),
            ("Headline2", "Foo"),
        ):
            self.assertEqual(expected, content.find(tag).text)

    def test_limits(self):
        """Over-long headline and keyword values are cut down to ``foo bar``."""
        overlong = "foo bar " + "x" * 200
        updates = {
            "headline": overlong,
            "extra": {"headline2": overlong},
            "keywords": ["foo", "bar", overlong],
        }

        content = self.format_item(updates)

        for tag, expected in (
            ("Headline", "foo bar"),
            ("Headline2", "foo bar"),
            ("Keyword", "foo,bar,foo bar"),
        ):
            self.assertEqual(expected, content.find(tag).text)

    def test_picture(self):
        """A picture item is routed to ``Pictures`` and carries photo metadata."""
        updates = {
            "type": "picture",
            "guid": "urn:picture",
            "urgency": 5,
            "byline": "photographer",
            "headline": "some headline",
            "slugline": "slug",
            "firstcreated": datetime(2020, 6, 3, 17, 0, 56, tzinfo=UTC),
            "extra": {
                cp.FILENAME: "NY538",
                "photographer_code": "stf",
            },
            "subject": [
                {"name": "Americas", "qcode": "A", "scheme": "photo_categories"},
            ],
            "creditline": "THE ASSOCIATED PRESS",
            "original_source": "The Associated Press",
            "copyrightnotice": "Copyright 2020 The Associated Press. All rights reserved.",
            "description_text": "Pedestrians are silhouetted",
            "renditions": {
                "original": {
                    "media": "media_id",
                    "mimetype": "image/jpeg",
                },
            },
        }
        publish = self.format_item(updates, True)

        self.assertEqual("Pictures", publish.find("Services").text)
        self.assertEqual("Online", publish.find("PscCodes").text)

        content = publish.find("ContentItem")

        # (tag, expected text) pairs, kept in the original assertion order
        expectations = (
            ("Byline", updates["byline"]),
            ("HeadlineService", "false"),
            ("Category", "A"),
            ("VideoType", "None"),
            ("PhotoType", "None"),
            ("GraphicType", "None"),
            ("Ranking", "News - Optional"),
            ("RankingValue", "5"),
            ("Credit", updates["creditline"]),
            ("ContentType", "Photo"),
            ("SlugProper", updates["slugline"]),
            ("Source", updates["original_source"]),
            ("OrigTransRef", updates["extra"][cp.FILENAME]),
            ("BylineTitle", "STF"),
            # only the first 50 characters of the notice are expected
            ("Copyright", updates["copyrightnotice"][:50]),
            ("EnglishCaption", updates["description_text"]),
            ("DateTaken", "2020-06-03T17:00:56"),
            ("FileName", "media_id"),
            ("ViewFile", "media_id.jpg"),
            ("ContentRef", "media_id.jpg"),
            ("SystemSlug", updates["guid"]),
        )
        for tag, expected in expectations:
            self.assertEqual(expected, content.find(tag).text)

        # FileName must be emitted exactly once
        filenames = content.findall("FileName")
        self.assertEqual(1, len(filenames))

    def test_picture_amazon(self):
        """Slashes in the media id become dashes in the file references."""
        media = "20200807100836/5f2d12c8ced0b19f31ea318ajpeg.jpg"
        updates = {
            "type": "picture",
            "renditions": {
                "original": {"media": media},
            },
        }

        content = self.format_item(updates)

        expected_file = updates["renditions"]["original"]["media"].replace("/", "-")
        expected_stem, _ext = os.path.splitext(expected_file)
        self.assertEqual(expected_stem, content.find("FileName").text)
        self.assertEqual(expected_file, content.find("ViewFile").text)
        self.assertEqual(expected_file, content.find("ContentRef").text)

    def test_embargo(self):
        """Embargo given via schedule settings or ``embargoed`` renders as 09:10:05 for a 13:10:05 UTC input."""
        embargo_utc = datetime(2020, 7, 22, 13, 10, 5, tzinfo=UTC)
        variants = (
            {SCHEDULE_SETTINGS: {"utc_embargo": embargo_utc}},
            {"embargoed": embargo_utc},
        )

        for updates in variants:
            content = self.format_item(updates)
            self.assertEqual("2020-07-22T09:10:05", content.find("EmbargoTime").text)

    def test_format_credit(self):
        """With source ``CP`` and no creditline the credit is ``THE CANADIAN PRESS``."""
        updates = {"source": "CP", "creditline": None}
        credit = self.format_item(updates).find("Credit")
        self.assertEqual("THE CANADIAN PRESS", credit.text)

    def test_item_with_picture(self):
        """Associated pictures are referenced once each, even when attached twice."""

        def picture(ident):
            # minimal picture association keyed by ``ident``
            return {
                "_id": ident,
                "type": "picture",
                "guid": "{}:guid".format(ident),
                "renditions": {
                    "original": {
                        "media": ident,
                        "mimetype": "image/jpeg",
                    },
                },
            }

        updates = {
            "source": "CP",
            "associations": {
                "gallery--1": picture("foo"),
                "gallery--2": picture("bar"),
                "gallery--3": picture("bar"),  # same picture twice
            },
        }

        content = self.format_item(updates)

        self.assertEqual("Many", content.find("PhotoType").text)
        self.assertEqual("foo,bar", content.find("PhotoReference").text)

    def test_format_filename_rewrite(self):
        """A rewrite chain resolves ``FileName`` and ``SystemSlug`` to the oldest item's guid.

        The archive lookup is primed with the two previous versions via
        ``side_effect``. The shared mock is restored in a ``finally`` block so
        a failure while formatting cannot leak the side effect into other tests.
        """
        date_1am_et = datetime(2020, 8, 12, 5, tzinfo=UTC)
        date_2am_et = date_1am_et + timedelta(hours=1)
        date_3am_et = date_1am_et + timedelta(hours=2)

        resources["archive"].service.find_one.side_effect = [
            {
                "guid": "same-cycle",
                "rewrite_of": "prev-cycle",
                "firstcreated": date_2am_et,
                "unique_id": 2,
                "type": "text",
            },
            {
                "guid": "prev-cycle",
                "firstcreated": date_1am_et,
                "unique_id": 1,
                "type": "text",
            },
        ]

        try:
            item = self.format_item({
                "guid": "last",
                "rewrite_of": "same-cycle",
                "extra": {},
                "firstcreated": date_3am_et,
                "type": "text",
            })
        finally:
            # always reset the shared mock, even when formatting raises
            resources["archive"].service.find_one.side_effect = None

        self.assertEqual("prev-cycle", item.find("FileName").text)
        self.assertEqual("prev-cycle", item.find("SystemSlug").text)

    def test_format_fr_CA(self):
        """A ``fr-CA`` broadcast item gets French labels and a French writethru ordinal."""
        fr_updates = {
            "language": "fr-CA",
            "anpa_category": [
                {"name": "National", "qcode": "g"},
            ],
            "rewrite_sequence": 2,
            "subject": [
                {
                    "name": "Broadcast",
                    "qcode": cp.BROADCAST,
                    "scheme": cp.DISTRIBUTION,
                },
            ],
        }

        content = self.format_item(fr_updates)

        for tag, expected in (
            ("Language", "2"),
            ("Category", "Nouvelles Générales"),
            ("IndexCode", "Nouvelles Générales"),
            ("VersionType", "Alerte"),
            ("Ranking", "Nouvelle - Majeur"),
        ):
            self.assertEqual(expected, content.find(tag).text)

        # Services is looked up on the parent of ContentItem
        parent = content.find("..")
        self.assertEqual("Radio", parent.find("Services").text)

        for tag, expected in (
            ("WritethruValue", "2"),
            ("WritethruNum", "2ème"),
            ("WriteThruType", "Lead"),
        ):
            self.assertEqual(expected, content.find(tag).text)

    def test_correction_update(self):
        """Update and correction notes from ``extra`` land in their own elements."""
        notes = {
            cp.UPDATE: "update text",
            cp.CORRECTION: "correction text",
        }

        content = self.format_item({"extra": notes})

        self.assertEqual("update text", content.find("UpdateNote").text)
        self.assertEqual("correction text", content.find("Corrections").text)

    def test_writethru_keeps_newscompid(self):
        """A rewrite keeps the ``NewsCompID`` of the first item in its chain.

        The archive lookup is primed with the two previous versions. The
        shared mock is restored in a ``finally`` block so a failure while
        formatting cannot leak the side effect into other tests.
        """
        resources["archive"].service.find_one.side_effect = [
            {
                "guid": "same-cycle",
                "rewrite_of": "prev-cycle",
                "unique_id": 2,
                "type": "text",
            },
            {
                "guid": "prev-cycle",
                "unique_id": 1,
                "type": "text"
            },
        ]

        try:
            item = self.format_item({
                "type": "text",
                "rewrite_of": "same-cycle",
                "unique_id": 3,
            })
        finally:
            # always reset the shared mock, even when formatting raises
            resources["archive"].service.find_one.side_effect = None

        self.assertEqual("00000001", item.find("NewsCompID").text)

    def test_ap_update_keeps_newscomip(self):
        """``NewsCompID`` comes from the ingest record when the lookup returns one.

        The ingest lookup is primed with a record whose ``unique_id`` is 1.
        The shared mock is restored in a ``finally`` block so a failure while
        formatting cannot leak the side effect into other tests.
        """
        resources["ingest"].service.find_one.side_effect = [{
            "unique_id": 1,
        }]

        try:
            item = self.format_item({
                "type": "text",
                "unique_id": 5,
            })
        finally:
            # always reset the shared mock, even when formatting raises
            resources["ingest"].service.find_one.side_effect = None

        self.assertEqual("00000001", item.find("NewsCompID").text)

    def test_picture_container_ids(self):
        """``ContainerIDs`` lists the usable items returned by the news lookup.

        Of the three items returned, the canceled one is expected to be left
        out, and the item carrying ``cp.ORIG_ID`` is expected to appear under
        that id. The shared mock is restored in a ``finally`` block so a
        failure while formatting cannot leak the side effect into other tests.
        """
        resources["news"].service.get.side_effect = [[
            {
                "guid": "canceled",
                "pubstatus": "canceled",
                "type": "text"
            },
            {
                "guid": "usable",
                "pubstatus": "usable",
                "type": "text"
            },
            {
                "guid": "usable2",
                "pubstatus": "usable",
                "type": "text",
                "extra": {
                    cp.ORIG_ID: 32 * "a",  # slug constraints
                },
            },
        ]]

        try:
            item = self.format_item({
                "type": "picture",
                "unique_id": 3,
            })
        finally:
            # always reset the shared mock, even when formatting raises
            resources["news"].service.get.side_effect = None

        self.assertEqual("{}, usable".format(32 * "a"),
                         item.find("ContainerIDs").text)

    def test_placeline_washington(self):
        """The state ``Washington, D.C.`` is rendered as ``District of Columbia``."""
        located = {
            "dateline": "city",
            "country_code": "US",
            "tz": "America/New_York",
            "city_code": "Washington",
            "state_code": "DC",
            "state": "Washington, D.C.",
            "city": "Washington",
            "country": "United States",
            "code": "4140963",
            "scheme": "geonames",
        }
        dateline = {
            "source": "AAP",
            "text": "sample dateline",
            "located": located,
        }

        content = self.format_item({"dateline": dateline})

        placeline = content.find("Placeline").text
        self.assertEqual("Washington;District of Columbia;United States", placeline)
        province = content.find("Province").text
        self.assertEqual("District of Columbia", province)

    def test_format_content(self):
        """Whitespace-only bold tags are dropped and ``b``/``i`` become ``strong``/``em``."""
        body = "<p>Body HTML<br>test remove bold <b>  </b> and <b>bold1</b> and <i>idiom</i></p>"
        content = self.format_item({"body_html": body})

        # collapse whitespace runs before comparing
        normalized = " ".join(content.find("ContentText").text.split())
        self.assertEqual(
            "<p>Body HTML<br />test remove bold and <strong>bold1</strong> and <em>idiom</em></p>",
            normalized,
        )

    def test_ap_translated(self):
        """For a ``fr-CA`` item the slug is the first 30 characters of the original id plus ``fa``."""
        original_id = 'a' * 32
        updates = {
            "language": "fr-CA",
            "extra": {cp.ORIG_ID: original_id},
        }

        content = self.format_item(updates)

        expected_slug = 'a' * 30 + 'fa'
        self.assertEqual(expected_slug, content.find('SystemSlug').text)
예제 #5
0
class JimiFormatterTestCase(BaseXmlFormatterTestCase):
    """Tests for the XML produced by ``JimiFormatter``."""

    formatter = JimiFormatter()

    # Baseline text item used by the tests.
    # NOTE(review): presumably merged with per-test ``updates`` by the base
    # class ``format`` helper - confirm against BaseXmlFormatterTestCase.
    article = {
        '_id': 'id',
        'guid': 'id',
        'family_id': 'famid',
        'type': 'text',
        'headline': 'Headline',
        'slugline': 'slug',
        'creditline': 'Credit',
        'source': 'Source',
        'ednote': 'Ednote',
        'word_count': 123,
        'abstract': '<p>Abstract</p>',
        'body_html': '<p>Body HTML<br>test <b>bold</b> and <i>idiom</i></p>',
        'keywords': ['Foo bar', 'baz'],
        'anpa_category': [
            {'name': 'National', 'qcode': 'n'},
        ],
        'subject': [
            {'name': 'health', 'qcode': '07000000', 'scheme': 'subject_custom'},
            {'name': 'citizens', 'qcode': '20000575', 'scheme': 'subject_custom'},
            {'name': 'Foo', 'qcode': '1231245', 'scheme': 'foo'},
            {'name': 'Print', 'qcode': 'Print', 'scheme': cp.DISTRIBUTION},
            {
                'name': 'The Associated Press',
                'qcode': 'ap---',
                'scheme': cp.DESTINATIONS,
            },
        ],
        'urgency': 2,
        'language': 'en-CA',
        'unique_id': 123,
        'firstcreated': datetime(2020, 4, 1, 11, 13, 12, 25, tzinfo=UTC),
        'versioncreated': datetime(2020, 4, 1, 11, 23, 12, 25, tzinfo=UTC),
        'firstpublished': datetime(2020, 4, 1, 11, 33, 12, 25, tzinfo=UTC),
        'genre': [
            {'name': 'NewsAlert', 'qcode': 'NewsAlert'},
        ],
        'extra': {
            cp.HEADLINE2: 'headline2',
            cp.FILENAME: 'filename',
        },
    }

    def format_item(self, updates=None, return_root=False):
        """Format the article with ``updates`` and return the parsed XML.

        Returns the root element when ``return_root`` is truthy, otherwise
        the root's ``ContentItem`` child.
        """
        parsed_root = self.parse(self.format(updates))
        return parsed_root if return_root else parsed_root.find('ContentItem')

    def test_can_format(self):
        """The formatter reports that it can handle the ``jimi`` format type."""
        accepted = self.formatter.can_format('jimi', {})
        self.assertTrue(accepted)

    def test_format(self):
        """Check the raw output and every expected element for the baseline article."""
        output = self.format()
        self.assertIn("<?xml version='1.0' encoding='utf-8'?>", output)
        self.assertIn('<ContentText>&lt;p&gt;Body HTML&lt;br /&gt;test', output)

        publish = self.parse(output)
        self.assertEqual('Publish', publish.tag)

        # (tag, expected text) pairs on the root element
        root_expectations = (
            ('Reschedule', 'false'),
            ('IsRegional', 'false'),
            ('CanAutoRoute', 'true'),
            ('PublishID', str(SEQUENCE_NUMBER)),
            ('Services', 'Print'),
            ('Username', None),
            ('UseLocalsOut', 'false'),
            ('PscCodes', 'ap---'),
            ('PublishDateTime', '2020-04-01T11:33:12'),
        )
        for tag, expected in root_expectations:
            self.assertEqual(expected, publish.find(tag).text)

        content = publish.find('ContentItem')
        source = self.article

        # (tag, expected text) pairs on ContentItem, in the original order
        content_expectations = (
            ('Name', None),
            ('Cachable', 'false'),
            # ids
            ('ContentItemID', '00000100'),
            ('NewsCompID', '00000123'),
            ('SystemSlug', source['guid']),
            ('FileName', source['guid']),
            ('OrigTransRef', source['extra'][cp.FILENAME]),
            # obvious
            ('ContentType', 'Text'),
            # SDCP-309
            ('Headline2', source['headline']),
            ('Headline', 'headline2'),
            ('Credit', source['creditline']),
            ('SlugProper', source['slugline']),
            ('Source', source['source']),
            ('EditorNote', source['ednote']),
            ('WordCount', '6'),
            ('BreakWordCount', '6'),
            ('Length', '6'),
            ('DirectoryText', 'Body HTMLtest bold and idiom'),
            (
                'ContentText',
                '<p>Body HTML<br />test <strong>bold</strong> and <em>idiom</em></p>',
            ),
            ('Placeline', None),
            ('WritethruValue', '0'),
            ('Keyword', 'Foo bar,baz'),
            ('Category', 'National'),
            ('IndexCode', 'National,Health,Politics'),
            ('RankingValue', str(source['urgency'])),
            ('Ranking', 'News - Need to Know'),
            ('Language', '1'),
            # timestamps
            ('EmbargoTime', '0001-01-01T00:00:00'),
            ('CreatedDateTime', '2020-04-01T11:33:12'),
            ('UpdatedDateTime', '2020-04-01T07:23:12-04:00'),
            # etc
            ('VersionType', 'NewsAlert'),
        )
        for tag, expected in content_expectations:
            self.assertEqual(expected, content.find(tag).text)

    def test_writethru(self):
        """Each rewrite sequence yields its English ordinal and the ``Writethru`` type.

        Every sequence value runs in its own ``subTest`` so that a failure
        names the offending value and the remaining values are still checked.
        """
        expected_ordinals = {
            1: '1st',
            2: '2nd',
            3: '3rd',
            4: '4th',
            5: '5th',
            10: '10th',
            100: '100th',
            101: '101st',
        }

        for sequence, ordinal in expected_ordinals.items():
            with self.subTest(rewrite_sequence=sequence):
                item = self.format_item({'rewrite_sequence': sequence})
                self.assertEqual(ordinal, item.find('WritethruNum').text)
                self.assertEqual(str(sequence), item.find('WritethruValue').text)
                self.assertEqual('Writethru', item.find('WriteThruType').text)

    def test_dateline(self):
        """Check the place elements produced for an item with a located dateline."""
        located = {
            'alt_name': '',
            'state': 'California',
            'city_code': 'Los Angeles',
            'city': 'Los Angeles',
            'dateline': 'city',
            'country_code': 'US',
            'country': 'USA',
            'tz': 'America/Los_Angeles',
            'state_code': 'CA',
            'location': {'lat': 34.0522, 'lon': -118.2347},
        }
        dateline = {
            'source': 'AAP',
            'text': 'sample dateline',
            'located': located,
        }
        item = self.format_item({'dateline': dateline})

        # (element name, expected text) pairs, verified in this order
        expectations = (
            ('City', 'Los Angeles'),
            ('Province', 'California'),
            ('Country', 'USA'),
            ('Placeline', 'Los Angeles;California;USA'),
            ('Latitude', '34.0522'),
            ('Longitude', '-118.2347'),
        )
        for tag, expected in expectations:
            self.assertEqual(expected, item.find(tag).text)

    def test_globenewswire(self):
        """Format an item with the Globenewswire source and inspect the first output.

        Two outputs are expected in total; only the first one is parsed and
        checked here.
        """
        services = [
            {'name': code, 'qcode': code, 'scheme': cp.SERVICE}
            for code in ('FOO', 'BAR')
        ]
        article = {
            'source': globenewswire.SOURCE,
            'headline': 'Foo',
            'keywords': ['TSX VENTURE:AXL', 'OTC:NTGSF'],
            'anpa_category': [{
                'name': globenewswire.DESCRIPTION['en'],
                'qcode': 'p',
            }],
            'subject': services,
            'extra': {},
        }
        output = self.format(article, _all=True)

        self.assertEqual(2, len(output))

        first_xml = output[0][1]
        root = self.parse(first_xml)
        item = root.find('ContentItem')

        # elements directly below the root
        self.assertEqual('Print', root.find('Services').text)
        self.assertEqual('FOO', root.find('PscCodes').text)

        # elements of the content item
        self.assertEqual('Press Release', item.find('Category').text)
        self.assertEqual('Press Release', item.find('IndexCode').text)
        self.assertEqual('FOO,BAR', item.find('Note').text)
        self.assertEqual('TSX VENTURE:AXL,OTC:NTGSF', item.find('Stocks').text)
        for tag in ('Headline', 'Headline2'):
            self.assertEqual('Foo', item.find(tag).text)

    def test_limits(self):
        """Over-long headline, headline2 and keyword values come out as ``foo bar``."""
        overlong = 'foo bar ' + 'x' * 200
        updates = {
            'headline': overlong,
            'extra': {'headline2': overlong},
            'keywords': ['foo', 'bar', overlong],
        }
        item = self.format_item(updates)

        for tag in ('Headline', 'Headline2'):
            self.assertEqual('foo bar', item.find(tag).text)
        self.assertEqual('foo,bar,foo bar', item.find('Keyword').text)

    def test_picture(self):
        """Format a picture item and verify the root and ``ContentItem`` elements."""
        updates = {
            'type': 'picture',
            'guid': 'urn:picture',
            'urgency': 5,
            'byline': 'photographer',
            'headline': 'some headline',
            'slugline': 'slug',
            'firstcreated': datetime(2020, 6, 3, 17, 0, 56, tzinfo=UTC),
            'extra': {
                cp.FILENAME: 'NY538',
                'photographer_code': 'stf',
            },
            'subject': [
                {'name': 'Americas', 'qcode': 'A', 'scheme': 'photo_categories'},
            ],
            'creditline': 'THE ASSOCIATED PRESS',
            'original_source': 'The Associated Press',
            'copyrightnotice':
                'Copyright 2020 The Associated Press. All rights reserved.',
            'description_text': 'Pedestrians are silhouetted',
            'renditions': {
                'original': {'media': 'media_id', 'mimetype': 'image/jpeg'},
            },
        }
        # the second positional argument makes format_item hand back the root
        # element here (Services/PscCodes are looked up on it below)
        root = self.format_item(updates, True)

        self.assertEqual('Pictures', root.find('Services').text)
        self.assertEqual('Online', root.find('PscCodes').text)

        item = root.find('ContentItem')

        def text(tag):
            # text content of the first matching child of the content item
            return item.find(tag).text

        self.assertEqual(updates['byline'], text('Byline'))
        self.assertEqual('false', text('HeadlineService'))
        self.assertEqual('A', text('Category'))
        self.assertEqual('None', text('VideoType'))
        self.assertEqual('None', text('PhotoType'))
        self.assertEqual('None', text('GraphicType'))
        self.assertEqual('News - Optional', text('Ranking'))
        self.assertEqual('5', text('RankingValue'))
        self.assertEqual(updates['creditline'], text('Credit'))
        self.assertEqual('Photo', text('ContentType'))
        self.assertEqual(updates['slugline'], text('SlugProper'))
        self.assertEqual(updates['original_source'], text('Source'))
        self.assertEqual(updates['extra'][cp.FILENAME], text('OrigTransRef'))
        self.assertEqual('STF', text('BylineTitle'))
        # only the first 50 characters of the copyright notice are expected
        self.assertEqual(updates['copyrightnotice'][:50], text('Copyright'))
        self.assertEqual(updates['description_text'], text('EnglishCaption'))
        self.assertEqual('2020-06-03T17:00:56', text('DateTaken'))

        self.assertEqual('media_id', text('FileName'))
        self.assertEqual('media_id.jpg', text('ViewFile'))
        self.assertEqual('media_id.jpg', text('ContentRef'))
        self.assertEqual(updates['guid'], text('SystemSlug'))

        # FileName must appear exactly once
        self.assertEqual(1, len(item.findall('FileName')))

    def test_picture_amazon(self):
        """A media id containing a slash is exported with the slash turned into a dash."""
        updates = {
            'type': 'picture',
            'renditions': {
                'original': {
                    'media': '20200807100836/5f2d12c8ced0b19f31ea318ajpeg.jpg',
                },
            },
        }
        item = self.format_item(updates)

        original = updates['renditions']['original']
        filename = original['media'].replace('/', '-')
        stem = os.path.splitext(filename)[0]

        # FileName carries no extension, the two reference fields do
        self.assertEqual(stem, item.find('FileName').text)
        self.assertEqual(filename, item.find('ViewFile').text)
        self.assertEqual(filename, item.find('ContentRef').text)

    def test_embargo(self):
        """``EmbargoTime`` is the same whether the embargo comes from schedule
        settings or from the ``embargoed`` field.

        The UTC input 13:10:05 is expected as 09:10:05 in the output.
        """
        embargo = datetime(2020, 7, 22, 13, 10, 5, tzinfo=UTC)
        variants = (
            {SCHEDULE_SETTINGS: {'utc_embargo': embargo}},
            {'embargoed': embargo},
        )

        for updates in variants:
            item = self.format_item(updates)
            self.assertEqual('2020-07-22T09:10:05',
                             item.find('EmbargoTime').text)

    def test_format_credit(self):
        """With source ``CP`` and no creditline, ``Credit`` reads THE CANADIAN PRESS."""
        updates = {'source': 'CP', 'creditline': None}
        item = self.format_item(updates)
        credit = item.find('Credit')
        self.assertEqual('THE CANADIAN PRESS', credit.text)

    def test_item_with_picture(self):
        """An item with gallery associations lists each picture once.

        Three associations are supplied, two of them pointing at the same
        picture; the expected ``PhotoReference`` is ``foo,bar``.
        """

        def picture(name):
            # minimal picture association whose id, guid and media derive from name
            return {
                '_id': name,
                'type': 'picture',
                'guid': '{}:guid'.format(name),
                'renditions': {
                    'original': {
                        'media': name,
                        'mimetype': 'image/jpeg',
                    },
                },
            }

        updates = {
            'source': 'CP',
            'associations': {
                'gallery--1': picture('foo'),
                'gallery--2': picture('bar'),
                'gallery--3': picture('bar'),  # same picture twice
            },
        }

        item = self.format_item(updates)

        self.assertEqual('Many', item.find('PhotoType').text)
        self.assertEqual('foo,bar', item.find('PhotoReference').text)

    def test_format_filename_rewrite(self):
        """A rewrite chain exports the guid of the oldest item as file name and slug.

        The archive lookup is stubbed to return the two earlier items of the
        chain (``same-cycle`` first, then ``prev-cycle``).
        """
        one_hour = timedelta(hours=1)
        prev_cycle_created = datetime(2020, 8, 12, 5, tzinfo=UTC)  # 1am ET
        same_cycle_created = prev_cycle_created + one_hour  # 2am ET
        last_created = prev_cycle_created + 2 * one_hour  # 3am ET

        same_cycle = {
            'guid': 'same-cycle',
            'rewrite_of': 'prev-cycle',
            'firstcreated': same_cycle_created,
            'unique_id': 2,
            'type': 'text',
        }
        prev_cycle = {
            'guid': 'prev-cycle',
            'firstcreated': prev_cycle_created,
            'unique_id': 1,
            'type': 'text',
        }
        # consecutive find_one calls yield these items in order
        resources['archive'].service.find_one.side_effect = [
            same_cycle,
            prev_cycle,
        ]

        updates = {
            'guid': 'last',
            'rewrite_of': 'same-cycle',
            'extra': {},
            'firstcreated': last_created,
            'type': 'text',
        }
        item = self.format_item(updates)

        for tag in ('FileName', 'SystemSlug'):
            self.assertEqual('prev-cycle', item.find(tag).text)

    def test_format_fr_CA(self):
        """Check language code, French labels and writethru fields for ``fr-CA``."""
        broadcast = {
            'name': 'Broadcast',
            'qcode': cp.BROADCAST,
            'scheme': cp.DISTRIBUTION,
        }
        updates = {
            'language': 'fr-CA',
            'anpa_category': [{'name': 'National', 'qcode': 'g'}],
            'rewrite_sequence': 2,
            'subject': [broadcast],
        }

        item = self.format_item(updates)

        labels = (
            ('Language', '2'),
            ('Category', "Nouvelles Générales"),
            ('IndexCode', "Nouvelles Générales"),
            ('VersionType', "Alerte"),
            ('Ranking', "Nouvelle - Majeur"),
        )
        for tag, expected in labels:
            self.assertEqual(expected, item.find(tag).text)

        # Services lives on the parent of the content item
        parent = item.find('..')
        self.assertEqual("Radio", parent.find('Services').text)

        writethru = (
            ('WritethruValue', '2'),
            ('WritethruNum', '2ème'),
            ('WriteThruType', 'Lead'),
        )
        for tag, expected in writethru:
            self.assertEqual(expected, item.find(tag).text)

    def test_correction_update(self):
        """Update and correction texts from ``extra`` land in their own elements."""
        extra = {
            cp.UPDATE: 'update text',
            cp.CORRECTION: 'correction text',
        }
        item = self.format_item({'extra': extra})

        self.assertEqual('update text', item.find('UpdateNote').text)
        self.assertEqual('correction text', item.find('Corrections').text)

    def test_writethru_keeps_newscompid(self):
        """A rewrite reuses the ``NewsCompID`` built from the first item's unique id.

        The archive lookup is stubbed with the two earlier items of the
        chain; the expected id is ``00000001``.
        """
        same_cycle = {
            'guid': 'same-cycle',
            'rewrite_of': 'prev-cycle',
            'unique_id': 2,
            'type': 'text',
        }
        prev_cycle = {
            'guid': 'prev-cycle',
            'unique_id': 1,
            'type': 'text',
        }
        # consecutive find_one calls yield these items in order
        resources['archive'].service.find_one.side_effect = [
            same_cycle,
            prev_cycle,
        ]

        updates = {
            'type': 'text',
            'rewrite_of': 'same-cycle',
            'unique_id': 3,
        }
        item = self.format_item(updates)

        self.assertEqual('00000001', item.find('NewsCompID').text)

    def test_picture_container_ids(self):
        """Check ``ContainerIDs`` of a picture against stubbed ``news`` items.

        The ``news`` service ``get`` is stubbed to return one canceled and
        two usable text items. The expected value lists the original id of
        ``usable2`` followed by ``usable``; the canceled item is absent.
        """
        resources['news'].service.get.side_effect = [[
            {
                'guid': 'canceled',
                'pubstatus': 'canceled',
                'type': 'text'
            },
            {
                'guid': 'usable',
                'pubstatus': 'usable',
                'type': 'text'
            },
            {
                'guid': 'usable2',
                'pubstatus': 'usable',
                'type': 'text',
                'extra': {
                    cp.ORIG_ID: 32 * 'a',  # slug constraints
                }
            },
        ]]

        try:
            item = self.format_item({
                'type': 'picture',
                'unique_id': 3,
            })
        finally:
            # ``resources`` is shared module state: always remove the stub,
            # even when formatting raises, so it cannot leak into other tests
            resources['news'].service.get.side_effect = None

        self.assertEqual('{}, usable'.format(32 * 'a'),
                         item.find('ContainerIDs').text)

    def test_placeline_washington(self):
        """The state ``Washington, D.C.`` is exported as ``District of Columbia``."""
        located = {
            "dateline": "city",
            "country_code": "US",
            "tz": "America/New_York",
            "city_code": "Washington",
            "state_code": "DC",
            "state": "Washington, D.C.",
            "city": "Washington",
            "country": "United States",
            "code": "4140963",
            "scheme": "geonames",
        }
        dateline = {
            'source': 'AAP',
            'text': 'sample dateline',
            'located': located,
        }
        item = self.format_item({'dateline': dateline})

        placeline = item.find('Placeline')
        self.assertEqual('Washington;District of Columbia;United States',
                         placeline.text)
        province = item.find('Province')
        self.assertEqual('District of Columbia', province.text)
예제 #6
0
class CP_AP_ParseTestCase(unittest.TestCase):
    """Tests that parse AP payloads and, in some cases, format the result.

    The class attributes below are shared by all tests: a Flask app that
    provides the application context, an empty subscriber and a
    ``JimiFormatter`` instance used by the ``format`` helper.
    """

    # Flask app whose app_context() wraps the parse calls in the tests
    app = flask.Flask(__name__)
    # locators is replaced by a mock object
    app.locators = MagicMock()
    # expose the AP tag mapping from settings through the app config
    app.config.update({"AP_TAGS_MAPPING": settings.AP_TAGS_MAPPING})
    # subscriber passed to the formatter by the format() helper
    subscriber = {}
    # formatter instance shared by the tests
    formatter = JimiFormatter()

    def test_slugline(self):
        """``process_slugline`` joins slugline parts with single dashes."""
        feed_parser = CP_APMediaFeedParser()
        cases = (
            ("foo bar/baz", "foo-bar-baz"),
            ("foo-bar", "foo-bar"),
            ("foo - bar", "foo-bar"),
        )
        for raw, expected in cases:
            self.assertEqual(expected, feed_parser.process_slugline(raw))

    def test_parse(self):
        """Parse the module level ``data`` payload and verify the resulting item."""
        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            item = parser.parse(data, provider)

        def subject_values(scheme, field="name"):
            # values of ``field`` for all subjects using the given scheme
            return [
                subj[field] for subj in item["subject"]
                if subj.get("scheme") == scheme
            ]

        # basic metadata
        self.assertEqual("ba7d03f0cd24a17faa81bebc724bcf3f", item["guid"])
        self.assertEqual("Story", item["profile"])
        self.assertEqual("WY-Exchange-Coronavirus-Tech", item["slugline"])
        self.assertEqual("headline1", item["headline"])
        self.assertEqual("headline1", item["extra"][cp.HEADLINE2])
        self.assertIn("copyright information", item["copyrightnotice"])
        self.assertIn("editorial use only", item["usageterms"])
        self.assertEqual("The Associated Press", item["source"])
        self.assertEqual(5, item["urgency"])
        self.assertEqual("Margaret Austin", item["byline"])
        self.assertIn("General news", item["keywords"])

        feature_genre = {"name": "Feature", "qcode": "Feature"}
        self.assertIn(feature_genre, item["genre"])

        # notes
        self.assertEqual("UPDATES: With AP Photos.", item["extra"]["update"])
        self.assertEqual("", item["ednote"])

        # extra fields
        self.assertEqual("NYSE:WFC", item["extra"]["stocks"])
        self.assertEqual("m0012", item["extra"][cp.FILENAME])
        self.assertEqual(0, item["extra"]["ap_version"])

        international = {
            "name": "International",
            "qcode": "w",
            "scheme": "categories",
            "translations": {
                "name": {
                    "en-CA": "International",
                    "fr-CA": "International",
                },
            },
        }
        self.assertIn(international, item["anpa_category"])

        # subjects grouped by scheme
        custom_subjects = subject_values("subject_custom")
        self.assertEqual(["health"], custom_subjects)

        tags = subject_values(cp.TAG)
        self.assertEqual(2, len(tags))
        self.assertIn("APV", tags)
        self.assertIn("TSX", tags)

        products = subject_values(cp.AP_PRODUCT, field="qcode")
        self.assertEqual(6, len(products))
        self.assertIn("33381", products)

        # dateline
        dateline = item["dateline"]
        self.assertEqual("Wyoming Tribune Eagle", dateline["source"])
        self.assertEqual("CHEYENNE, Wyo.", dateline["text"])
        self.assertIn("located", dateline)
        located = dateline["located"]
        self.assertEqual("Cheyenne", located["city"])
        self.assertEqual("Wyoming", located["state"])
        self.assertEqual("WY", located["state_code"])
        self.assertEqual("United States", located["country"])
        self.assertEqual("USA", located["country_code"])
        self.assertEqual(41.13998, located["location"]["lat"])
        self.assertEqual(-104.82025, located["location"]["lon"])

        # associations
        self.assertIn("associations", item)
        for key in ("media-gallery--1", "media-gallery--2"):
            self.assertIn(key, item["associations"])

        # place
        self.assertEqual(1, len(item["place"]))
        cheyenne = {
            "name": "Cheyenne",
            "qcode": "Cheyenne",
            "state": "Wyoming",
            "country": "United States",
            "world_region": "North America",
            "location": {
                "lat": 41.13998,
                "lon": -104.82025,
            },
        }
        self.assertEqual(cheyenne, item["place"][0])

        # body is a single line wrapped in paragraph tags
        self.assertRegex(item["body_html"], r"^<p>.*</p>$")

    def test_parse_ignore_associations_based_on_type_config(self):
        """No associations are produced when the provider only lists ``text``."""
        text_only_provider = {"content_types": ["text"]}

        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            item = parser.parse(data, text_only_provider)

        associations = item.get("associations")
        self.assertFalse(associations)

    def test_parse_picture(self):
        """Parse ``picture_data`` with the preview download served from a fixture."""
        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            with requests_mock.mock() as mock:
                fixture_path = get_fixture_path("preview.jpg", "ap")
                with open(fixture_path, "rb") as fixture:
                    renditions = picture_data["data"]["item"]["renditions"]
                    # requests for the preview href get the fixture bytes
                    mock.get(
                        renditions["preview"]["href"],
                        content=fixture.read(),
                    )
                item = parser.parse(picture_data, provider)

        self.assertEqual("Jae C. Hong", item["byline"])
        self.assertEqual(5, item["urgency"])
        self.assertEqual("ASSOCIATED PRESS", item["creditline"])
        self.assertEqual("America Protests Racial Economics", item["headline"])
        self.assertEqual("stf", item["extra"]["photographer_code"])
        self.assertIn("Pedestrians are silhouetted", item["description_text"])
        self.assertEqual("AP", item["extra"]["provider"])

    def test_parse_embargoed(self):
        """Parse an embargoed payload, first with a future and then a past embargo.

        A future embargo must yield the ``embargo`` key, schedule settings,
        the hold pubstatus and the ``Advance`` genre; with an embargo in the
        past the ``embargo`` key must be absent.
        """
        stamp_format = "%Y-%m-%dT%H:%M:%SZ"

        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            source = copy.deepcopy(data)
            payload = source["data"]["item"]

            # two hours ahead of now, without microseconds
            now = datetime.now(pytz.utc).replace(microsecond=0)
            embargoed = now + timedelta(hours=2)
            payload["embargoed"] = embargoed.strftime(stamp_format)
            payload["pubstatus"] = "embargoed"

            item = parser.parse(source, provider)
            self.assertEqual(embargoed, item["embargoed"])
            self.assertIn("embargo", item)
            expected_schedule = {
                "utc_embargo": embargoed,
                "time_zone": cp.TZ,
            }
            self.assertEqual(expected_schedule, item[SCHEDULE_SETTINGS])
            self.assertEqual(PUB_STATUS.HOLD, item["pubstatus"])
            genre_names = [genre["name"] for genre in item["genre"]]
            self.assertEqual(["Advance"], genre_names)

            # move the embargo five hours back, i.e. three hours into the past
            embargoed -= timedelta(hours=5)
            payload["embargoed"] = embargoed.strftime(stamp_format)

            item = parser.parse(source, provider)
            self.assertEqual(embargoed, item["embargoed"])
            self.assertNotIn("embargo", item)

    def test_category_politics_international(self):
        """The ``politics.json`` fixture is categorised as International."""
        fixture_path = get_fixture_path("politics.json", "ap")
        with open(fixture_path) as fp:
            _data = json.load(fp)

        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            item = parser.parse(_data, {})

        international = {
            "name": "International",
            "qcode": "w",
            "scheme": CATEGORY_SCHEME,
            "translations": {
                "name": {
                    "en-CA": "International",
                    "fr-CA": "International",
                },
            },
        }
        self.assertEqual([international], item["anpa_category"])
        self.assertEqual("US-Biden-Staff", item["slugline"])

    def test_category_apv(self):
        """The ``apv.json`` fixture is categorised as International."""
        fixture_path = get_fixture_path("apv.json", "ap")
        with open(fixture_path) as fp:
            _data = json.load(fp)

        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            item = parser.parse(_data, {})

        international = {
            "name": "International",
            "qcode": "w",
            "scheme": CATEGORY_SCHEME,
            "translations": {
                "name": {
                    "en-CA": "International",
                    "fr-CA": "International",
                },
            },
        }
        self.assertEqual([international], item["anpa_category"])
        self.assertEqual("EU-Spain-Storm-Aftermath", item["slugline"])

    def test_category_tennis(self):
        """The ``ap-sports.json`` fixture is categorised as Agate only.

        No subject of the AP subject vocabulary may be present, and the
        formatted output must use ``Agate`` for category and index code.
        """
        fixture_path = get_fixture_path("ap-sports.json", "ap")
        with open(fixture_path) as fp:
            _data = json.load(fp)

        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            item = parser.parse(_data, {})

        agate = {
            "name": "Agate",
            "qcode": "r",
            "scheme": CATEGORY_SCHEME,
            "translations": {
                "name": {
                    "en-CA": "Agate",
                    "fr-CA": "Statistiques",
                },
            },
        }
        self.assertEqual([agate], item["anpa_category"])

        ap_subject_names = [
            subj["name"] for subj in item["subject"]
            if subj.get("scheme") == AP_SUBJECT_CV
        ]
        self.assertEqual([], ap_subject_names)

        output = self.format(item)
        for fragment in ("<Category>Agate</Category>",
                         "<IndexCode>Agate</IndexCode>"):
            self.assertIn(fragment, output)

    def test_ignore_slugline_to_subject_map(self):
        """A ``BC`` prefixed sports slugline still yields the Agate category only."""
        fixture_path = get_fixture_path("ap-sports.json", "ap")
        with open(fixture_path) as fp:
            _data = json.load(fp)

        # Prefix slugline with `BC` so slugline -> subject mapping works
        # in this case, slugline -> "BC-TEN-" -> "15065000"
        payload = _data["data"]["item"]
        payload["slugline"] = "BC" + payload["slugline"][2:]

        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            item = parser.parse(_data, {})

        agate = {
            "name": "Agate",
            "qcode": "r",
            "scheme": CATEGORY_SCHEME,
            "translations": {
                "name": {
                    "en-CA": "Agate",
                    "fr-CA": "Statistiques",
                },
            },
        }
        self.assertEqual([agate], item["anpa_category"])

        ap_subject_names = [
            subj["name"] for subj in item["subject"]
            if subj.get("scheme") == AP_SUBJECT_CV
        ]
        self.assertEqual([], ap_subject_names)

        output = self.format(item)
        self.assertIn("<Category>Agate</Category>", output)

        # Make sure `IndexCode` only contains `Agate` and not `Sport` or `Tennis`
        self.assertIn("<IndexCode>Agate</IndexCode>", output)

    def test_slugline_prev_version(self):
        """The slugline of an already ingested version wins over the parsed one.

        The ``ingest`` lookup is stubbed to return an item with the slugline
        ``prev-slugline``, which is then expected on the parsed item.
        """
        with open(get_fixture_path("ap-sports.json", "ap")) as fp:
            _data = json.load(fp)
        with self.app.app_context():
            with patch.dict(superdesk.resources, resources):
                resources["ingest"].service.find_one.return_value = {
                    "slugline": "prev-slugline",
                }
                try:
                    item = parser.parse(_data, {})
                finally:
                    # ``resources`` is shared module state: restore the stub
                    # even when parsing raises so other tests are unaffected
                    resources["ingest"].service.find_one.return_value = None
        self.assertEqual("prev-slugline", item["slugline"])

    def test_aps_category(self):
        """The first category of the ``ap-aps.json`` fixture is Advisory."""
        fixture_path = get_fixture_path("ap-aps.json", "ap")
        with open(fixture_path) as fp:
            _data = json.load(fp)

        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            item = parser.parse(_data, {})

        first_category = item["anpa_category"][0]
        self.assertEqual("Advisory", first_category["name"])

    def format(self, item):
        """Format ``item`` with the shared formatter and return the first output.

        ``unique_id`` is set to 1 on the item (in place) before formatting.
        The return value is element ``[0][1]`` of the formatter result.
        """
        with patch.dict(superdesk.resources, resources):
            item["unique_id"] = 1
            formatted = self.formatter.format(item, self.subscriber)
            first_output = formatted[0]
            return first_output[1]

    def test_parse_agate_headings(self):
        """Agate headings from the NITF fixture end up as paragraphs in the body."""
        json_path = get_fixture_path("ap-agate.json", "ap")
        with open(json_path) as fp:
            _data = json.load(fp)

        with self.app.app_context():
            # attach the parsed NITF document to the payload before parsing
            nitf_tree = etree.parse(get_fixture_path("ap-agate-nitf.xml", "ap"))
            _data["nitf"] = nitf.NITFFeedParser().parse(nitf_tree)

            with patch.dict(superdesk.resources, resources):
                item = parser.parse(_data, {})

        self.assertIn("<p>Atlantic Division</p>", item["body_html"])

    def test_parse_table(self):
        """A table from the NITF fixture survives parsing and formatting.

        The parsed body must contain a ``<table>`` element and the formatted
        ``ContentText`` must still mention ``table``.
        """
        with open(get_fixture_path("ap-table.json", "ap")) as fp:
            _data = json.load(fp)

        with self.app.app_context():
            # attach the parsed NITF document to the payload before parsing
            xml = etree.parse(get_fixture_path("ap-table-nitf.xml", "ap"))
            parsed = nitf.NITFFeedParser().parse(xml)
            _data["nitf"] = parsed

            with patch.dict(superdesk.resources, resources):
                item = parser.parse(_data, {})

        self.assertIn("<table>", item["body_html"])
        output = self.format(item)
        jimi = etree.fromstring(output.encode("utf-8"))
        content = jimi.find("ContentItem").find("ContentText").text
        self.assertIn("table", content)

    def test_parse_subject_duplicates(self):
        """Every subject qcode of the parsed ``ap-subject.json`` item is unique."""
        fixture_path = get_fixture_path("ap-subject.json", "ap")
        with open(fixture_path) as fp:
            _data = json.load(fp)

        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            item = parser.parse(_data, {})

        qcodes = [subj["qcode"] for subj in item["subject"]]
        unique_qcodes = set(qcodes)
        self.assertEqual(len(qcodes), len(unique_qcodes))

    def test_parse_aps_right_now(self):
        """The first category of ``ap-aps-mi-right-now.json`` is International."""
        fixture_path = get_fixture_path("ap-aps-mi-right-now.json", "ap")
        with open(fixture_path) as fp:
            _data = json.load(fp)

        with self.app.app_context(), patch.dict(superdesk.resources, resources):
            item = parser.parse(_data, {})

        first_category = item["anpa_category"][0]
        self.assertEqual("International", first_category["name"])
예제 #7
0
    def test_fetch_to_jimi(self, update_renditions_mock):
        """Fetch a picture through the Orangelogic provider and format it.

        The fetched item is passed to ``JimiFormatter`` and the resulting
        XML is checked element by element.
        """
        service = OrangelogicSearchProvider(self.provider)

        # NOTE(review): assuming update_renditions_mock is a unittest.mock
        # mock, the attribute it consults when called is ``side_effect``;
        # ``side_effects`` is presumably a typo and set_rendition never runs.
        # Confirm the intent before changing it.
        update_renditions_mock.side_effects = set_rendition

        # media storage returns the bytes of the picture fixture
        self.app.media.get.return_value = io.BytesIO(
            read_fixture(
                "9e627f74b97841b3b8562b6547ada9c7-d1538139479c43e88021152.jpg",
                "rb"))

        # HTTP traffic is answered by the auth_ok / fetch_ok handlers
        with HTTMock(auth_ok, fetch_ok):
            with patch.dict(superdesk.resources, resources):
                fetched = service.fetch({})
            update_renditions_mock.assert_called_once_with(
                fetched,
                "https://example.com/htm/GetDocumentAPI.aspx?F=TRX&DocID=2RLQZBCB4R4R4&token=token.foo",
                None,
            )

        self.assertEqual("picture", fetched["type"])
        self.assertIsInstance(fetched["firstcreated"], datetime)

        # populate ids
        fetched["family_id"] = fetched["guid"]
        fetched["unique_id"] = 1

        with patch.dict(superdesk.resources, resources):
            formatter = JimiFormatter()
            # first output, second element is the XML string
            xml = formatter.format(fetched, {})[0][1]

        root = etree.fromstring(xml.encode(formatter.ENCODING))

        self.assertEqual("Pictures", root.find("Services").text)

        item = root.find("ContentItem")

        self.assertEqual("Zhang Yuwei", item.find("Byline").text)
        self.assertEqual("I", item.find("Category").text)
        self.assertEqual("News - Optional", item.find("Ranking").text)
        self.assertEqual("5", item.find("RankingValue").text)
        self.assertEqual("THE ASSOCIATED PRESS", item.find("Credit").text)
        self.assertEqual("Virus Outbreak China Vaccine",
                         item.find("SlugProper").text)
        self.assertEqual("Unknown AP", item.find("Source").text)
        self.assertEqual("Beijing", item.find("City").text)
        self.assertEqual("China", item.find("Country").text)
        # no province is expected between city and country
        self.assertEqual("Beijing;;China", item.find("Placeline").text)
        # self.assertEqual('XIN902', item.find('OrigTransRef').text)
        self.assertEqual("SUB", item.find("BylineTitle").text)
        self.assertEqual("NHG", item.find("CaptionWriter").text)
        self.assertEqual("Xinhua", item.find("Copyright").text)
        self.assertIn(
            "In this April 10, 2020, photo released by Xinhua News Agency, a staff",
            item.find("EnglishCaption").text,
        )
        self.assertEqual("2020-04-12T00:09:37", item.find("DateTaken").text)
        self.assertEqual(
            "NO SALES, PHOTO RELEASED BY XINHUA NEWS AGENCY APRIL 10, 2020 PHOTO",
            item.find("SpecialInstructions").text,
        )
        self.assertEqual("Unknown AP", item.find("ArchiveSources").text)
        self.assertEqual("9e627f74b97841b3b8562b6547ada9c7",
                         item.find("CustomField1").text)
        self.assertEqual("Xinhua", item.find("CustomField6").text)
        self.assertEqual("9e627f74b97841b3b8562b6547ada9c7",
                         item.find("SystemSlug").text)