def get_news_items(self, html) -> [NewsItem]:
     """Build one NewsItem per article found in ``html``.

     The articles come from ``self.__get_page_articles(html)``; for each of
     them the url, title and topic are extracted with the corresponding
     private ``SbsNewsItemParser`` helpers.

     :param html: page markup handed straight to ``__get_page_articles``.
     :return: list of NewsItem objects, in the order the articles were
         yielded.
     """
     return [
         NewsItem(
             url=SbsNewsItemParser.__get_item_url(entry),
             title=SbsNewsItemParser.__get_headline_text(entry),
             topic=SbsNewsItemParser.__get_topic_text(entry),
         )
         for entry in self.__get_page_articles(html)
     ]
 def get_news_items(self, html) -> [NewsItem]:
     """Build one NewsItem per article found in ``html``.

     The articles come from ``self.__get_page_articles(html)``; for each of
     them the url, title, content and author are extracted with the
     corresponding private ``AbcNewsItemParser`` helpers.

     :param html: page markup handed straight to ``__get_page_articles``.
     :return: list of NewsItem objects, in the order the articles were
         yielded.
     """
     return [
         NewsItem(
             url=AbcNewsItemParser.__get_item_url(entry),
             title=AbcNewsItemParser.__get_headline_text(entry),
             content=AbcNewsItemParser.__get_item_content(entry),
             author=AbcNewsItemParser.__get_item_author(entry),
         )
         for entry in self.__get_page_articles(html)
     ]
 def test_log_news_item_added(self, mock_stdout):
     """ConsoleNewsLogger prints the expected report for a newly added item.

     ``mock_stdout`` is read with ``getvalue()`` after logging; presumably
     it is a patched ``sys.stdout`` injected by a decorator outside this
     view -- TODO confirm.
     """
     site = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
     story = NewsItem("www.abc.com/news/a-story", "A Story", None, None,
                      None, site)

     ConsoleNewsLogger().log_news_item_added(story)

     # The timestamp is read only after logging, as part of the expectation.
     report_parts = (
         "\n===============\n",
         "News item added\n",
         "---------------\n",
         "Time: {}\n".format(story.created_at),
         "Website: ABC News\n",
         "URL: www.abc.com/news/a-story\n",
         "Title: A Story\n",
         "===============\n",
     )
     self.assertEqual("".join(report_parts), mock_stdout.getvalue())
 def test_log_news_item_updated_with_author(self, mock_stdout):
     """ConsoleNewsLogger reports an author change on an existing item.

     The logger receives the item plus ``{'author': None}``; judging by the
     expected output this dict carries the previous value of each changed
     field -- verify against ConsoleNewsLogger. ``mock_stdout`` is read
     with ``getvalue()`` and is presumably a patched ``sys.stdout``
     injected outside this view -- TODO confirm.
     """
     site = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
     story = NewsItem("www.abc.com/news/a-story", "A Story", None,
                      'John Smith', None, site)

     ConsoleNewsLogger().log_news_item_updated(story, {'author': None})

     # The timestamp is read only after logging, as part of the expectation.
     report_parts = (
         "\n===============\n",
         "Existing news item updated\n",
         "---------------\n",
         "Time: {}\n".format(story.updated_at),
         "Website: ABC News\n",
         "URL: www.abc.com/news/a-story\n",
         "Title: A Story\n",
         "Author: John Smith\n",
         "---------------\n",
         'Author changed from: "None" to: "John Smith"\n',
         "===============\n",
     )
     self.assertEqual("".join(report_parts), mock_stdout.getvalue())
 def test_log_news_item_updated_with_title(self, mock_stdout):
     """ConsoleNewsLogger reports a title change on an existing item.

     The logger receives the item plus ``{'title': 'A Story'}``; judging by
     the expected output this dict carries the previous value of each
     changed field -- verify against ConsoleNewsLogger. ``mock_stdout`` is
     read with ``getvalue()`` and is presumably a patched ``sys.stdout``
     injected outside this view -- TODO confirm.
     """
     site = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
     story = NewsItem("www.abc.com/news/a-story", "An Updated Story", None,
                      None, None, site)

     ConsoleNewsLogger().log_news_item_updated(story, {'title': 'A Story'})

     # The timestamp is read only after logging, as part of the expectation.
     report_parts = (
         "\n===============\n",
         "Existing news item updated\n",
         "---------------\n",
         "Time: {}\n".format(story.updated_at),
         "Website: ABC News\n",
         "URL: www.abc.com/news/a-story\n",
         "Title: An Updated Story\n",
         "---------------\n",
         'Title changed from: "A Story" to: "An Updated Story"\n',
         "===============\n",
     )
     self.assertEqual("".join(report_parts), mock_stdout.getvalue())
 def test_log_news_item_updated_with_content(self, mock_stdout):
     """ConsoleNewsLogger reports a content change on an existing item.

     The logger receives the item plus ``{'content': None}``; judging by
     the expected output this dict carries the previous value of each
     changed field -- verify against ConsoleNewsLogger. ``mock_stdout`` is
     read with ``getvalue()`` and is presumably a patched ``sys.stdout``
     injected outside this view -- TODO confirm.
     """
     site = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
     story = NewsItem("www.abc.com/news/a-story", "A Story",
                      "This is a story summary.", None, None, site)

     ConsoleNewsLogger().log_news_item_updated(story, {'content': None})

     # The timestamp is read only after logging, as part of the expectation.
     report_parts = (
         "\n===============\n",
         "Existing news item updated\n",
         "---------------\n",
         "Time: {}\n".format(story.updated_at),
         "Website: ABC News\n",
         "URL: www.abc.com/news/a-story\n",
         "Title: A Story\n",
         "Content: This is a story summary.\n",
         "---------------\n",
         'Content changed from: "None" to: "This is a story summary."\n',
         "===============\n",
     )
     self.assertEqual("".join(report_parts), mock_stdout.getvalue())
예제 #7
0
    def test_get_news_items(self):
        """Verify the first item NewsScanner returns for each news website.

        For every website a NewsScanner is built with that site's parser,
        its ``_get_web_page_content`` is replaced by a MagicMock returning
        the HTML loaded through ``get_test_data_file_contents``, and the
        first element of ``get_news_items()`` is compared field by field
        with the expected NewsItem.

        Each website runs inside its own ``subTest`` so that a failure for
        one site is reported without hiding the results for the others.
        """
        compared_fields = ("url", "title", "content", "author", "topic")

        scenarios = [
            # Test NewsScanner on ABC News
            {
                "website": ("ABC News", "https://www.abc.net.au",
                            "https://www.abc.net.au/news/"),
                "parser": AbcNewsItemParser,
                "fixture": 'abc_news_short.html',
                "expected": {
                    # expect NewsItem url to become absolute/fully-qualified
                    "url": "https://www.abc.net.au" +
                    "/news/2020-06-07/cormann-dubs-black-lives-matter-protesters-selfish-coronavirus/12330196",
                    "title":
                    "Senior Government Minister slams 'selfish' Black Lives Matter protesters",
                    "content":
                    'Senior Federal Government Minister Mathias Cormann takes aim at Australian Black Lives Matter protesters, dubbing their actions "selfish", "self-indulgent" and "reckless" amid the deadly coronavirus pandemic.',
                    "author": None,
                    "topic": None,
                },
            },
            # Test NewsScanner on Nine News
            {
                "website": ("Nine News", "https://www.9news.com.au/",
                            "https://www.9news.com.au/"),
                "parser": NineNewsItemParser,
                "fixture": 'nine_news_short.html',
                "expected": {
                    # expect NewsItem url to remain unchanged as it is
                    # already absolute/fully-qualified
                    "url":
                    "https://www.9news.com.au/national/sydney-protest-town-hall-station-closed-due-to-illegal-black-lives-matter-protest-in-the-cdb/75c4bf88-81d5-49cd-a59d-a30f942b73b7",
                    "title":
                    "Roads close and transport disrupted in cities as thousands protest",
                    "content": None,
                    "author": None,
                    "topic": None,
                },
            },
            # Test NewsScanner on SBS News
            {
                "website": ("SBS News", "https://www.sbs.com.au",
                            "https://www.sbs.com.au/news/"),
                "parser": SbsNewsItemParser,
                "fixture": 'sbs_news_short.html',
                "expected": {
                    # expect NewsItem url to become absolute/fully-qualified
                    "url": "https://www.sbs.com.au" +
                    "/news/the-feed/most-african-americans-don-t-suffer-daily-shocking-racism-is-that-true",
                    "title":
                    "‘Most African Americans don’t suffer daily shocking racism.’ Is that true?",
                    "content": None,
                    "author": None,
                    "topic": None,
                },
            },
            # Test NewsScanner on The Australian
            {
                "website": ("The Australian",
                            "https://www.theaustralian.com.au/",
                            "https://www.theaustralian.com.au/"),
                "parser": TheAustralianNewsItemParser,
                "fixture": 'the_australian_short.html',
                "expected": {
                    # expect NewsItem url to remain unchanged as it is
                    # already absolute/fully-qualified
                    "url":
                    "https://www.theaustralian.com.au/weekend-australian-magazine/cool-in-a-crisis-jane-halton-leads-the-charge-for-coronavirus-vaccine/news-story/0c28ef3ba39fad19a2ecb0bd11f3e0fe",
                    "title": "‘We dodged a bullet? Don’t kid yourself’",
                    "content":
                    "Spearheading the global race to find and distribute a COVID vaccine, Jane Halton has a warning that should make Australians listen.",
                    "author": "By Megan Lehmann",
                    "topic": "magazine",
                },
            },
        ]

        for scenario in scenarios:
            website_name = scenario["website"][0]
            # One sub-test per website keeps the scenarios independent.
            with self.subTest(website=website_name):
                news_scanner = NewsScanner(
                    NewsWebsite(*scenario["website"]), scenario["parser"]())
                # Mock scanner object web scan to return HTML from test
                # data file
                html = get_test_data_file_contents(scenario["fixture"])
                news_scanner._get_web_page_content = MagicMock(
                    return_value=html)

                expected_news_item = NewsItem(**scenario["expected"])
                actual_news_item = news_scanner.get_news_items()[0]

                for field in compared_fields:
                    self.assertEqual(
                        getattr(expected_news_item, field),
                        getattr(actual_news_item, field),
                        msg="unexpected '{}' value".format(field))