def test_log_news_scan_ending(self, mock_stdout):
     """Check the text captured in ``mock_stdout`` when a scan end is logged.

     The expected output is the scan-ending line, carrying the timestamp
     passed to the logger, framed by two identical banner lines.
     """
     scan_end = datetime.now()
     banner = "<<<<<<<<<<>>>>>>>>>>\n"
     expected_output = "".join([
         banner,
         "Ending news scan at: {}\n".format(scan_end),
         banner,
     ])

     ConsoleNewsLogger().log_news_scan_ending(scan_end)

     self.assertEqual(expected_output, mock_stdout.getvalue())
 def test_log_news_item_added(self, mock_stdout):
     """Check the text captured in ``mock_stdout`` when an added item is logged.

     The expected report lists the item's ``created_at`` time, its website
     name, URL and title between the opening and closing rule lines.
     """
     website = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
     item = NewsItem("www.abc.com/news/a-story", "A Story", None, None, None,
                     website)

     ConsoleNewsLogger().log_news_item_added(item)

     # created_at is read only after the logger has run, as in the
     # comparison the test has always made.
     expected_output = "".join([
         "\n===============\n",
         "News item added\n",
         "---------------\n",
         "Time: {}\n".format(item.created_at),
         "Website: ABC News\n",
         "URL: www.abc.com/news/a-story\n",
         "Title: A Story\n",
         "===============\n",
     ])
     self.assertEqual(expected_output, mock_stdout.getvalue())
 def test_log_news_item_updated_with_author(self, mock_stdout):
     """Check the update report when the author changed from ``None``.

     The logger receives the current item plus a dict of previous values
     (``{'author': None}``); the expected output shows the author line and
     a 'changed from' line quoting the old and new author.
     """
     website = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
     item = NewsItem("www.abc.com/news/a-story", "A Story", None,
                     'John Smith', None, website)
     previous_values = {'author': None}

     ConsoleNewsLogger().log_news_item_updated(item, previous_values)

     expected_output = "".join([
         "\n===============\n",
         "Existing news item updated\n",
         "---------------\n",
         "Time: {}\n".format(item.updated_at),
         "Website: ABC News\n",
         "URL: www.abc.com/news/a-story\n",
         "Title: A Story\n",
         "Author: John Smith\n",
         "---------------\n",
         'Author changed from: "None" to: "John Smith"\n',
         "===============\n",
     ])
     self.assertEqual(expected_output, mock_stdout.getvalue())
 def test_log_news_item_updated_with_title(self, mock_stdout):
     """Check the update report when the title changed.

     The logger receives the current item plus a dict of previous values
     (``{'title': 'A Story'}``); the expected output shows the new title
     and a 'changed from' line quoting the old and new title.
     """
     website = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
     item = NewsItem("www.abc.com/news/a-story", "An Updated Story", None,
                     None, None, website)
     previous_values = {'title': 'A Story'}

     ConsoleNewsLogger().log_news_item_updated(item, previous_values)

     expected_output = "".join([
         "\n===============\n",
         "Existing news item updated\n",
         "---------------\n",
         "Time: {}\n".format(item.updated_at),
         "Website: ABC News\n",
         "URL: www.abc.com/news/a-story\n",
         "Title: An Updated Story\n",
         "---------------\n",
         'Title changed from: "A Story" to: "An Updated Story"\n',
         "===============\n",
     ])
     self.assertEqual(expected_output, mock_stdout.getvalue())
 def test_log_news_item_updated_with_content(self, mock_stdout):
     """Check the update report when the content changed from ``None``.

     The logger receives the current item plus a dict of previous values
     (``{'content': None}``); the expected output shows the content line
     and a 'changed from' line quoting the old and new content.
     """
     website = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
     item = NewsItem("www.abc.com/news/a-story", "A Story",
                     "This is a story summary.", None, None, website)
     previous_values = {'content': None}

     ConsoleNewsLogger().log_news_item_updated(item, previous_values)

     expected_output = "".join([
         "\n===============\n",
         "Existing news item updated\n",
         "---------------\n",
         "Time: {}\n".format(item.updated_at),
         "Website: ABC News\n",
         "URL: www.abc.com/news/a-story\n",
         "Title: A Story\n",
         "Content: This is a story summary.\n",
         "---------------\n",
         'Content changed from: "None" to: "This is a story summary."\n',
         "===============\n",
     ])
     self.assertEqual(expected_output, mock_stdout.getvalue())
# Example no. 6
# 0
def news_scan_job():
    """Scan each configured website for news items and persist the results.

    For every item a scanner returns, the database is queried for a stored
    ``NewsItem`` with the same URL and website:

    * no match -- the scanned item is attached to the scanner's website,
      logged as added, and committed;
    * match -- ``title``, ``content``, ``author`` and ``topic`` are compared
      in that order; each scanned value that is not ``None`` and differs
      from the stored one overwrites it.  If anything changed,
      ``updated_at`` is refreshed, the item is committed, and the update is
      logged together with the previous values.

    The session is closed even when scanning or committing raises; the
    exception still propagates and the scan-ending message is not logged in
    that case.

    :return: None
    """
    sbs_web = get_first_or_create_news_website("SBS News",
                                               "https://www.sbs.com.au",
                                               "https://www.sbs.com.au/news/")
    abc_web = get_first_or_create_news_website("ABC News",
                                               "https://www.abc.net.au",
                                               "https://www.abc.net.au/news/")
    nine_web = get_first_or_create_news_website("Nine News",
                                                "https://www.9news.com.au/",
                                                "https://www.9news.com.au/")
    aus_web = get_first_or_create_news_website(
        "The Australian", "https://www.theaustralian.com.au/",
        "https://www.theaustralian.com.au/")

    news_scanners = [
        NewsScanner(sbs_web, sbs_news_item_parser.SbsNewsItemParser()),
        NewsScanner(abc_web, abc_news_item_parser.AbcNewsItemParser()),
        NewsScanner(nine_web, nine_news_item_parser.NineNewsItemParser()),
        NewsScanner(
            aus_web,
            the_australian_news_item_parser.TheAustralianNewsItemParser())
    ]

    # Attributes that may be refreshed on an already-stored item, in the
    # order their previous values are recorded for the update log.
    tracked_fields = ('title', 'content', 'author', 'topic')

    logger = ConsoleNewsLogger()
    session = Session()
    try:
        logger.log_news_scan_starting(datetime.now())

        for scanner in news_scanners:
            for scanned_news_item in scanner.get_news_items():
                existing_item = session.query(NewsItem).filter(
                    NewsItem.url == scanned_news_item.url,
                    NewsItem.news_website == scanner.news_website).first()

                if existing_item is None:
                    scanned_news_item.news_website = scanner.news_website
                    logger.log_news_item_added(scanned_news_item)
                    session.add(scanned_news_item)
                    session.commit()
                    continue

                # Maps each changed field name to the value it had before
                # this scan; an empty dict means nothing changed.
                original_properties = {}
                for field in tracked_fields:
                    stored_value = getattr(existing_item, field)
                    scanned_value = getattr(scanned_news_item, field)
                    # A scanned value of None never overwrites stored data.
                    if stored_value != scanned_value and scanned_value is not None:
                        original_properties[field] = stored_value
                        setattr(existing_item, field, scanned_value)

                if original_properties:
                    existing_item.updated_at = datetime.now()
                    session.add(existing_item)
                    session.commit()
                    logger.log_news_item_updated(existing_item,
                                                 original_properties)
    finally:
        # Always release the session, including when a scanner, query or
        # commit raises part-way through the scan.
        session.close()

    logger.log_news_scan_ending(datetime.now())