def test_log_news_scan_ending(self, mock_stdout):
    """The ending banner wraps the timestamp between two divider rows."""
    timestamp = datetime.now()

    ConsoleNewsLogger().log_news_scan_ending(timestamp)

    divider = "<<<<<<<<<<>>>>>>>>>>\n"
    expected = divider + "Ending news scan at: {}\n".format(timestamp) + divider
    self.assertEqual(expected, mock_stdout.getvalue())
def test_log_news_item_added(self, mock_stdout):
    """A newly added item logs its creation time, website, URL and title."""
    news_website = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
    news_item = NewsItem(
        "www.abc.com/news/a-story", "A Story", None, None, None, news_website)

    ConsoleNewsLogger().log_news_item_added(news_item)

    expected = "".join([
        "\n===============\n",
        "News item added\n",
        "---------------\n",
        "Time: {}\n".format(news_item.created_at),
        "Website: ABC News\n",
        "URL: www.abc.com/news/a-story\n",
        "Title: A Story\n",
        "===============\n",
    ])
    self.assertEqual(expected, mock_stdout.getvalue())
def test_log_news_item_updated_with_author(self, mock_stdout):
    """An update that sets the author logs the old -> new author values."""
    news_website = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
    news_item = NewsItem(
        "www.abc.com/news/a-story", "A Story", None, 'John Smith', None,
        news_website)

    # The original value for the changed property was None (no author yet).
    ConsoleNewsLogger().log_news_item_updated(news_item, {'author': None})

    expected = "".join([
        "\n===============\n",
        "Existing news item updated\n",
        "---------------\n",
        "Time: {}\n".format(news_item.updated_at),
        "Website: ABC News\n",
        "URL: www.abc.com/news/a-story\n",
        "Title: A Story\n",
        "Author: John Smith\n",
        "---------------\n",
        'Author changed from: "None" to: "John Smith"\n',
        "===============\n",
    ])
    self.assertEqual(expected, mock_stdout.getvalue())
def test_log_news_item_updated_with_title(self, mock_stdout):
    """An update that changes the title logs the old -> new title values."""
    news_website = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
    news_item = NewsItem(
        "www.abc.com/news/a-story", "An Updated Story", None, None, None,
        news_website)

    # The original value for the changed property was "A Story".
    ConsoleNewsLogger().log_news_item_updated(news_item, {'title': 'A Story'})

    expected = "".join([
        "\n===============\n",
        "Existing news item updated\n",
        "---------------\n",
        "Time: {}\n".format(news_item.updated_at),
        "Website: ABC News\n",
        "URL: www.abc.com/news/a-story\n",
        "Title: An Updated Story\n",
        "---------------\n",
        'Title changed from: "A Story" to: "An Updated Story"\n',
        "===============\n",
    ])
    self.assertEqual(expected, mock_stdout.getvalue())
def test_log_news_item_updated_with_content(self, mock_stdout):
    """An update that sets the content logs the old -> new content values."""
    news_website = NewsWebsite('ABC News', "www.abc.com", "www.abc.com/news")
    news_item = NewsItem(
        "www.abc.com/news/a-story", "A Story", "This is a story summary.",
        None, None, news_website)

    # The original value for the changed property was None (no content yet).
    ConsoleNewsLogger().log_news_item_updated(news_item, {'content': None})

    expected = "".join([
        "\n===============\n",
        "Existing news item updated\n",
        "---------------\n",
        "Time: {}\n".format(news_item.updated_at),
        "Website: ABC News\n",
        "URL: www.abc.com/news/a-story\n",
        "Title: A Story\n",
        "Content: This is a story summary.\n",
        "---------------\n",
        'Content changed from: "None" to: "This is a story summary."\n',
        "===============\n",
    ])
    self.assertEqual(expected, mock_stdout.getvalue())
# Properties of a NewsItem that a rescan may legitimately change.
_UPDATABLE_PROPERTIES = ('title', 'content', 'author', 'topic')


def _build_news_scanners():
    """Return one NewsScanner per configured website/parser pair."""
    sbs_web = get_first_or_create_news_website(
        "SBS News", "https://www.sbs.com.au", "https://www.sbs.com.au/news/")
    abc_web = get_first_or_create_news_website(
        "ABC News", "https://www.abc.net.au", "https://www.abc.net.au/news/")
    nine_web = get_first_or_create_news_website(
        "Nine News", "https://www.9news.com.au/", "https://www.9news.com.au/")
    aus_web = get_first_or_create_news_website(
        "The Australian", "https://www.theaustralian.com.au/",
        "https://www.theaustralian.com.au/")
    return [
        NewsScanner(sbs_web, sbs_news_item_parser.SbsNewsItemParser()),
        NewsScanner(abc_web, abc_news_item_parser.AbcNewsItemParser()),
        NewsScanner(nine_web, nine_news_item_parser.NineNewsItemParser()),
        NewsScanner(
            aus_web,
            the_australian_news_item_parser.TheAustralianNewsItemParser())
    ]


def _persist_scanned_item(session, logger, scanner, scanned_news_item):
    """Insert a newly seen item, or update the stored copy if it changed.

    :param session: open SQLAlchemy session used for the lookup and commit
    :param logger: ConsoleNewsLogger that records the add/update
    :param scanner: NewsScanner the item came from (supplies news_website)
    :param scanned_news_item: NewsItem freshly parsed from the website
    :return: None
    """
    existing_item = session.query(NewsItem).filter(
        NewsItem.url == scanned_news_item.url,
        NewsItem.news_website == scanner.news_website).first()

    if existing_item is None:
        scanned_news_item.news_website = scanner.news_website
        logger.log_news_item_added(scanned_news_item)
        session.add(scanned_news_item)
        session.commit()
        return

    # Copy across each property that the rescan changed, remembering the
    # original value so the logger can report "changed from ... to ...".
    # A None scanned value means "not parsed this time" and never overwrites.
    original_properties = {}
    for prop in _UPDATABLE_PROPERTIES:
        new_value = getattr(scanned_news_item, prop)
        if new_value is not None and getattr(existing_item, prop) != new_value:
            original_properties[prop] = getattr(existing_item, prop)
            setattr(existing_item, prop, new_value)

    if original_properties:
        existing_item.updated_at = datetime.now()
        session.add(existing_item)
        session.commit()
        logger.log_news_item_updated(existing_item, original_properties)


def news_scan_job():
    """Scans each of the configured websites for news items, logs added
    or updated items and persists them to the db

    :return: None
    """
    news_scanners = _build_news_scanners()
    session = Session()
    logger = ConsoleNewsLogger()
    logger.log_news_scan_starting(datetime.now())
    try:
        for scanner in news_scanners:
            for scanned_news_item in scanner.get_news_items():
                _persist_scanned_item(session, logger, scanner,
                                      scanned_news_item)
    finally:
        # Always release the session, even if a scanner or the db raises —
        # the original code leaked the session on any mid-scan exception.
        session.close()
    logger.log_news_scan_ending(datetime.now())