def test_create_and_publish_record(self):
    """Publishing a freshly retrieved latimes.com feed must return ``None``."""
    setup_test_case()
    feed = newscatcher_helper.retrieve_feed("latimes.com")

    # The last-published marker is the current UTC time minus one day,
    # rendered as an ISO-8601 string.
    one_day_ago = datetime.now(timezone.utc) - timedelta(days=1)

    outcome = newscatcher_helper.create_and_publish_record(
        feed,
        "fakeaccount",
        "newscatcher",
        last_published_timestamp=one_day_ago.isoformat(),
    )
    self.assertIsNone(outcome)
def test_published_date(self):
    """Publishing still returns ``None`` when articles lack ``published_parsed``."""
    setup_test_case()
    feed = newscatcher_helper.retrieve_feed("cnn.com")

    # The last-published marker is the current UTC time minus one day,
    # rendered as an ISO-8601 string.
    one_day_ago = datetime.now(timezone.utc) - timedelta(days=1)
    cutoff = one_day_ago.isoformat()

    # Strip the key from every article; the default keeps pop() from raising
    # on articles that never had it.
    for entry in feed["articles"]:
        entry.pop("published_parsed", None)

    outcome = newscatcher_helper.create_and_publish_record(
        feed,
        "fakeaccount",
        "newscatcher",
        last_published_timestamp=cutoff,
    )
    self.assertIsNone(outcome)
def process_config_handler(event, context):
    """Retrieve and publish the news feed(s) described by a configuration event.

    The event is processed only when ``event["source"]`` equals the
    ``INGESTION_NAMESPACE`` environment variable. ``event["detail"]`` must
    provide ``url``, ``account`` and ``platform``; ``topic`` and ``query`` are
    optional. Without a topic, feeds for all topics are retrieved; otherwise
    only the feed for the requested topic is. For every feed, the query
    tracker is looked up, the feed is published, and the tracker is updated.

    Args:
        event: Mapping with ``source`` and ``detail`` entries.
        context: Not used by this function.

    Raises:
        IncorrectEventNameSpaceError: If ``event["source"]`` does not match
            the configured ingestion namespace.
    """
    # Guard clause: reject events coming from an unexpected namespace.
    if event["source"] != os.environ["INGESTION_NAMESPACE"]:
        logger.error("Target resource not configured for received namespace")
        raise IncorrectEventNameSpaceError(
            "Target resource not configured for received namespace")

    data = event["detail"]

    if data.get("topic") is None:
        logger.debug(
            "Since topic is none, gettting news feed for all available topics"
        )
        aggregated_feed = retrieve_feed_from_all_topics(data["url"])
    else:
        logger.debug(f"Retrieving news feed for topic: {data['topic']}")
        # `data` is accessed by key everywhere else in this function; the
        # previous attribute access (`data.url`) fails on a plain dict.
        aggregated_feed = retrieve_feed(data["url"], topic=data["topic"])
        # NOTE(review): retrieve_feed appears to return a single feed mapping
        # rather than a list of feeds - confirm. Wrapping it makes the loop
        # below see the feed itself instead of iterating over its keys; a
        # list result is left untouched.
        if isinstance(aggregated_feed, dict):
            aggregated_feed = [aggregated_feed]

    # These values do not depend on the individual feed, so compute them once.
    account = data["account"]
    url = data["url"]
    search_query = data.get("query")
    # An empty query, "ALL" or "*" does not filter any records, so treat all
    # of them as "no query".
    if search_query in ("", "ALL", "*"):
        search_query = None

    for feed in aggregated_feed:
        tracker = get_query_tracker(account, url, search_query, feed["topic"])
        create_and_publish_record(
            feed,
            account,
            data["platform"],
            tracker["LAST_PUBLISHED_TIMESTAMP"],
            search_query,
        )
        # NOTE(review): the tracker is read with the configured url but
        # updated with feed["url"] - confirm the two always agree.
        ddb_helper.update_query_tracker(account, feed["url"], search_query,
                                        feed["topic"])
def test_retrieve_feed_with_invalid_topic(self):
    """Requesting the topic "faketopic" must raise ``TopicNotSupportedError``."""
    # Callable form of assertRaises: the helper is invoked with the given
    # positional and keyword arguments inside the assertion.
    self.assertRaises(
        newscatcher_helper.TopicNotSupportedError,
        newscatcher_helper.retrieve_feed,
        "nytimes.com",
        topic="faketopic",
    )
def test_retrieve_feed(self):
    """The nytimes.com feed reports the requested url and the language "en"."""
    site = "nytimes.com"
    feed = newscatcher_helper.retrieve_feed(site)

    # Checked in order; the first mismatch fails the test.
    expectations = (("url", site), ("language", "en"))
    for field, wanted in expectations:
        self.assertEqual(feed[field], wanted)