def test_create_and_publish_record(self):
    """Check that publishing a retrieved latimes.com feed yields ``None``.

    The feed is fetched through ``newscatcher_helper.retrieve_feed`` and
    handed to ``create_and_publish_record`` with a last-published marker
    set to 24 hours before the current UTC time.
    """
    setup_test_case()
    feed = newscatcher_helper.retrieve_feed("latimes.com")

    # Timezone-aware "one day ago", serialised as ISO-8601 when passed on.
    one_day_ago = datetime.now(timezone.utc) - timedelta(days=1)

    outcome = newscatcher_helper.create_and_publish_record(
        feed,
        "fakeaccount",
        "newscatcher",
        last_published_timestamp=one_day_ago.isoformat(),
    )
    self.assertIsNone(outcome)
    def test_published_date(self):
        """Check publishing still yields ``None`` without ``published_parsed``.

        Every article in the retrieved cnn.com feed has its
        ``published_parsed`` entry removed (if present) before the feed is
        handed to ``create_and_publish_record``.
        """
        setup_test_case()
        feed = newscatcher_helper.retrieve_feed("cnn.com")

        # Timezone-aware "one day ago", serialised as ISO-8601.
        one_day_ago = datetime.now(timezone.utc) - timedelta(days=1)
        last_published = one_day_ago.isoformat()

        # Strip the parsed publication date in place; a missing key is fine.
        for entry in feed["articles"]:
            entry.pop("published_parsed", None)

        outcome = newscatcher_helper.create_and_publish_record(
            feed,
            "fakeaccount",
            "newscatcher",
            last_published_timestamp=last_published,
        )
        self.assertIsNone(outcome)
Esempio n. 3
0
def process_config_handler(event, context):
    """Retrieve the news feed(s) named by a config event and publish them.

    Args:
        event: Mapping with a ``"source"`` entry and a ``"detail"`` mapping.
            The detail must provide ``"url"``, ``"account"`` and
            ``"platform"``; ``"topic"`` and ``"query"`` are optional.
        context: Invocation context; not used by this handler.

    Raises:
        IncorrectEventNameSpaceError: If ``event["source"]`` does not match
            the ``INGESTION_NAMESPACE`` environment variable.
    """
    # Guard clause: reject events that come from an unexpected namespace.
    if event["source"] != os.environ["INGESTION_NAMESPACE"]:
        logger.error("Target resource not configured for received namespace")
        raise IncorrectEventNameSpaceError(
            "Target resource not configured for received namespace")

    data = event["detail"]
    url = data["url"]
    topic = data.get("topic")

    if topic is None:
        logger.debug(
            "Since topic is none, gettting news feed for all available topics"
        )
        aggregated_feed = retrieve_feed_from_all_topics(url)
    else:
        logger.debug(f"Retrieving news feed for topic: {topic}")
        # `data` is a mapping (it is subscripted and `.get()`-ed above), so the
        # URL must be read by key; attribute access raised AttributeError.
        aggregated_feed = retrieve_feed(url, topic=topic)
        # NOTE(review): retrieve_feed appears to return a single feed mapping
        # rather than a list of feeds - confirm against the helper. Wrapping
        # it keeps the loop below iterating over feeds instead of dict keys.
        if isinstance(aggregated_feed, dict):
            aggregated_feed = [aggregated_feed]

    # An empty query, "ALL" or "*" filters nothing, so treat it as no query.
    search_query = data.get("query")
    if search_query in ("", "ALL", "*"):
        search_query = None

    for feed in aggregated_feed:
        account = data["account"]

        tracker = get_query_tracker(account, url, search_query, feed["topic"])
        create_and_publish_record(feed, account, data["platform"],
                                  tracker["LAST_PUBLISHED_TIMESTAMP"],
                                  search_query)

        # Record progress for this feed/topic after publishing.
        ddb_helper.update_query_tracker(account, feed["url"], search_query,
                                        feed["topic"])
 def test_retrieve_feed_with_invalid_topic(self):
     """Check that an unknown topic raises ``TopicNotSupportedError``."""
     self.assertRaises(
         newscatcher_helper.TopicNotSupportedError,
         newscatcher_helper.retrieve_feed,
         "nytimes.com",
         topic="faketopic",
     )
 def test_retrieve_feed(self):
     """Check the retrieved nytimes.com feed reports its URL and language."""
     site = "nytimes.com"
     feed = newscatcher_helper.retrieve_feed(site)

     # Checked in order: the feed's own URL first, then its language code.
     for key, expected in (("url", site), ("language", "en")):
         self.assertEqual(feed[key], expected)