def test_remote_integration(self) -> None:
    """Sanity-check the remote APIs by exercising the internal helpers that integrate CH and twitter data."""
    db = self.db()
    config = mediawords.util.config.get_config()

    # topic pointed at the configured test Crimson Hexagon monitor
    topic = mediawords.test.db.create_test_topic(db, "test_remote_integration")
    topic['ch_monitor_id'] = config['crimson_hexagon']['test_monitor_id']
    db.update_by_id('topics', topic['topics_id'], topic)

    day = datetime.datetime(year=2016, month=1, day=1)
    tweet_day = mediawords.tm.fetch_topic_tweets._add_topic_tweet_single_day(
        db, topic, day, mediawords.tm.fetch_topic_tweets.CrimsonHexagon)

    # keep the fetch small so the test stays quick
    tweet_limit = 200
    mediawords.tm.fetch_topic_tweets._fetch_tweets_for_day(
        db, mediawords.tm.fetch_topic_tweets.Twitter, topic, tweet_day, max_tweets=tweet_limit)

    fetched = db.query(
        "select * from topic_tweets where topic_tweet_days_id = %(a)s",
        {'a': tweet_day['topic_tweet_days_id']},
    ).hashes()

    # for old ch monitors, lots of the tweets may be deleted
    assert len(fetched) > tweet_limit / 10
    assert len(fetched[0]['content']) > MIN_TEST_TWEET_LENGTH
    assert len(fetched[0]['twitter_user']) > MIN_TEST_TWITTER_USER_LENGTH
def test_fetch_topic_tweets(self) -> None:
    """Exercise fetch_topic_tweets end to end against the test database."""
    db = self.db()

    topic = mediawords.test.db.create_test_topic(db, 'test')
    dates = get_test_date_range()
    topic['start_date'] = dates[0]
    topic['end_date'] = dates[1]
    topic['ch_monitor_id'] = 123456
    db.update_by_id('topics', topic['topics_id'], topic)

    mediawords.tm.fetch_topic_tweets.fetch_topic_tweets(
        db, topic['topics_id'], MockTwitter, MockCrimsonHexagon)

    # one topic_tweet_days row per day in the range (inclusive of both ends)
    tweet_days = db.query("select * from topic_tweet_days").hashes()
    assert len(tweet_days) == LOCAL_DATE_RANGE + 1

    first_day = datetime.datetime.strptime(topic['start_date'], '%Y-%m-%d')
    for offset in range(LOCAL_DATE_RANGE):
        day = first_day + datetime.timedelta(days=offset)
        tweet_day = db.query(
            "select * from topic_tweet_days where topics_id = %(a)s and day = %(b)s",
            {'a': topic['topics_id'], 'b': day},
        ).hash()
        assert tweet_day is not None
        validate_topic_tweets(db, tweet_day)

    validate_topic_tweet_urls(db, topic)
def test_fetch_topic_tweets(self) -> None:
    """Exercise fetch_topic_tweets end to end against the test database."""
    db = self.db()

    topic = mediawords.test.db.create.create_test_topic(db, 'test')
    topic = db.update_by_id('topics', topic['topics_id'], {'pattern': '.*'})

    dates = get_test_date_range()
    topic['start_date'] = dates[0]
    topic['end_date'] = dates[1]
    topic['ch_monitor_id'] = 123456
    db.update_by_id('topics', topic['topics_id'], topic)

    ftt.fetch_topic_tweets(db, topic['topics_id'], MockTwitter, MockCrimsonHexagon)

    # one topic_tweet_days row per day in the range (inclusive of both ends)
    tweet_days = db.query("select * from topic_tweet_days").hashes()
    assert len(tweet_days) == LOCAL_DATE_RANGE + 1

    first_day = datetime.datetime.strptime(topic['start_date'], '%Y-%m-%d')
    for offset in range(LOCAL_DATE_RANGE):
        day = first_day + datetime.timedelta(days=offset)
        tweet_day = db.query(
            "select * from topic_tweet_days where topics_id = %(a)s and day = %(b)s",
            {'a': topic['topics_id'], 'b': day},
        ).hash()
        assert tweet_day is not None
        validate_topic_tweets(db, tweet_day)

    validate_topic_tweet_urls(db, topic)
def test_fetch_topic_tweets(self) -> None:
    """Run fetch_topic_tweet tests with test database.

    Creates a twitter topic backed by a crimson_hexagon seed query,
    monkey-patches the remote fetchers with local mocks, runs the full
    fetch, then verifies that a topic_tweet_days row exists and validates
    for every day in the topic's date range.
    """
    db = self.db()

    topic = mediawords.test.db.create.create_test_topic(db, 'test')
    topic = db.update_by_id('topics', topic['topics_id'], {'pattern': '.*'})

    test_dates = get_test_date_range()
    topic['start_date'] = test_dates[0]
    topic['end_date'] = test_dates[1]
    db.update_by_id('topics', topic['topics_id'], topic)

    tsq = {
        'topics_id': topic['topics_id'],
        'platform': 'twitter',
        'source': 'crimson_hexagon',
        'query': 123456
    }
    db.create('topic_seed_queries', tsq)

    db.update_by_id('topics', topic['topics_id'], {'platform': 'twitter'})

    # Patch through the same `ftt` module alias used for the call below,
    # matching the rest of this file, so the mocks land on the module that
    # is actually invoked.
    # NOTE(review): the patch is never restored after the test; consider a
    # fixture/teardown if other tests need the real fetchers.
    ftt.fetch_meta_tweets_from_ch = mock_fetch_meta_tweets_from_ch
    ftt.fetch_100_tweets = mock_fetch_100_tweets

    ftt.fetch_topic_tweets(db, topic['topics_id'])

    # one topic_tweet_days row per day in the range (inclusive of both ends)
    topic_tweet_days = db.query("select * from topic_tweet_days").hashes()
    assert len(topic_tweet_days) == LOCAL_DATE_RANGE + 1

    start_date = datetime.datetime.strptime(topic['start_date'], '%Y-%m-%d')
    test_days = [start_date + datetime.timedelta(days=x) for x in range(0, LOCAL_DATE_RANGE)]
    for d in test_days:
        topic_tweet_day = db.query(
            "select * from topic_tweet_days where topics_id = %(a)s and day = %(b)s",
            {'a': topic['topics_id'], 'b': d}).hash()
        assert topic_tweet_day is not None
        validate_topic_tweets(db, topic_tweet_day)

    validate_topic_tweet_urls(db, topic)
def _test_remote_integration(self, source, query, day) -> None:
    """Run sanity test on remote apis.

    Creates a twitter topic with a seed query for the given source/query,
    fetches up to 200 tweets for the single given day, and asserts that a
    plausible number of plausible-looking tweets landed in topic_tweets.
    """
    db = self.db()

    topic = mediawords.test.db.create.create_test_topic(db, "test_remote_integration")

    tsq = {
        'topics_id': topic['topics_id'],
        'platform': 'twitter',
        'source': source,
        'query': query
    }
    db.create('topic_seed_queries', tsq)

    # single-day twitter topic matching everything
    topic['platform'] = 'twitter'
    topic['pattern'] = '.*'
    topic['start_date'] = day
    topic['end_date'] = day
    db.update_by_id('topics', topic['topics_id'], topic)

    # only fetch 200 tweets to make test quicker
    max_tweets = 200
    ftt.fetch_topic_tweets(db, topic['topics_id'], max_tweets)

    got_tts = db.query("select * from topic_tweets").hashes()

    # for old ch monitors, lots of the tweets may be deleted
    assert len(got_tts) > max_tweets / 10
    assert len(got_tts[0]['content']) > MIN_TEST_TWEET_LENGTH
    assert len(got_tts[0]['twitter_user']) > MIN_TEST_TWITTER_USER_LENGTH
def test_remote_integration(self) -> None:
    """Sanity-check the remote APIs by exercising the internal helpers that integrate CH and twitter data."""
    db = self.db()

    # topic pointed at the test Crimson Hexagon monitor
    topic = mediawords.test.db.create.create_test_topic(db, "test_remote_integration")
    topic['ch_monitor_id'] = TEST_MONITOR_ID
    db.update_by_id('topics', topic['topics_id'], topic)

    day = datetime.datetime(year=2016, month=1, day=1)
    tweet_day = ftt._add_topic_tweet_single_day(db, topic, day, ftt.CrimsonHexagon)

    # keep the fetch small so the test stays quick
    tweet_limit = 200
    ftt._fetch_tweets_for_day(db, ftt.Twitter, topic, tweet_day, max_tweets=tweet_limit)

    fetched = db.query(
        "select * from topic_tweets where topic_tweet_days_id = %(a)s",
        {'a': tweet_day['topic_tweet_days_id']},
    ).hashes()

    # for old ch monitors, lots of the tweets may be deleted
    assert len(fetched) > tweet_limit / 10
    assert len(fetched[0]['content']) > MIN_TEST_TWEET_LENGTH
    assert len(fetched[0]['twitter_user']) > MIN_TEST_TWITTER_USER_LENGTH