def test_retrieve_tweets_by_content_between_dates_no_limit_before(self):
    """Check `_retrieve_old_tweets` with only the end date limited.

    A claim dated 2017-02-03 is stored, the crawler is configured with
    `_limit_start_date = False` and `_limit_end_date = True`, and the
    returned tweets must all be dated strictly before 2017-03-03 and
    number at most `_max_num_tweets`.
    """
    claim_text = u"The Rock Running for President"
    max_tweets = 250

    self._add_claim(u"post0", claim_text, u"2017-02-03 00:00:00")
    self._db.commit()

    # Upper bound checked below; it is one month after the claim date.
    # NOTE(review): presumably derived from `_month_interval = 1` - confirm
    # against the crawler implementation.
    until_date = str_to_date(u"2017-03-03 00:00:00")

    crawler = self.tweets_crawler
    crawler._limit_start_date = False
    crawler._limit_end_date = True
    crawler._max_num_tweets = max_tweets
    crawler._month_interval = 1

    tweets = crawler._retrieve_old_tweets(self._claims[u"post0"], claim_text)

    # A generator keeps this correct on both Python 2 and 3 and avoids
    # building intermediate lists.
    self.assertTrue(all(tweet.date < until_date for tweet in tweets))
    self.assertLessEqual(len(tweets), max_tweets)
def _add_post(self, author, date, post_osn_id, score=0, upvote_ratio=-1):
    """Create a `Post` and its matching `RedditPost` and add both to the DB.

    When `upvote_ratio` is left at -1 the pair is tagged 'reddit_comment'
    and ups/downs/upvote_ratio are all stored as -1. Otherwise the pair is
    tagged 'reddit_post' and ups/downs are derived from `score` and
    `upvote_ratio`.

    :param author: author name; stored as `str(author)`.
    :param date: creation date string in "%d/%m/%Y %H:%M" format.
    :param post_osn_id: value stored as the post's `post_osn_id`.
    :param score: value stored as the reddit post's score.
    :param upvote_ratio: upvote ratio, or -1 to create a comment.
    :return: tuple `(post, reddit_post)`.
    """
    is_submission = upvote_ratio != -1
    kind = 'reddit_post' if is_submission else 'reddit_comment'

    post = Post()
    post.post_osn_id = post_osn_id
    post.author = str(author)
    post.author_guid = compute_author_guid_by_author_name(post.author)
    post.created_at = str_to_date(date, formate="%d/%m/%Y %H:%M")
    # just for test
    post.url = 'https://www.reddit.com{}'.format(post.author)
    created_at_str = date_to_str(post.created_at)
    post.guid = compute_post_guid(post.url, post.post_osn_id, created_at_str)
    post.domain = kind
    post.post_type = kind
    post.post_id = post.guid

    reddit_post = RedditPost()
    reddit_post.post_id = post.post_id
    reddit_post.guid = post.guid
    reddit_post.score = score

    if not is_submission:
        reddit_post.ups = -1
        reddit_post.downs = -1
        reddit_post.upvote_ratio = -1
    else:
        reddit_post.upvote_ratio = upvote_ratio
        if upvote_ratio == 0.5:
            # The general formula would divide by zero here.
            estimated_ups = round(score / 2)
        else:
            # Solves score = ups - downs and ratio = ups / (ups + downs)
            # for ups.
            estimated_ups = round(
                (upvote_ratio * score) / (2 * upvote_ratio - 1))
        reddit_post.ups = int(estimated_ups)
        reddit_post.downs = reddit_post.ups - score

    self._db.addPosts([post, reddit_post])
    return post, reddit_post
def test_retrieve_tweets_by_content_between_dates_before(self):
    """Check `_retrieve_tweets_between_dates` with both date limits on.

    A claim dated 2017-02-03 is stored, and tweets are requested between
    2016-08-03 and the claim's `verdict_date`. Every returned tweet must
    be dated in `[since_date, claim_date)` and at most 100 tweets may be
    returned.
    """
    claim_text = u"The Rock Running for President"

    self._add_claim(u"post0", claim_text, u"2017-02-03 00:00:00")
    self._db.commit()

    claim = self._claims[u"post0"]
    claim_date = claim.verdict_date
    since_date = str_to_date(u"2016-08-03 00:00:00")

    crawler = self.tweets_crawler
    crawler._limit_start_date = True
    crawler._limit_end_date = True

    # The crawler is given day-resolution date strings.
    tweets = crawler._retrieve_tweets_between_dates(
        claim,
        claim_text,
        date_to_str(since_date, "%Y-%m-%d"),
        date_to_str(claim_date, "%Y-%m-%d"))

    # A generator keeps this correct on both Python 2 and 3 and avoids
    # building intermediate lists.
    self.assertTrue(
        all(since_date <= tweet.date < claim_date for tweet in tweets))
    self.assertGreaterEqual(100, len(tweets))