Ejemplo n.º 1
0
    def test_retrieve_tweets_by_content_between_dates_no_limit_before(self):
        """Old tweets fetched without a start-date limit respect the end bound.

        Adds a claim dated 2017-02-03, configures the crawler with the start
        limit disabled, the end limit enabled, at most 250 tweets and a
        one-month interval, then checks that every returned tweet is dated
        strictly before 2017-03-03 and that no more than 250 tweets come back.
        """
        claim_text = u"The Rock Running for President"
        self._add_claim(u"post0", claim_text, u"2017-02-03 00:00:00")
        self._db.commit()

        # NOTE(review): presumably claim date + _month_interval (1 month);
        # confirm against the crawler's date-window logic.
        until_date = str_to_date(u"2017-03-03 00:00:00")

        crawler = self.tweets_crawler
        crawler._limit_start_date = False
        crawler._limit_end_date = True
        crawler._max_num_tweets = 250
        crawler._month_interval = 1

        tweets = crawler._retrieve_old_tweets(self._claims[u"post0"],
                                              claim_text)

        # Only the upper bound is asserted: the start limit is switched off.
        self.assertTrue(all(tweet.date < until_date for tweet in tweets))
        self.assertLessEqual(len(tweets), 250)
    def _add_post(self, author, date, post_osn_id, score=0, upvote_ratio=-1):
        """Create a Post and its RedditPost companion and store both in the DB.

        The post is treated as a reddit comment unless ``upvote_ratio`` is
        given (anything other than -1), in which case it is marked as a reddit
        post and its up/down vote counts are derived from ``score`` and
        ``upvote_ratio``.

        :param author: author name; converted with ``str()``.
        :param date: creation date string in ``%d/%m/%Y %H:%M`` format.
        :param post_osn_id: value stored as the post's ``post_osn_id``.
        :param score: reddit score stored on the RedditPost.
        :param upvote_ratio: upvote ratio, or -1 when not available.
        :return: tuple ``(post, reddit_post)`` of the objects that were added.
        """
        post = Post()
        post.post_osn_id = post_osn_id

        author_name = str(author)
        post.author = author_name
        post.author_guid = compute_author_guid_by_author_name(author_name)

        created_at = str_to_date(date, formate="%d/%m/%Y %H:%M")
        post.created_at = created_at

        # just for test
        post.url = 'https://www.reddit.com{}'.format(author_name)
        post.guid = compute_post_guid(post.url, post_osn_id,
                                      date_to_str(created_at))
        post.domain = 'reddit_comment'
        post.post_type = 'reddit_comment'
        post.post_id = post.guid

        reddit_post = RedditPost()
        reddit_post.post_id = post.post_id
        reddit_post.guid = post.guid
        reddit_post.score = score

        if upvote_ratio == -1:
            # No ratio supplied: mark the vote breakdown as unknown.
            reddit_post.ups = -1
            reddit_post.downs = -1
            reddit_post.upvote_ratio = -1
        else:
            post.domain = 'reddit_post'
            post.post_type = 'reddit_post'
            reddit_post.upvote_ratio = upvote_ratio
            if upvote_ratio != 0.5:
                estimated_ups = round((upvote_ratio * score) /
                                      (2 * upvote_ratio - 1))
            else:
                # A ratio of exactly 0.5 would make the denominator zero.
                estimated_ups = round(score / 2)
            reddit_post.ups = int(estimated_ups)
            reddit_post.downs = reddit_post.ups - score

        self._db.addPosts([post, reddit_post])
        return post, reddit_post
Ejemplo n.º 3
0
 def test_retrieve_tweets_by_content_between_dates_before(self):
     """Tweets fetched between two dates fall inside that window.

     Adds a claim dated 2017-02-03, enables both the start and end date
     limits on the crawler, retrieves tweets between 2016-08-03 and the
     claim's verdict date, and checks that every tweet date lies in
     ``[since_date, claim_date)`` and that at most 100 tweets are returned.
     """
     claim_text = u"The Rock Running for President"
     self._add_claim(u"post0", claim_text, u"2017-02-03 00:00:00")
     self._db.commit()

     claim = self._claims[u"post0"]
     claim_date = claim.verdict_date
     since_date = str_to_date(u"2016-08-03 00:00:00")

     crawler = self.tweets_crawler
     crawler._limit_start_date = True
     crawler._limit_end_date = True

     # The crawler is given day-resolution date strings for both bounds.
     tweets = crawler._retrieve_tweets_between_dates(
         claim, claim_text,
         date_to_str(since_date, "%Y-%m-%d"),
         date_to_str(claim_date, "%Y-%m-%d"))

     self.assertTrue(
         all(since_date <= tweet.date < claim_date for tweet in tweets))
     self.assertLessEqual(len(tweets), 100)