class TestPostCitationCreator(TestCase):
    """Tests for PostCitationCreator over a single Microblog post."""

    def setUp(self):
        # Fresh database instance per test run.
        self._db = DB()
        self._db.setUp()

    def test_execute(self):
        """After execute(), no posts remain in the Microblog domain."""
        author_guid = u'05cd2e04ffaf3c5dabd03d13b63afab6'
        content = u"InternetTV love it #wow https://t.co/tRRt https://t.co/Ao1KOOx77H"
        # "https://twitter.com/AmichaiStein1/status/725022086377431041/photo/1"
        self.create_post(author_guid, content)

        citation_creator = PostCitationCreator(self._db)
        citation_creator.execute()

        microblog_posts = self._db.get_posts_by_domain(u'Microblog')
        self.assertEqual(0, len(microblog_posts))

    def create_post(self, _author_guid1, post_content):
        """Insert one Microblog post authored by *_author_guid1* and commit."""
        new_post = Post()
        new_post.post_id = u'TestPost'
        new_post.author = u'TechmarketNG'
        new_post.guid = u'TestPost'
        new_post.url = u'Url_From'
        base_date = datetime.datetime.strptime(u'2016-05-05 00:00:00',
                                               '%Y-%m-%d %H:%M:%S')
        # The post is stamped one day after the base date.
        new_post.date = base_date + datetime.timedelta(1)
        new_post.domain = u'Microblog'
        new_post.author_guid = _author_guid1
        new_post.content = post_content
        new_post.xml_importer_insertion_date = datetime.datetime.now()
        self._db.addPost(new_post)
        self._db.commit()
# Beispiel #2 (extraction artifact: example separator and vote count "0" from the source listing)
class TestTimelineOverlapVisualizationGenerator(TestCase):
    """Tests for TimelineOverlapVisualizationGenerator.

    setUp seeds the DB with one 'acquired' bad actor and one regular
    author, each owning ten posts with identical content, so the two
    timelines overlap.
    """

    def setUp(self):
        self.config = getConfig()
        self._db = DB()
        self._db.setUp()
        self.timeline_overlap = TimelineOverlapVisualizationGenerator()

        # Bad actor whose timeline overlaps the regular user's.
        self._add_author('acquired_user', 1,
                         author_type='bad_actor', author_sub_type='acquired')
        self._add_posts('acquired_user', 'bad_post')

        # Regular author with parallel (identical) content.
        self._add_author('TestUser1', 2)
        self._add_posts('TestUser1', 'TestPost')

        self._db.commit()

    def _add_author(self, guid, osn_id, author_type=None, author_sub_type=None):
        """Insert an author whose name/screen name/full name all equal *guid*.

        author_type / author_sub_type are only assigned when provided, so
        the regular author keeps them unset as in the original fixture.
        """
        author = Author()
        author.name = guid
        author.domain = 'Microblog'
        author.author_guid = guid
        author.author_screen_name = guid
        author.author_full_name = guid
        author.author_osn_id = osn_id
        author.created_at = datetime.datetime.now()
        author.missing_data_complementor_insertion_date = datetime.datetime.now()
        author.xml_importer_insertion_date = datetime.datetime.now()
        if author_type is not None:
            author.author_type = author_type
        if author_sub_type is not None:
            author.author_sub_type = author_sub_type
        self._db.add_author(author)

    def _add_posts(self, author_guid, post_id_prefix):
        """Insert ten posts (ids <prefix>1..<prefix>10) for *author_guid*."""
        for i in range(1, 11):
            post = Post()
            post.post_id = post_id_prefix + str(i)
            post.author = author_guid
            post.guid = post_id_prefix + str(i)
            post.date = datetime.datetime.now()
            post.domain = 'Microblog'
            post.author_guid = author_guid
            post.content = 'InternetTV love it' + str(i)
            post.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(post)

    def test_generate_timeline_overlap_csv(self):
        """The generator runs and the bad actor keeps its type/sub-type."""
        self.timeline_overlap.setUp()
        self.timeline_overlap.generate_timeline_overlap_csv()
        author = self._db.get_author_by_author_guid('acquired_user')
        self.assertEqual(author.author_type, 'bad_actor')
        self.assertEqual(author.author_sub_type, 'acquired')

    def tearDown(self):
        self._db.session.close_all()
        self._db.session.close()
        self._db.deleteDB()
        self._db.session.close()
# Beispiel #3 (extraction artifact: example separator and vote count "0" from the source listing)
class TestOldTweetsCrawler(TestCase):
    """Integration tests for OldTweetsCrawler date-windowed tweet retrieval.

    NOTE(review): checked at 21/08/2018 - the retrieved tweet counts come
    from a live source, so there is a chance they will change over time.
    """

    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self.tweets_crawler = OldTweetsCrawler(self._db)
        self.tweets_crawler._domain = u'Claim'
        self._add_author(u"author_guid")
        self._claims = {}

    def tearDown(self):
        self._db.session.close()

    def _add_default_claim(self, keywords=u""):
        """Insert the canonical test claim, commit, and return its verdict date."""
        self._add_claim(u"post0", u"The Rock Running for President",
                        u"2017-02-03 00:00:00", keywords)
        self._db.commit()
        return self._claims[u"post0"].verdict_date

    def _set_crawler_limits(self, limit_start, limit_end,
                            max_num_tweets=None, month_interval=None):
        """Configure the crawler's date-limit flags and optional volume knobs."""
        self.tweets_crawler._limit_start_date = limit_start
        self.tweets_crawler._limit_end_date = limit_end
        if max_num_tweets is not None:
            self.tweets_crawler._max_num_tweets = max_num_tweets
        if month_interval is not None:
            self.tweets_crawler._month_interval = month_interval

    def test_retrieve_tweets_by_content_between_dates_after(self):
        """Tweets retrieved after the claim date fall in [claim_date, until_date)."""
        claim_date = self._add_default_claim()
        until_date = str_to_date(u"2017-08-03 00:00:00")
        self._set_crawler_limits(True, True)
        tweets = self.tweets_crawler._retrieve_tweets_between_dates(
            self._claims[u"post0"], u"The Rock Running for President",
            date_to_str(claim_date, "%Y-%m-%d"),
            date_to_str(until_date, "%Y-%m-%d"))
        tweets_date = [tweet.date for tweet in tweets]
        self.assertTrue(
            all(claim_date <= date < until_date for date in tweets_date))
        self.assertGreaterEqual(100, len(tweets))

    def test_retrieve_tweets_by_content_between_dates_before(self):
        """Tweets retrieved before the claim date fall in [since_date, claim_date)."""
        claim_date = self._add_default_claim()
        since_date = str_to_date(u"2016-08-03 00:00:00")
        self._set_crawler_limits(True, True)
        tweets = self.tweets_crawler._retrieve_tweets_between_dates(
            self._claims[u"post0"], u"The Rock Running for President",
            date_to_str(since_date, "%Y-%m-%d"),
            date_to_str(claim_date, "%Y-%m-%d"))
        tweets_date = [tweet.date for tweet in tweets]
        self.assertTrue(
            all(since_date <= date < claim_date for date in tweets_date))
        self.assertGreaterEqual(100, len(tweets))

    def test_retrieve_tweets_by_content_between_dates_1_month_interval(self):
        """With a 1-month interval, tweets stay within one month of the claim."""
        self._add_default_claim()
        since_date = str_to_date(u"2017-01-03 00:00:00")
        until_date = str_to_date(u"2017-03-03 00:00:00")
        self._set_crawler_limits(True, True,
                                 max_num_tweets=133, month_interval=1)
        tweets = self.tweets_crawler._retrieve_old_tweets(
            self._claims[u"post0"], u"The Rock Running for President")

        tweets_date = [tweet.date for tweet in tweets]
        self.assertTrue(
            all(since_date <= date < until_date for date in tweets_date))
        self.assertGreaterEqual(133, len(tweets))

    def test_retrieve_tweets_by_content_between_dates_no_limit_after(self):
        """Without an end-date limit, tweets are only bounded from below."""
        self._add_default_claim()
        since_date = str_to_date(u"2017-01-03 00:00:00")
        self._set_crawler_limits(True, False,
                                 max_num_tweets=250, month_interval=1)
        tweets = self.tweets_crawler._retrieve_old_tweets(
            self._claims[u"post0"], u"The Rock Running for President")

        tweets_date = [tweet.date for tweet in tweets]
        self.assertTrue(all(since_date <= date for date in tweets_date))
        self.assertGreaterEqual(250, len(tweets))

    def test_retrieve_tweets_by_content_between_dates_no_limit_before(self):
        """Without a start-date limit, tweets are only bounded from above."""
        self._add_default_claim()
        until_date = str_to_date(u"2017-03-03 00:00:00")
        self._set_crawler_limits(False, True,
                                 max_num_tweets=250, month_interval=1)
        tweets = self.tweets_crawler._retrieve_old_tweets(
            self._claims[u"post0"], u"The Rock Running for President")

        tweets_date = [tweet.date for tweet in tweets]
        self.assertTrue(all(date < until_date for date in tweets_date))
        self.assertGreaterEqual(250, len(tweets))

    def test_execute_retrieve_tweets_by_full_content_1_month_interval(self):
        """execute() with the content action partitions tweets before/after the claim."""
        self._add_default_claim(
            u"The Rock Running for President, Dwayne Running for President")
        self._set_crawler_limits(True, True,
                                 max_num_tweets=133, month_interval=1)
        self.tweets_crawler._actions = ['get_old_tweets_by_claims_content']
        self.tweets_crawler.execute()

        tweets_before = self.tweets_crawler._claim_id_tweets_id_before_dict[
            u"post0"]
        tweets_after = self.tweets_crawler._claim_id_tweets_id_after_dict[
            u"post0"]
        # The before/after partitions must be disjoint.
        self.assertEqual(0, len(tweets_before & tweets_after))
        tweets_retrieved = len(tweets_before) + len(tweets_after)
        self.assertGreaterEqual(133, tweets_retrieved)
        self.assertEqual(tweets_retrieved, len(self._db.get_posts()))
        self.assertEqual(tweets_retrieved,
                         len(self._db.get_claim_tweet_connections()))
        self.assertLess(0, tweets_retrieved)

    def test_execute_retrieve_tweets_by_key_words_1_month_interval(self):
        """execute() with the keywords action retrieves per keyword group (x3 cap)."""
        self._add_default_claim(
            u"The Rock Running for President,Dwayne Running for President")
        self._set_crawler_limits(True, True,
                                 max_num_tweets=141, month_interval=1)
        self.tweets_crawler._actions = ['get_old_tweets_by_claims_keywords']
        self.tweets_crawler.execute()

        tweets_before = self.tweets_crawler._claim_id_tweets_id_before_dict[
            u"post0"]
        tweets_after = self.tweets_crawler._claim_id_tweets_id_after_dict[
            u"post0"]
        # The before/after partitions must be disjoint.
        self.assertEqual(0, len(tweets_before & tweets_after))
        tweets_retrieved = len(tweets_before) + len(tweets_after)
        self.assertGreaterEqual(141 * 3, tweets_retrieved)
        self.assertEqual(tweets_retrieved, len(self._db.get_posts()))
        self.assertEqual(tweets_retrieved,
                         len(self._db.get_claim_tweet_connections()))
        self.assertLess(0, tweets_retrieved)

    def _add_author(self, author_guid):
        """Insert a minimal test author and remember it on self._author."""
        author = Author()
        author.author_guid = author_guid
        author.author_full_name = u'test author'
        author.author_screen_name = author_guid
        author.name = u'test'
        author.domain = u'tests'
        author.statuses_count = 0
        author.created_at = u"2017-06-14 05:00:00"
        self._db.add_author(author)
        self._author = author

    def _add_post(self, post_id, content, tags, date_str, domain=u'Microblog'):
        """Insert a post for self._author and bump its statuses_count."""
        post = Post()
        post.author = self._author.author_guid
        post.author_guid = self._author.author_guid
        post.content = content
        post.title = post_id
        post.domain = domain
        post.post_id = post_id
        post.guid = post.post_id
        post.date = convert_str_to_unicode_datetime(date_str)
        post.created_at = post.date
        post.tags = tags
        self._db.addPost(post)
        self._author.statuses_count += 1

    def _add_claim(self,
                   claim_id,
                   content,
                   date_str,
                   keywords=u"",
                   post_type=None):
        """Insert a claim (stored via addPost) and cache it in self._claims."""
        claim = Claim()
        claim.claim_id = claim_id
        claim.verdict = post_type
        claim.title = claim_id
        claim.description = content
        claim.verdict_date = convert_str_to_unicode_datetime(date_str)
        claim.keywords = keywords
        claim.url = u"claim url"
        self._db.addPost(claim)
        self._claims[claim.claim_id] = claim