class TestPostCitationCreator(TestCase):
    """Tests for PostCitationCreator against a freshly initialized test DB."""

    def setUp(self):
        self._db = DB()
        self._db.setUp()

    def tearDown(self):
        # Fix: the original class had no tearDown, leaking the test DB
        # between runs. Mirror the cleanup used by sibling test classes
        # in this file (close the session, then delete the DB).
        self._db.session.close()
        self._db.deleteDB()

    def test_execute(self):
        _author_guid1 = u'05cd2e04ffaf3c5dabd03d13b63afab6'
        post_content = u"InternetTV love it #wow https://t.co/tRRt https://t.co/Ao1KOOx77H"
        # "https://twitter.com/AmichaiStein1/status/725022086377431041/photo/1"
        self.create_post(_author_guid1, post_content)
        pc = PostCitationCreator(self._db)
        pc.execute()
        # After execution no posts are expected to remain in the
        # 'Microblog' domain.
        posts = self._db.get_posts_by_domain(u'Microblog')
        self.assertEqual(0, len(posts))

    def create_post(self, _author_guid1, post_content):
        """Insert one Microblog post authored by *_author_guid1* and commit."""
        post = Post()
        post.post_id = u'TestPost'
        post.author = u'TechmarketNG'
        post.guid = u'TestPost'
        post.url = u'Url_From'
        tempDate = u'2016-05-05 00:00:00'
        day = datetime.timedelta(1)
        # Post is dated one day after the fixed base date.
        post.date = datetime.datetime.strptime(tempDate, '%Y-%m-%d %H:%M:%S') + day
        post.domain = u'Microblog'
        post.author_guid = _author_guid1
        post.content = post_content
        post.xml_importer_insertion_date = datetime.datetime.now()
        self._db.addPost(post)
        self._db.commit()
class TestTimelineOverlapVisualizationGenerator(TestCase):
    """Tests for TimelineOverlapVisualizationGenerator.

    setUp seeds the DB with one labeled 'bad_actor'/'acquired' author and
    one unlabeled author, each owning 10 posts with identical content
    (so their timelines overlap).
    """

    def setUp(self):
        self.config = getConfig()
        self._db = DB()
        self._db.setUp()
        self.timeline_overlap = TimelineOverlapVisualizationGenerator()
        # Labeled bad actor whose timeline the generator should flag.
        self._add_author('acquired_user', 1,
                         author_type='bad_actor', author_sub_type='acquired')
        self._add_posts('acquired_user', 'bad_post')
        # Unlabeled user with the same post contents.
        self._add_author('TestUser1', 2)
        self._add_posts('TestUser1', 'TestPost')
        self._db.commit()

    def _add_author(self, name, osn_id, author_type=None, author_sub_type=None):
        # Helper: insert an Author whose identity fields all equal *name*.
        # Type/sub-type are set only when given, matching the original
        # setUp which labeled only the first author.
        author = Author()
        author.name = name
        author.domain = 'Microblog'
        author.author_guid = name
        author.author_screen_name = name
        author.author_full_name = name
        author.author_osn_id = osn_id
        author.created_at = datetime.datetime.now()
        author.missing_data_complementor_insertion_date = datetime.datetime.now()
        author.xml_importer_insertion_date = datetime.datetime.now()
        if author_type is not None:
            author.author_type = author_type
        if author_sub_type is not None:
            author.author_sub_type = author_sub_type
        self._db.add_author(author)

    def _add_posts(self, author_guid, post_id_prefix):
        # Helper: insert posts 1..10 for *author_guid*; contents are the
        # same for every author so the timelines overlap.
        for i in range(1, 11):
            post = Post()
            post.post_id = post_id_prefix + str(i)
            post.author = author_guid
            post.guid = post_id_prefix + str(i)
            post.date = datetime.datetime.now()
            post.domain = 'Microblog'
            post.author_guid = author_guid
            post.content = 'InternetTV love it' + str(i)
            post.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(post)

    def test_generate_timeline_overlap_csv(self):
        self.timeline_overlap.setUp()
        self.timeline_overlap.generate_timeline_overlap_csv()
        # The acquired user's labels must survive the generation step.
        author = self._db.get_author_by_author_guid('acquired_user')
        self.assertEqual(author.author_type, 'bad_actor')
        self.assertEqual(author.author_sub_type, 'acquired')

    def tearDown(self):
        # Fix: the original called the deprecated Session.close_all() and
        # closed the session twice; one close before deleteDB suffices.
        self._db.session.close()
        self._db.deleteDB()
class TestOldTweetsCrawler(TestCase):
    """Integration tests for OldTweetsCrawler.

    NOTE(review): verified at 21/08/2018 -- these tests retrieve live
    tweets, so the returned tweet counts may change over time.
    """

    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self.tweets_crawler = OldTweetsCrawler(self._db)
        self.tweets_crawler._domain = u'Claim'
        self._add_author(u"author_guid")
        self._claims = {}

    def tearDown(self):
        self._db.session.close()

    def _configure_crawler(self, limit_start_date, limit_end_date,
                           max_num_tweets=None, month_interval=None):
        # Helper: set the crawler knobs every test tweaked inline before.
        # Optional values are applied only when given, preserving the
        # crawler's own defaults otherwise.
        self.tweets_crawler._limit_start_date = limit_start_date
        self.tweets_crawler._limit_end_date = limit_end_date
        if max_num_tweets is not None:
            self.tweets_crawler._max_num_tweets = max_num_tweets
        if month_interval is not None:
            self.tweets_crawler._month_interval = month_interval

    def test_retrieve_tweets_by_content_between_dates_after(self):
        self._add_claim(u"post0", u"The Rock Running for President",
                        u"2017-02-03 00:00:00")
        self._db.commit()
        claim_date = self._claims[u"post0"].verdict_date
        until_date = str_to_date(u"2017-08-03 00:00:00")
        self._configure_crawler(True, True)
        tweets = self.tweets_crawler._retrieve_tweets_between_dates(
            self._claims[u"post0"], u"The Rock Running for President",
            date_to_str(claim_date, "%Y-%m-%d"),
            date_to_str(until_date, "%Y-%m-%d"))
        # List comprehension instead of map(): same values, but a real
        # list instead of a one-shot iterator under Python 3.
        tweet_dates = [tweet.date for tweet in tweets]
        self.assertTrue(all(claim_date <= date < until_date
                            for date in tweet_dates))
        self.assertGreaterEqual(100, len(tweets))

    def test_retrieve_tweets_by_content_between_dates_before(self):
        self._add_claim(u"post0", u"The Rock Running for President",
                        u"2017-02-03 00:00:00")
        self._db.commit()
        claim_date = self._claims[u"post0"].verdict_date
        since_date = str_to_date(u"2016-08-03 00:00:00")
        self._configure_crawler(True, True)
        tweets = self.tweets_crawler._retrieve_tweets_between_dates(
            self._claims[u"post0"], u"The Rock Running for President",
            date_to_str(since_date, "%Y-%m-%d"),
            date_to_str(claim_date, "%Y-%m-%d"))
        tweet_dates = [tweet.date for tweet in tweets]
        self.assertTrue(all(since_date <= date < claim_date
                            for date in tweet_dates))
        self.assertGreaterEqual(100, len(tweets))

    def test_retrieve_tweets_by_content_between_dates_1_month_interval(self):
        self._add_claim(u"post0", u"The Rock Running for President",
                        u"2017-02-03 00:00:00")
        self._db.commit()
        # One month either side of the 2017-02-03 verdict date.
        since_date = str_to_date(u"2017-01-03 00:00:00")
        until_date = str_to_date(u"2017-03-03 00:00:00")
        self._configure_crawler(True, True, max_num_tweets=133, month_interval=1)
        tweets = self.tweets_crawler._retrieve_old_tweets(
            self._claims[u"post0"], u"The Rock Running for President")
        tweet_dates = [tweet.date for tweet in tweets]
        self.assertTrue(all(since_date <= date < until_date
                            for date in tweet_dates))
        self.assertGreaterEqual(133, len(tweets))

    def test_retrieve_tweets_by_content_between_dates_no_limit_after(self):
        self._add_claim(u"post0", u"The Rock Running for President",
                        u"2017-02-03 00:00:00")
        self._db.commit()
        since_date = str_to_date(u"2017-01-03 00:00:00")
        # End date unlimited: only the lower bound is asserted.
        self._configure_crawler(True, False, max_num_tweets=250, month_interval=1)
        tweets = self.tweets_crawler._retrieve_old_tweets(
            self._claims[u"post0"], u"The Rock Running for President")
        tweet_dates = [tweet.date for tweet in tweets]
        self.assertTrue(all(since_date <= date for date in tweet_dates))
        self.assertGreaterEqual(250, len(tweets))

    def test_retrieve_tweets_by_content_between_dates_no_limit_before(self):
        self._add_claim(u"post0", u"The Rock Running for President",
                        u"2017-02-03 00:00:00")
        self._db.commit()
        until_date = str_to_date(u"2017-03-03 00:00:00")
        # Start date unlimited: only the upper bound is asserted.
        self._configure_crawler(False, True, max_num_tweets=250, month_interval=1)
        tweets = self.tweets_crawler._retrieve_old_tweets(
            self._claims[u"post0"], u"The Rock Running for President")
        tweet_dates = [tweet.date for tweet in tweets]
        self.assertTrue(all(date < until_date for date in tweet_dates))
        self.assertGreaterEqual(250, len(tweets))

    def test_execute_retrieve_tweets_by_full_content_1_month_interval(self):
        self._add_claim(
            u"post0", u"The Rock Running for President",
            u"2017-02-03 00:00:00",
            u"The Rock Running for President, Dwayne Running for President")
        self._db.commit()
        self._configure_crawler(True, True, max_num_tweets=133, month_interval=1)
        self.tweets_crawler._actions = ['get_old_tweets_by_claims_content']
        self.tweets_crawler.execute()
        tweets_before = self.tweets_crawler._claim_id_tweets_id_before_dict[u"post0"]
        tweets_after = self.tweets_crawler._claim_id_tweets_id_after_dict[u"post0"]
        # The before/after buckets must be disjoint sets of tweet ids.
        self.assertEqual(0, len(tweets_before & tweets_after))
        tweets_retrieved = len(tweets_before) + len(tweets_after)
        self.assertGreaterEqual(133, tweets_retrieved)
        # Every retrieved tweet becomes a post plus a claim-tweet connection.
        self.assertEqual(tweets_retrieved, len(self._db.get_posts()))
        self.assertEqual(tweets_retrieved,
                         len(self._db.get_claim_tweet_connections()))
        self.assertLess(0, tweets_retrieved)

    def test_execute_retrieve_tweets_by_key_words_1_month_interval(self):
        self._add_claim(
            u"post0", u"The Rock Running for President",
            u"2017-02-03 00:00:00",
            u"The Rock Running for President,Dwayne Running for President")
        self._db.commit()
        self._configure_crawler(True, True, max_num_tweets=141, month_interval=1)
        self.tweets_crawler._actions = ['get_old_tweets_by_claims_keywords']
        self.tweets_crawler.execute()
        tweets_before = self.tweets_crawler._claim_id_tweets_id_before_dict[u"post0"]
        tweets_after = self.tweets_crawler._claim_id_tweets_id_after_dict[u"post0"]
        self.assertEqual(0, len(tweets_before & tweets_after))
        tweets_retrieved = len(tweets_before) + len(tweets_after)
        self.assertGreaterEqual(141 * 3, tweets_retrieved)
        self.assertEqual(tweets_retrieved, len(self._db.get_posts()))
        self.assertEqual(tweets_retrieved,
                         len(self._db.get_claim_tweet_connections()))
        self.assertLess(0, tweets_retrieved)

    def _add_author(self, author_guid):
        # Helper: insert a minimal Author and remember it in self._author.
        author = Author()
        author.author_guid = author_guid
        author.author_full_name = u'test author'
        author.author_screen_name = author_guid
        author.name = u'test'
        author.domain = u'tests'
        author.statuses_count = 0
        author.created_at = u"2017-06-14 05:00:00"
        self._db.add_author(author)
        self._author = author

    def _add_post(self, post_id, content, tags, date_str, domain=u'Microblog'):
        # Helper: insert a Post owned by self._author and bump its
        # statuses_count. (Kept for API compatibility with other tests.)
        post = Post()
        post.author = self._author.author_guid
        post.author_guid = self._author.author_guid
        post.content = content
        post.title = post_id
        post.domain = domain
        post.post_id = post_id
        post.guid = post.post_id
        post.date = convert_str_to_unicode_datetime(date_str)
        post.created_at = post.date
        post.tags = tags
        self._db.addPost(post)
        self._author.statuses_count += 1

    def _add_claim(self, claim_id, content, date_str, keywords=u"",
                   post_type=None):
        # Helper: insert a Claim and register it in self._claims so the
        # tests can look it up by id.
        claim = Claim()
        claim.claim_id = claim_id
        claim.verdict = post_type
        claim.title = claim_id
        claim.description = content
        claim.verdict_date = convert_str_to_unicode_datetime(date_str)
        claim.keywords = keywords
        claim.url = u"claim url"
        self._db.addPost(claim)
        self._claims[claim.claim_id] = claim