class TestLeadspottingPostsImporter(TestCase):
    """Unit tests for LeadspottingPostsImporter CSV-folder ingestion."""

    def setUp(self):
        # Fresh database per test.
        self._db = DB()
        self._db.setUp()
        self.importer = LeadspottingPostsImporter(self._db)

    def test_skips_one_file_in_folder(self):
        print('\ntest skips one file in folder:')
        self.assertEqual(len(self._db.get_all_posts()), 0)
        self.importer.setUp()
        self.importer._data_folder = 'data/input/datasets/unittests/folder1/'
        self.importer.execute()
        # Three posts from the valid file; the skipped file contributes none.
        self.assertEqual(len(self._db.get_all_posts()), 3)
        valid_post_id = '1a0dccaf-7fe1-38b1-a233-a3fc0dd550a1'
        invalid_post_id = '758e5131-7eec-32c9-b9f3-44484a5cb234'
        self.assertTrue(self._db.get_post_by_id(valid_post_id))
        self.assertFalse(self._db.get_post_by_id(invalid_post_id))

    def test_incorrect_fieds(self):
        # The importer must not insert posts whose CSV rows are missing
        # essential fields.
        print('test incorrect fields')
        self.importer.setUp()
        self.assertEqual(len(self._db.get_all_posts()), 0)
        self.importer._data_folder = 'data/input/datasets/unittests/incorrect_fields/'
        self.importer.execute()
        self.assertEqual(len(self._db.get_all_posts()), 0)

    def tearDown(self):
        self._db.session.close()
        self._db.deleteDB()
        self._db.session.close()
def testDBSetUp(self):
    """After DB.setUp() the schema must contain a 'posts' table."""
    from sqlalchemy.engine.reflection import Inspector
    db = DB()
    db.setUp()
    session = db.Session()  # opened to mirror normal access; not read here
    table_names = set(Inspector.from_engine(db.engine).get_table_names())
    self.assertTrue("posts" in table_names)
def testLogarithm(self):
    """The SQL log() function must be the natural logarithm (log(10) == ln 10)."""
    db = DB()
    db.setUp()
    session = db.Session()
    for row in session.execute("select log(10) as q;"):
        self.assertAlmostEqual(row[0], 2.302585092994046)
class TestPolitiFactPostsCrawler(TestCase):
    """Tests for the PolitiFact truth-o-meter crawler."""

    def setUp(self):
        self.db = DB()
        self.db.setUp()
        self.posts_crawler = PolitiFactPostsCrawler(self.db)

    def test_execute(self):
        self.posts_crawler.execute(None)
        posts = self.posts_crawler._listdic
        expected_url = "http://www.politifact.com/truth-o-meter/statements/2018/apr/06/blog-posting/fake-news-says-cnn-pulled-plug-during-youtube-shoo/"
        found = False
        for post in posts:
            if post["url"] != expected_url:
                continue
            # Verify every field of the one known statement.
            self.assertEqual(post["date"], "2018-04-04 00:00:00")
            self.assertEqual(post["title"], "Fake news says CNN \"pulled the plug\" during YouTube shooting coverage")
            self.assertEqual(post["created_at"], "2018-04-04 00:00:00")
            self.assertEqual(post["content"], "YouTube witness makes shocking admission on live TV CNN pulls plug immediately")
            self.assertEqual(post["domain"], "PolitiFact")
            self.assertTrue(post["guid"] is not None)
            self.assertTrue(post["post_guid"] is not None)
            self.assertTrue(post["post_id"] is not None)
            self.assertTrue(post["author_guid"] is not None)
            self.assertEqual(post["author"], expected_url)
            self.assertTrue(post["author_osn_id"] is not None)
            self.assertEqual(post["references"], "")
            self.assertEqual(post["post_type"], "false")
            found = True
        if not found:
            self.fail()

    def test__build_qury_for_subject(self):
        query = self.posts_crawler._build_qury_for_subject("apple")
        self.assertEqual(query, "http://www.politifact.com/api/statements/truth-o-meter/subjects/apple/json/?n=10")
def setUp(self):
    """Import the Tumblr fixtures and run the account-properties feature generator once."""
    TestBase.setUp(self)
    self.config = getConfig()
    self._start_date = self.config.eval("DEFAULT", "start_date")
    # self._end_date = self.config.get("DEFAULT", "end_date")
    self._tsv_files_path = self.config.get(
        "TumblrImporter", "tsv_test_files_account_properties_feature_generator")
    self._db = DB()
    self._db.setUp()
    self._tumblr_parser = TumblrImporter(self._db)
    self._tumblr_parser.setUp(self._tsv_files_path)
    self._tumblr_parser.execute()
    self._author_guid = "f0f4bb42-3fed-322a-b71a-681179d47ea1"
    parameters = {
        "authors": self._db.get_authors_by_domain(Domains.MICROBLOG),
        "posts": self._db.get_posts_by_domain(Domains.MICROBLOG),
    }
    generator = AccountPropertiesFeatureGenerator(self._db, **parameters)
    generator.execute()
    self._author_features = self._db.get_author_features_by_author_guid(
        author_guid=self._author_guid)
    self._author_features_dict = self._create_author_features_dictionary(
        self._author_features)
def setUp(self):
    """Build an InstagramCrawler whose HTTP layer is replaced with a stub."""
    self._db = DB()
    self._db.setUp()
    self._posts = []
    self._author = None
    self.instagram_crawler = InstagramCrawler(self._db)
    # Bind the module-level stub `f` as the crawler's get_json so tests
    # never hit the network.
    stub = types.MethodType(f, self.instagram_crawler.insta_crawler)
    self.instagram_crawler.insta_crawler.get_json = stub
def setUp(self):
    """Import Tumblr fixtures and run the graph feature generator once."""
    TestBase.setUp(self)
    self.config = getConfig()
    self._start_date = self.config.eval("DEFAULT", "start_date")
    # self._end_date = self.config.get("DEFAULT", "end_date")
    self._tsv_files_path = self.config.get(
        "TumblrImporter", "tsv_test_files_graph_feature_generator")
    self._db = DB()
    self._db.setUp()
    self._tumblr_parser = TumblrImporter(self._db)
    self._tumblr_parser.setUp(self._tsv_files_path)
    self._tumblr_parser.execute()
    self._author_guid = u"f0f4bb42-3fed-322a-b71a-681179d47ea1"
    parameters = {
        "authors": self._db.get_authors_by_domain(Domains.MICROBLOG),
        "posts": self._db.get_posts_by_domain(Domains.MICROBLOG),
    }
    # Every config option in this section maps onto a generator parameter of
    # the same name.
    section = "GraphFeatureGenerator_1"
    for option in ("graph_types", "algorithms", "aggregation_functions",
                   "neighborhood_sizes", "distances_from_labeled_authors",
                   "graph_directed", "graph_weights"):
        parameters[option] = self.config.eval(section, option)
    graph_feature_generator = GraphFeatureGenerator(self._db, **parameters)
    graph_feature_generator.execute()
    self._author_features = self._db.get_author_features_by_author_guid(
        author_guid=self._author_guid)
    self._author_features_dict = self._create_author_features_dictionary(
        self._author_features)
def setUp(self):
    """Create a fresh DB and a RedditCrawler wired to an offline stub."""
    self._db = DB()
    self._db.setUp()
    self._posts = []
    self._author = None
    self.reddit_crawler = RedditCrawler(self._db)
    # Swap the live reddit client for a stub so tests stay offline.
    self.reddit_crawler.reddit = RedditStub()
def setUp(self):
    """Fresh DB plus an OldTweetsCrawler scoped to the Claim domain."""
    self._db = DB()
    self._db.setUp()
    self.tweets_crawler = OldTweetsCrawler(self._db)
    self.tweets_crawler._domain = u'Claim'
    self._add_author(u"author_guid")
    self._claims = {}
def setUp(self):
    """Reset the DB and the per-test fixture collections."""
    self._db = DB()
    self._db.setUp()
    self._domain = u'test'
    self._posts = []
    self._authors = []
    self._texts = []
def setUp(self):
    """Seed two Microblog authors with ten identical-content posts each.

    'acquired_user' is flagged as a bad actor; 'TestUser1' is a plain user.
    The overlapping post content drives the timeline-overlap visualization.
    """
    self.config = getConfig()
    self._db = DB()
    self._db.setUp()
    self.timeline_overlap = TimelineOverlapVisualizationGenerator()

    def make_author(name, osn_id, author_type=None, author_sub_type=None):
        # Factory for a Microblog author; guid/screen name/full name all
        # reuse `name`, matching the original fixtures.
        author = Author()
        author.name = name
        author.domain = 'Microblog'
        author.author_guid = name
        author.author_screen_name = name
        author.author_full_name = name
        author.author_osn_id = osn_id
        author.created_at = datetime.datetime.now()
        author.missing_data_complementor_insertion_date = datetime.datetime.now()
        author.xml_importer_insertion_date = datetime.datetime.now()
        if author_type is not None:
            author.author_type = author_type
            author.author_sub_type = author_sub_type
        self._db.add_author(author)

    def add_posts(author_name, guid_prefix):
        # Ten posts sharing the same content stem so timelines overlap.
        for i in range(1, 11):
            post = Post()
            post.post_id = guid_prefix + str(i)
            post.author = author_name
            post.guid = guid_prefix + str(i)
            post.date = datetime.datetime.now()
            post.domain = 'Microblog'
            post.author_guid = author_name
            post.content = 'InternetTV love it' + str(i)
            post.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(post)

    make_author('acquired_user', 1, author_type='bad_actor',
                author_sub_type='acquired')
    add_posts('acquired_user', 'bad_post')
    make_author('TestUser1', 2)
    add_posts('TestUser1', 'TestPost')
    self._db.commit()
def setUp(self):
    """Fresh DB, one seeded author, and a ClaimToTopicConverter under test."""
    self._db = DB()
    self._db.setUp()
    self._posts = []
    self._claim_dictionary = {}
    self._authors = []
    self._add_author(u'test author')
    self._preprocess_visualization = ClaimToTopicConverter(self._db)
def setUp(self):
    """Prepare a RankAppImporter and remember the author guids the tests inspect."""
    # One guid expected to be normalized by the importer, one not.
    self.normalize_actor_guid = "00f888bdfe92039ccbc440ab27b7804040f195e9dc367bc077270033"
    self.not_normalize_author_guid = "0cc3fd06f73d6613dec1e4e31bcd7c4efd430df3b00dd7fe092cfa5b"
    self._db = DB()
    self._db.setUp()
    self.rank_app_importer = RankAppImporter(self._db)
    self.rank_app_importer.setUp()
def setUp(self):
    """Import the Snopes fake-news CSV once and build a KeywordsGenerator over it."""
    self._db = DB()
    self._db.setUp()
    importer = FakeNewsSnopesImporter(self._db)
    importer._input_csv_file = 'data/input/FakeNewsSnopesImporter/Fake_News_Snopes_V3.csv'
    importer.execute()
    self.fake_news_snopes_importer = importer
    self.keywords_generator = KeywordsGenerator(self._db)
class TestXmlImporter(TestBase):
    """Integration tests for XMLImporter against a real DB and collector output."""

    def setUp(self):
        TestBase.setUp(self)
        self.config = getConfig()
        from DB.schema_definition import DB
        self.db = DB()
        self.db.setUp()
        self.social_network_crawler = Twitter_Rest_Api(self.db)
        self.xml_importer = XMLImporter(self.db)
        self.create_author_table = CreateAuthorTables(self.db)
        self._targeted_twitter_author_ids = self.config.eval(
            'BadActorsCollector', "targeted_twitter_author_ids")
        self._targeted_twitter_post_ids = self.config.eval(
            'BadActorsCollector', "targeted_twitter_post_ids")
        self._bad_actor_collector = BadActorsCollector(self.db)

    def test_umlaut_on_xml_files(self):
        '''
        We created xmls with umlaut. We should parse it to the DB.
        After that we should extract it from DB and assert that the umlaut
        encoded properly.
        '''
        self.xml_importer.setUp()
        self.xml_importer.execute()
        post_guid = u"1fc90ec7e0e430839fb302f682f92cd8"
        post = self.xml_importer._db.get_post_by_id(post_guid)
        # Decode the UTF-8 source literals to unicode before comparing
        # (Python 2: str -> unicode).
        post_content = "kømr ljúga róa úll (foul) / fýla sǫkk (sank) / søkkva☺☻♥♦♣♠•◘○"
        post_content = post_content.decode('utf_8')
        post_title = "kømr ljúga róa úll (foul) / fýla sǫkk (sank) / søkkva øýúœóæá"
        post_title = post_title.decode('utf_8')
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(post.content, post_content)
        self.assertEqual(post.title, post_title)
        self.db.session.close()

    def test_XML_importer_not_overwriting_bad_actor_collector(self):
        # The XML importer must add its insertion date without wiping the one
        # the bad-actors collector already wrote for the same author.
        self._bad_actor_collector.crawl_bad_actors_followers()
        self._bad_actor_collector.crawl_bad_actors_retweeters()
        self.xml_importer.setUp()
        self.xml_importer.execute(getConfig().eval("DEFAULT", "start_date"))
        self.create_author_table.setUp()
        self.create_author_table.execute(getConfig().eval(
            "DEFAULT", "start_date"))
        res = self.db.get_author_by_author_guid(
            u'5371821e67b53582bffbb293b2554dda')
        author = res[0]
        # `is not None` instead of `!= None` (identity check is the idiom).
        self.assertTrue(author.xml_importer_insertion_date is not None and
                        author.bad_actors_collector_insertion_date is not None)
        self.db.session.close()

    def tearDown(self):
        self.db.session.close()
        self.db.deleteDB()
def setUp(self):
    """Fresh DB, a differential word-embedding feature generator, and a test author."""
    self._config_parser = getConfig()
    self._db = DB()
    self._db.setUp()
    self._model = Word_Embedding_Differential_Feature_Generator(self._db)
    self._posts = []
    self._author = None
    self._set_author(u'test_user')
def setUp(self):
    """Fresh DB and a Data_Handler keyed on the 'author_type' label column."""
    self._db = DB()
    self._db.setUp()
    self._data_handler = Data_Handler(self._db, 'author_type')
    self._authors_to_author_features_dict = {}
    self._fill_empty = True
    self._remove_features = []
    self._select_features = []
    # Map the textual labels onto the numeric classes used downstream.
    self._label_text_to_value = {'good': 0, 'bad': 1}
def __init__(self, db):
    """Load Twitter-API rate-limit settings, zero the request counters and open the DB."""
    AbstractController.__init__(self, db)
    section = self.__class__.__name__
    # Each config option is stored on self under the same name with a
    # leading underscore (e.g. "working_app_number" -> _working_app_number).
    rate_limit_options = (
        "working_app_number",
        "maximal_get_friend_ids_requests_in_window",
        "maximal_get_follower_ids_requests_in_window",
        "maximal_get_user_requests_in_window",
        "maximal_user_ids_allowed_in_single_get_user_request",
        "num_of_twitter_status_id_requests_without_checking",
        "num_of_twitter_timeline_requests_without_checking",
        "max_tweet_ids_allowed_in_single_get_tweets_by_tweet_ids_request",
        "max_num_of_tweet_ids_requests_without_checking",
    )
    for option in rate_limit_options:
        setattr(self, "_" + option, self._config_parser.eval(section, option))
    # Per-run request counters.
    self._num_of_get_friend_ids_requests = 0
    self._num_of_get_follower_ids_requests = 0
    self._num_of_get_timeline_statuses = 0
    self._num_of_twitter_status_id_requests = 0
    self._num_of_twitter_timeline_requests = 0
    self._num_of_get_tweet_ids_requests = 0
    self._total_author_connections = []
    print("Creating TwitterApiRequester")
    self._twitter_api_requester = TwitterApiRequester(self._working_app_number)
    # self._find_source_twitter_id()
    logging.info("Setup DB...")
    print("Setup DB...")
    self._db = DB()
    self._db.setUp()
def setUp(self):
    """Run the AppImporter once and remember a known bad and good actor guid."""
    self.bad_actor_guid = "e2f8a58933d5e673d9c673c442cea1b73e9732d27a0f13472fde19f0"
    self.good_actor_guid = "0a2f4a19fb5066c3a67fc9b3325515b8bf0db66b7fec92b63da564a9"
    self._db = DB()
    self._db.setUp()
    self.app_importer = AppImporter(self._db)
    self.app_importer.setUp()
    self._domain = self.app_importer._domain
    self.app_importer.execute()
def setUp(self):
    """Seed authors/posts/claims, then build both Reddit-by-claim feature generators."""
    self._db = DB()
    self._db.setUp()
    self._posts = []
    self._author = None
    self._init_authors()
    self._init_posts()
    self._init_claims()
    # _get_params() is called once per generator, as each may consume its own
    # parameter dict.
    self._reddit_post_by_claim_feature_generator = RedditPostByClaimFeatureGenerator(
        self._db, **self._get_params())
    self._reddit_author_by_claim_feature_generator = RedditAuthorByClaimFeatureGenerator(
        self._db, **self._get_params())
def setUp(self):
    """Wire up the crawler, XML importer, author-table creator and bad-actor collector."""
    TestBase.setUp(self)
    self.config = getConfig()
    from DB.schema_definition import DB
    self.db = DB()
    self.db.setUp()
    self.social_network_crawler = Twitter_Rest_Api(self.db)
    self.xml_importer = XMLImporter(self.db)
    self.create_author_table = CreateAuthorTables(self.db)
    section = 'BadActorsCollector'
    self._targeted_twitter_author_ids = self.config.eval(section, "targeted_twitter_author_ids")
    self._targeted_twitter_post_ids = self.config.eval(section, "targeted_twitter_post_ids")
    self._bad_actor_collector = BadActorsCollector(self.db)
class TestComlexClaimTweetImporter(TestCase):
    """Tests for ComLexClaimTweetImporter claim ingestion."""

    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self._comlex_claim_tweet_importer = ComLexClaimTweetImporter(self._db)

    def tearDown(self):
        self._db.session.close()
        # self._db.deleteDB()

    def test__import_claims(self):
        # Importing the ComLex dataset should yield exactly 2296 claims.
        self._comlex_claim_tweet_importer.execute(None)
        claims = self._db.get_claims()
        self.assertEqual(2296, len(claims))
def testDoubleExecute(self):
    """Running DB.setUp()+execute() twice must leave every table's row count unchanged."""
    import sys
    sys.argv = [sys.argv[0], 'config.ini']
    db = DB()
    table_queries = ["select * from posts", "select * from authors",
                     "select * from topics", "select * from author_citations",
                     "select * from authors_boost_stats",
                     "select * from post_citations",
                     "select * from posts_representativeness",
                     "select * from posts_to_pointers_scores",
                     "select * from posts_to_topic",
                     "select * from visualization_windows"]

    def run_and_count():
        # One full pipeline run followed by a per-table row sample.
        db.setUp()
        db.execute(getConfig().get("DEFAULT", "start_date"))
        return [db.session.execute(query).scalar() for query in table_queries]

    first_run = run_and_count()
    second_run = run_and_count()
    self.assertListEqual(first_run, second_run,
                         "the two executions had different results")
def setUp(self):
    """Configure paths for the small GloVe test model and reset the fixtures."""
    self._config_parser = getConfig()
    self._db = DB()
    self._db.setUp()
    # self._Word_Embedding_Model_Creator.execute(None)
    self._is_load_wikipedia_300d_glove_model = True
    self._wikipedia_model_file_path = "data/input/glove/test_glove.6B.300d_small.txt"
    self._table_name = "wikipedia_model_300d"
    self._word_vector_dict_full_path = "data/output/word_embedding/"
    self._word_vector_dict = {}
    self._author = None
    self._set_author(u'test_user')
    self._counter = 0
    self._posts = []
def setUpClass(cls):
    """
    get_some_resource() is slow, to avoid calling it for each test use
    setUpClass() and store the result as class variable
    """
    super(TestSub2VecModelCreator, cls).setUpClass()
    cls._db = DB()
    cls._db.setUp()
    cls.sub2vec_model_creator = Sub2VecModelCreator(cls._db)
    cls.sub2vec_feature_generator = Sub2VecFeatureGenerator(
        cls._db, **{
            'authors': [],
            'posts': {}
        })
    # Shared edge list reused for every fixture graph built below.
    edges = [(0, 4), (2, 0), (1, 3), (3, 1), (0, 1), (1, 2), (4, 0), (4, 3),
             (2, 3), (3, 0)]
    cls.connected_undirected_graph = cls.create_undirected_graph(
        5, edges, 'connected_undirected_graph')
    # NOTE(review): statement order below is load-bearing.
    # `cls.connected_directed_graph` starts out as a factory method; the
    # second assignment shadows that method with the graph it returns, so the
    # unconnected directed graph MUST be built first. Presumably the factory
    # was meant to be named e.g. create_directed_graph — TODO confirm.
    cls.unconnected_directed_graph = cls.connected_directed_graph(
        7, edges, 'unconnected_directed_graph')
    cls.connected_directed_graph = cls.connected_directed_graph(
        5, edges, 'connected_directed_graph')
    cls.unconnected_undirected_graph = cls.create_undirected_graph(
        7, edges, 'unconnected_undirected_graph')
    cls.add_graph_to_db(cls.connected_undirected_graph)
    cls.add_graph_to_db(cls.unconnected_directed_graph)
    cls.add_graph_to_db(cls.connected_directed_graph)
    cls.add_graph_to_db(cls.unconnected_undirected_graph)
class TwitterCrawlerTests(unittest.TestCase):
    """Live-search tests for Generic_Twitter_Crawler."""

    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self._twitter_crawler = Generic_Twitter_Crawler(self._db)

    def test_get_posts_by_terms(self):
        # A single-term search should return a full page of 100 posts.
        keyword = 'security'
        posts = self._twitter_crawler.get_posts_by_terms([keyword])
        self.assertEqual(100, len(posts[keyword]))

    def tearDown(self):
        self._db.session.close()
        self._db.deleteDB()
        self._db.session.close()