Example #1
class TestLeadspottingPostsImporter(TestCase):
    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self.importer = LeadspottingPostsImporter(self._db)

    def test_skips_one_file_in_folder(self):
        print('\ntest skips one file in folder:')
        self.assertEqual(len(self._db.get_all_posts()), 0)
        self.importer.setUp()
        self.importer._data_folder = 'data/input/datasets/unittests/folder1/'
        self.importer.execute()
        self.assertEqual(len(self._db.get_all_posts()), 3)
        valid_post_id = '1a0dccaf-7fe1-38b1-a233-a3fc0dd550a1'
        invalid_post_id = '758e5131-7eec-32c9-b9f3-44484a5cb234'
        self.assertTrue(self._db.get_post_by_id(valid_post_id))
        self.assertFalse(self._db.get_post_by_id(invalid_post_id))

    def test_incorrect_fields(self):
        # tests that the importer does not insert posts from a CSV that is missing essential fields
        print('test incorrect fields')
        self.importer.setUp()
        all_posts_in_db = self._db.get_all_posts()
        self.assertEqual(len(all_posts_in_db), 0)
        self.importer._data_folder = 'data/input/datasets/unittests/incorrect_fields/'
        self.importer.execute()
        all_posts_in_db = self._db.get_all_posts()
        self.assertEqual(len(all_posts_in_db), 0)

    def tearDown(self):
        self._db.session.close()
        self._db.deleteDB()
        self._db.session.close()
Example #2
    def testDBSetUp(self):
        from sqlalchemy.engine.reflection import Inspector
        db = DB()
        db.setUp()
        session = db.Session()
        inspector = Inspector.from_engine(db.engine)
        self.assertIn("posts", inspector.get_table_names())
Example #3
    def testLogarithm(self):
        db = DB()
        db.setUp()
        session = db.Session()
        recs = session.execute("select log(10) as q;")
        for rec in recs:
            self.assertAlmostEqual(rec[0], 2.302585092994046)  # ln(10)
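
    # The expected value is ln(10), so this test assumes a backend whose log()
    # is the natural logarithm (e.g. MySQL; PostgreSQL's log() is base 10 and
    # its natural log is ln()). A sketch that makes the expectation explicit
    # via math.log instead of a hard-coded constant:
    def testLogarithmAgainstMath(self):
        import math
        db = DB()
        db.setUp()
        session = db.Session()
        recs = session.execute("select log(10) as q;")
        for rec in recs:
            self.assertAlmostEqual(rec[0], math.log(10))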
Example #4
class TestPolitiFactPostsCrawler(TestCase):
    def setUp(self):
        self.db = DB()
        self.db.setUp()
        self.posts_crawler = PolitiFactPostsCrawler(self.db)

    def test_execute(self):
        self.posts_crawler.execute(None)
        posts = self.posts_crawler._listdic
        expected_url = "http://www.politifact.com/truth-o-meter/statements/2018/apr/06/blog-posting/fake-news-says-cnn-pulled-plug-during-youtube-shoo/"
        found = False
        for post in posts:
            if post["url"] == expected_url:
                self.assertEqual(post["date"], "2018-04-04 00:00:00")
                self.assertEqual(post["title"], "Fake news says CNN \"pulled the plug\" during YouTube shooting coverage")
                self.assertEqual(post["created_at"], "2018-04-04 00:00:00")
                self.assertEqual(post["content"], "YouTube witness makes shocking admission on live TV CNN pulls plug immediately")
                self.assertEqual(post["domain"], "PolitiFact")
                self.assertTrue(post["guid"] is not None)
                self.assertTrue(post["post_guid"] is not None)
                self.assertTrue(post["post_id"] is not None)
                self.assertTrue(post["author_guid"] is not None)
                self.assertEqual(post["author"], expected_url)
                self.assertTrue(post["author_osn_id"] is not None)
                self.assertEqual(post["references"], "")
                self.assertEqual(post["post_type"], "false")
                found = True
        if not found:
            self.fail("expected PolitiFact post was not crawled: " + expected_url)


    def test__build_qury_for_subject(self):
        query = self.posts_crawler._build_qury_for_subject("apple")
        self.assertEqual(query, "http://www.politifact.com/api/statements/truth-o-meter/subjects/apple/json/?n=10")
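
    # Judging from the URL asserted above, _build_qury_for_subject() ("qury"
    # is the spelling used by the crawler under test) presumably just
    # interpolates the subject into a fixed API template. A hypothetical
    # sketch of that method, for reference only:
    def _build_qury_for_subject(self, subject):
        return ("http://www.politifact.com/api/statements/truth-o-meter/"
                "subjects/%s/json/?n=10" % subject)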
Example #5
    def setUp(self):
        TestBase.setUp(self)
        self.config = getConfig()
        self._start_date = self.config.eval("DEFAULT", "start_date")
        #self._end_date = self.config.get("DEFAULT", "end_date")

        self._tsv_files_path = self.config.get("TumblrImporter", "tsv_test_files_account_properties_feature_generator")

        self._db = DB()
        self._db.setUp()

        self._tumblr_parser = TumblrImporter(self._db)
        self._tumblr_parser.setUp(self._tsv_files_path)
        self._tumblr_parser.execute()

        self._author_guid = "f0f4bb42-3fed-322a-b71a-681179d47ea1"

        authors = self._db.get_authors_by_domain(Domains.MICROBLOG)
        posts = self._db.get_posts_by_domain(Domains.MICROBLOG)
        parameters = {"authors": authors, "posts": posts}

        account_properties_feature_generator = AccountPropertiesFeatureGenerator(self._db, **parameters)
        account_properties_feature_generator.execute()

        self._author_features = self._db.get_author_features_by_author_guid(author_guid=self._author_guid)
        self._author_features_dict = self._create_author_features_dictionary(self._author_features)
Example #6
    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self._posts = []
        self._author = None
        self.instagram_crawler = InstagramCrawler(self._db)
        self.instagram_crawler.insta_crawler.get_json = types.MethodType(f, self.instagram_crawler.insta_crawler)
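
# 'f' is not shown in this fragment; it is presumably a module-level stub that
# replaces insta_crawler.get_json so the test never touches the network. A
# hypothetical minimal version (the signature is an assumption):
def f(self, *args, **kwargs):
    return {}  # canned JSON payload; a real test would return a recorded fixture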
Example #7
    def setUp(self):
        TestBase.setUp(self)
        self.config = getConfig()
        self._start_date = self.config.eval("DEFAULT", "start_date")
        #self._end_date = self.config.get("DEFAULT", "end_date")

        self._tsv_files_path = self.config.get(
            "TumblrImporter", "tsv_test_files_graph_feature_generator")

        self._db = DB()
        self._db.setUp()

        self._tumblr_parser = TumblrImporter(self._db)
        self._tumblr_parser.setUp(self._tsv_files_path)
        self._tumblr_parser.execute()

        self._author_guid = u"f0f4bb42-3fed-322a-b71a-681179d47ea1"

        authors = self._db.get_authors_by_domain(Domains.MICROBLOG)
        posts = self._db.get_posts_by_domain(Domains.MICROBLOG)
        parameters = {"authors": authors, "posts": posts}

        graph_types = self.config.eval("GraphFeatureGenerator_1",
                                       "graph_types")
        algorithms = self.config.eval("GraphFeatureGenerator_1", "algorithms")
        aggregations = self.config.eval("GraphFeatureGenerator_1",
                                        "aggregation_functions")
        neighborhood_sizes = self.config.eval("GraphFeatureGenerator_1",
                                              "neighborhood_sizes")
        distances_from_labeled_authors = self.config.eval(
            "GraphFeatureGenerator_1", "distances_from_labeled_authors")
        graph_directed = self.config.eval("GraphFeatureGenerator_1",
                                          "graph_directed")
        graph_weights = self.config.eval("GraphFeatureGenerator_1",
                                         "graph_weights")

        parameters.update({
            "graph_types": graph_types,
            "algorithms": algorithms,
            "aggregation_functions": aggregations,
            "neighborhood_sizes": neighborhood_sizes,
            "graph_directed": graph_directed,
            "graph_weights": graph_weights,
            "distances_from_labeled_authors": distances_from_labeled_authors
        })

        graph_feature_generator = GraphFeatureGenerator(self._db, **parameters)
        graph_feature_generator.execute()

        self._author_features = self._db.get_author_features_by_author_guid(
            author_guid=self._author_guid)
        self._author_features_dict = self._create_author_features_dictionary(
            self._author_features)
Example #8
    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self._posts = []
        self._author = None
        self.reddit_crawler = RedditCrawler(self._db)
        self.reddit_crawler.reddit = RedditStub()
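
# RedditStub is defined elsewhere; a hypothetical minimal stand-in, assuming
# the crawler only invokes praw-style client methods on it and can live with
# empty results:
class RedditStub(object):
    def __getattr__(self, name):
        # answer any client method with a no-op that returns an empty list
        return lambda *args, **kwargs: []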
Example #9
    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self.tweets_crawler = OldTweetsCrawler(self._db)
        self.tweets_crawler._domain = u'Claim'
        self._add_author(u"author_guid")
        self._claims = {}
Example #10
    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self._domain = u'test'
        self._posts = []
        self._authors = []
        self._texts = []
Example #11
    def setUp(self):
        self.config = getConfig()
        self._db = DB()
        self._db.setUp()
        self.timeline_overlap = TimelineOverlapVisualizationGenerator()

        author1 = Author()
        author1.name = 'acquired_user'
        author1.domain = 'Microblog'
        author1.author_guid = 'acquired_user'
        author1.author_screen_name = 'acquired_user'
        author1.author_full_name = 'acquired_user'
        author1.author_osn_id = 1
        author1.created_at = datetime.datetime.now()
        author1.missing_data_complementor_insertion_date = datetime.datetime.now()
        author1.xml_importer_insertion_date = datetime.datetime.now()
        author1.author_type = 'bad_actor'
        author1.author_sub_type = 'acquired'
        self._db.add_author(author1)

        for i in range(1, 11):
            post1 = Post()
            post1.post_id = 'bad_post' + str(i)
            post1.author = 'acquired_user'
            post1.guid = 'bad_post' + str(i)
            post1.date = datetime.datetime.now()
            post1.domain = 'Microblog'
            post1.author_guid = 'acquired_user'
            post1.content = 'InternetTV love it' + str(i)
            post1.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(post1)

        author = Author()
        author.name = 'TestUser1'
        author.domain = 'Microblog'
        author.author_guid = 'TestUser1'
        author.author_screen_name = 'TestUser1'
        author.author_full_name = 'TestUser1'
        author.author_osn_id = 2
        author.created_at = datetime.datetime.now()
        author.missing_data_complementor_insertion_date = datetime.datetime.now()
        author.xml_importer_insertion_date = datetime.datetime.now()
        self._db.add_author(author)

        for i in range(1, 11):
            post = Post()
            post.post_id = 'TestPost' + str(i)
            post.author = 'TestUser1'
            post.guid = 'TestPost' + str(i)
            post.date = datetime.datetime.now()
            post.domain = 'Microblog'
            post.author_guid = 'TestUser1'
            post.content = 'InternetTV love it' + str(i)
            post.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(post)

        self._db.commit()
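
    # The two author-plus-posts blocks above are nearly identical; a hedged
    # helper sketch that would remove the duplication (the helper name and
    # keyword-attribute mechanism are hypothetical):
    def _add_author_with_posts(self, guid, post_prefix, osn_id, **extra_attrs):
        author = Author()
        author.name = guid
        author.domain = 'Microblog'
        author.author_guid = guid
        author.author_screen_name = guid
        author.author_full_name = guid
        author.author_osn_id = osn_id
        author.created_at = datetime.datetime.now()
        author.missing_data_complementor_insertion_date = datetime.datetime.now()
        author.xml_importer_insertion_date = datetime.datetime.now()
        for attr, value in extra_attrs.items():  # e.g. author_type='bad_actor'
            setattr(author, attr, value)
        self._db.add_author(author)
        for i in range(1, 11):
            post = Post()
            post.post_id = post_prefix + str(i)
            post.author = guid
            post.guid = post_prefix + str(i)
            post.date = datetime.datetime.now()
            post.domain = 'Microblog'
            post.author_guid = guid
            post.content = 'InternetTV love it' + str(i)
            post.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(post)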
Example #12
    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self._posts = []
        self._claim_dictionary = {}
        self._authors = []
        self._add_author(u'test author')
        self._preprocess_visualization = ClaimToTopicConverter(self._db)
Example #13
    def setUp(self):
        self.normalize_actor_guid = "00f888bdfe92039ccbc440ab27b7804040f195e9dc367bc077270033"
        self.not_normalize_author_guid = "0cc3fd06f73d6613dec1e4e31bcd7c4efd430df3b00dd7fe092cfa5b"
        self._db = DB()
        self._db.setUp()

        self.rank_app_importer = RankAppImporter(self._db)
        self.rank_app_importer.setUp()
Example #14
    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self.fake_news_snopes_importer = FakeNewsSnopesImporter(self._db)
        self.fake_news_snopes_importer._input_csv_file = 'data/input/FakeNewsSnopesImporter/Fake_News_Snopes_V3.csv'
        self.fake_news_snopes_importer.execute()

        self.keywords_generator = KeywordsGenerator(self._db)
Example #15
class TestXmlImporter(TestBase):
    def setUp(self):
        TestBase.setUp(self)
        self.config = getConfig()
        from DB.schema_definition import DB
        self.db = DB()
        self.db.setUp()
        self.social_network_crawler = Twitter_Rest_Api(self.db)
        self.xml_importer = XMLImporter(self.db)
        self.create_author_table = CreateAuthorTables(self.db)
        self._targeted_twitter_author_ids = self.config.eval(
            'BadActorsCollector', "targeted_twitter_author_ids")

        self._targeted_twitter_post_ids = self.config.eval(
            'BadActorsCollector', "targeted_twitter_post_ids")
        self._bad_actor_collector = BadActorsCollector(self.db)

    def test_umlaut_on_xml_files(self):
        '''
        XML files containing umlauts are imported into the DB. We then read
        the post back from the DB and assert that the umlauts were encoded
        properly.
        '''

        self.xml_importer.setUp()
        self.xml_importer.execute()

        post_guid = u"1fc90ec7e0e430839fb302f682f92cd8"
        post = self.xml_importer._db.get_post_by_id(post_guid)
        post_content = "kømr ljúga róa úll (foul) / fýla sǫkk (sank) / søkkva☺☻♥♦♣♠•◘○"
        post_content = post_content.decode('utf_8')
        post_title = "kømr ljúga róa úll (foul) / fýla sǫkk (sank) / søkkva øýúœóæá"
        post_title = post_title.decode('utf_8')

        self.assertEqual(post.content, post_content)
        self.assertEqual(post.title, post_title)
        self.db.session.close()

    def test_XML_importer_not_overwriting_bad_actor_collector(self):
        self._bad_actor_collector.crawl_bad_actors_followers()
        self._bad_actor_collector.crawl_bad_actors_retweeters()
        self.xml_importer.setUp()
        self.xml_importer.execute(getConfig().eval("DEFAULT", "start_date"))
        self.create_author_table.setUp()
        self.create_author_table.execute(getConfig().eval(
            "DEFAULT", "start_date"))
        res = self.db.get_author_by_author_guid(
            u'5371821e67b53582bffbb293b2554dda')
        author = res[0]
        self.assertTrue(author.xml_importer_insertion_date is not None
                        and author.bad_actors_collector_insertion_date is not None)
        self.db.session.close()

    def tearDown(self):
        self.db.session.close()
        self.db.deleteDB()
Example #16
    def setUp(self):
        self._config_parser = getConfig()
        self._db = DB()
        self._db.setUp()

        self._model = Word_Embedding_Differential_Feature_Generator(self._db)

        self._posts = []
        self._author = None
        self._set_author(u'test_user')
Example #17
    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self._data_handler = Data_Handler(self._db, 'author_type')
        self._authors_to_author_features_dict = {}

        self._fill_empty = True
        self._remove_features = []
        self._select_features = []
        self._label_text_to_value = {'good': 0, 'bad': 1}
Example #18
    def __init__(self, db):
        AbstractController.__init__(self, db)

        self._working_app_number = self._config_parser.eval(
            self.__class__.__name__, "working_app_number")

        self._maximal_get_friend_ids_requests_in_window = self._config_parser.eval(
            self.__class__.__name__,
            "maximal_get_friend_ids_requests_in_window")

        self._maximal_get_follower_ids_requests_in_window = self._config_parser.eval(
            self.__class__.__name__,
            "maximal_get_follower_ids_requests_in_window")

        self._maximal_get_user_requests_in_window = self._config_parser.eval(
            self.__class__.__name__, "maximal_get_user_requests_in_window")

        self._maximal_user_ids_allowed_in_single_get_user_request = self._config_parser.eval(
            self.__class__.__name__,
            "maximal_user_ids_allowed_in_single_get_user_request")

        self._num_of_twitter_status_id_requests_without_checking = self._config_parser.eval(
            self.__class__.__name__,
            "num_of_twitter_status_id_requests_without_checking")
        self._num_of_twitter_timeline_requests_without_checking = self._config_parser.eval(
            self.__class__.__name__,
            "num_of_twitter_timeline_requests_without_checking")

        self._max_tweet_ids_allowed_in_single_get_tweets_by_tweet_ids_request = self._config_parser.eval(
            self.__class__.__name__,
            "max_tweet_ids_allowed_in_single_get_tweets_by_tweet_ids_request")

        self._max_num_of_tweet_ids_requests_without_checking = self._config_parser.eval(
            self.__class__.__name__,
            "max_num_of_tweet_ids_requests_without_checking")

        self._num_of_get_friend_ids_requests = 0
        self._num_of_get_follower_ids_requests = 0
        self._num_of_get_timeline_statuses = 0
        self._num_of_twitter_status_id_requests = 0
        self._num_of_twitter_timeline_requests = 0
        self._num_of_get_tweet_ids_requests = 0
        self._total_author_connections = []

        print("Creating TwitterApiRequester")
        self._twitter_api_requester = TwitterApiRequester(
            self._working_app_number)

        # self._find_source_twitter_id()

        logging.info("Setup DB...")
        print("Setup DB...")
        self._db = DB()
        self._db.setUp()
Example #19
    def setUp(self):
        self.bad_actor_guid = "e2f8a58933d5e673d9c673c442cea1b73e9732d27a0f13472fde19f0"
        self.good_actor_guid = "0a2f4a19fb5066c3a67fc9b3325515b8bf0db66b7fec92b63da564a9"

        self._db = DB()
        self._db.setUp()

        self.app_importer = AppImporter(self._db)
        self.app_importer.setUp()
        self._domain = self.app_importer._domain
        self.app_importer.execute()
Example #20
    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self._posts = []
        self._author = None
        self._init_authors()
        self._init_posts()
        self._init_claims()
        self._reddit_post_by_claim_feature_generator = RedditPostByClaimFeatureGenerator(
            self._db, **self._get_params())
        self._reddit_author_by_claim_feature_generator = RedditAuthorByClaimFeatureGenerator(
            self._db, **self._get_params())
Example #21
    def setUp(self):
        TestBase.setUp(self)
        self.config = getConfig()
        from DB.schema_definition import DB
        self.db = DB()
        self.db.setUp()
        self.social_network_crawler = Twitter_Rest_Api(self.db)
        self.xml_importer = XMLImporter(self.db)
        self.create_author_table = CreateAuthorTables(self.db)
        self._targeted_twitter_author_ids = self.config.eval('BadActorsCollector', "targeted_twitter_author_ids")

        self._targeted_twitter_post_ids = self.config.eval('BadActorsCollector', "targeted_twitter_post_ids")
        self._bad_actor_collector = BadActorsCollector(self.db)
Example #22
class TestComlexClaimTweetImporter(TestCase):
    def setUp(self):
        self._db = DB()
        self._db.setUp()

        self._comlex_claim_tweet_importer = ComLexClaimTweetImporter(self._db)

    def tearDown(self):
        self._db.session.close()
        # self._db.deleteDB()

    def test__import_claims(self):
        self._comlex_claim_tweet_importer.execute(None)
        claims = self._db.get_claims()
        self.assertEqual(2296, len(claims))
Example #23
    def testDoubleExecute(self):
        import sys
        sys.argv = [sys.argv[0], 'config.ini']
        db = DB()
        db.setUp()
        db.execute(getConfig().get("DEFAULT", "start_date"))
        # count the rows in every table after the first full run
        table_count_queries = [
            "select count(*) from posts",
            "select count(*) from authors",
            "select count(*) from topics",
            "select count(*) from author_citations",
            "select count(*) from authors_boost_stats",
            "select count(*) from post_citations",
            "select count(*) from posts_representativeness",
            "select count(*) from posts_to_pointers_scores",
            "select count(*) from posts_to_topic",
            "select count(*) from visualization_windows",
        ]
        first_run_counts = [db.session.execute(query).scalar() for query in table_count_queries]
        # run the whole pipeline a second time and re-count: execute() should be idempotent
        db.setUp()
        db.execute(getConfig().get("DEFAULT", "start_date"))
        second_run_counts = [db.session.execute(query).scalar() for query in table_count_queries]
        self.assertListEqual(first_run_counts, second_run_counts,
                             "the two executions had different results")
Example #24
    def setUp(self):
        self._config_parser = getConfig()
        self._db = DB()
        self._db.setUp()

        # self._Word_Embedding_Model_Creator.execute(None)
        self._is_load_wikipedia_300d_glove_model = True
        self._wikipedia_model_file_path = "data/input/glove/test_glove.6B.300d_small.txt"
        self._table_name = "wikipedia_model_300d"
        self._word_vector_dict_full_path = "data/output/word_embedding/"
        self._word_vector_dict = {}

        self._author = None
        self._set_author(u'test_user')
        self._counter = 0
        self._posts = []
Example #25
    @classmethod
    def setUpClass(cls):
        """ get_some_resource() is slow; to avoid calling it for each test, use
            setUpClass() and store the result as a class variable.
        """
        super(TestSub2VecModelCreator, cls).setUpClass()

        cls._db = DB()
        cls._db.setUp()
        cls.sub2vec_model_creator = Sub2VecModelCreator(cls._db)
        cls.sub2vec_feature_generator = Sub2VecFeatureGenerator(
            cls._db, **{
                'authors': [],
                'posts': {}
            })

        edges = [(0, 4), (2, 0), (1, 3), (3, 1), (0, 1), (1, 2), (4, 0),
                 (4, 3), (2, 3), (3, 0)]
        cls.connected_undirected_graph = cls.create_undirected_graph(
            5, edges, 'connected_undirected_graph')
        # assumes a create_directed_graph() helper mirroring create_undirected_graph();
        # calling cls.connected_directed_graph() here would be shadowed by the
        # class attribute of the same name assigned two lines below
        cls.unconnected_directed_graph = cls.create_directed_graph(
            7, edges, 'unconnected_directed_graph')
        cls.connected_directed_graph = cls.create_directed_graph(
            5, edges, 'connected_directed_graph')
        cls.unconnected_undirected_graph = cls.create_undirected_graph(
            7, edges, 'unconnected_undirected_graph')

        cls.add_graph_to_db(cls.connected_undirected_graph)
        cls.add_graph_to_db(cls.unconnected_directed_graph)
        cls.add_graph_to_db(cls.connected_directed_graph)
        cls.add_graph_to_db(cls.unconnected_undirected_graph)
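
    # create_undirected_graph() is not shown; a hedged sketch of what it
    # presumably does, assuming networkx graphs carrying a name (extra nodes
    # beyond the edge list are what make the 7-node variants unconnected):
    @classmethod
    def create_undirected_graph(cls, node_count, edges, name):
        import networkx as nx
        graph = nx.Graph(name=name)
        graph.add_nodes_from(range(node_count))
        graph.add_edges_from(edges)
        return graph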
Example #26
class TwitterCrawlerTests(unittest.TestCase):
    def setUp(self):
        self._db = DB()
        self._db.setUp()
        self._twitter_crawler = Generic_Twitter_Crawler(self._db)

    def test_get_posts_by_terms(self):
        keyword = 'security'
        terms = [keyword]
        posts = self._twitter_crawler.get_posts_by_terms(terms)
        self.assertEqual(100, len(posts[keyword]))

    def tearDown(self):
        self._db.session.close()
        self._db.deleteDB()
        self._db.session.close()
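
    # test_get_posts_by_terms above talks to the live Twitter API and asserts
    # an exact result count, so it fails offline. A hedged variant that skips
    # instead of erroring when the API is unreachable:
    def test_get_posts_by_terms_skips_offline(self):
        keyword = 'security'
        try:
            posts = self._twitter_crawler.get_posts_by_terms([keyword])
        except Exception as error:  # network/auth failures raised by the crawler
            self.skipTest("live Twitter API unavailable: %s" % error)
        self.assertEqual(100, len(posts[keyword]))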