예제 #1
0
    def setUp(self):
        TestBase.setUp(self)
        self.config = getConfig()
        from DB.schema_definition import DB
        self.db = DB()
        self.db.setUp()
        self.social_network_crawler = Twitter_Rest_Api(self.db)
        self.xml_importer = XMLImporter(self.db)
        self.create_author_table = CreateAuthorTables(self.db)
        self._targeted_twitter_author_ids = self.config.eval('BadActorsCollector', "targeted_twitter_author_ids")

        self._targeted_twitter_post_ids = self.config.eval('BadActorsCollector', "targeted_twitter_post_ids")
        self._bad_actor_collector = BadActorsCollector(self.db)
    def setUp(self):
        self.config = getConfig()
        self.db = DB()
        self.db.setUp()
        self.social_network_crawler = Twitter_Rest_Api(self.db)
        self.xml_importer = XMLImporter(self.db)
        self.create_author_table = CreateAuthorTables(self.db)
        self._targeted_twitter_author_ids = self.config.eval(
            'BadActorsCollector', "targeted_twitter_author_ids")
        self._domain = u'Microblog'

        self._targeted_twitter_post_ids = self.config.eval(
            'BadActorsCollector', "targeted_twitter_post_ids")
        self._bad_actor_collector = BadActorsCollector(self.db)

        #The Author and Post for test_mark_missing_bad_retweeters_retrieved_from_vico
        self._author_guid1 = compute_author_guid_by_author_name(
            u'TechmarketNG')
        author = Author()
        author.name = u'TechmarketNG'
        author.domain = self._domain
        author.protected = 0
        author.author_guid = self._author_guid1
        author.author_screen_name = u'TechmarketNG'
        author.author_full_name = u'Techmarket'
        author.statuses_count = 10
        author.author_osn_id = 149159975
        author.followers_count = 12
        author.created_at = datetime.datetime.strptime('2016-04-02 00:00:00',
                                                       '%Y-%m-%d %H:%M:%S')
        author.missing_data_complementor_insertion_date = datetime.datetime.now(
        )
        author.xml_importer_insertion_date = datetime.datetime.now()
        self.db.add_author(author)

        post = Post()
        post.post_id = u'TestPost'
        post.author = u'TechmarketNG'
        post.guid = u'TestPost'
        post.url = u'TestPost'
        tempDate = u'2016-05-05 00:00:00'
        day = datetime.timedelta(1)
        post.date = datetime.datetime.strptime(tempDate,
                                               '%Y-%m-%d %H:%M:%S') + day * 1
        post.domain = self._domain
        post.author_guid = self._author_guid1
        post.content = u"InternetTV love it RT @benny_metanya #wow"
        post.xml_importer_insertion_date = datetime.datetime.now()
        self.db.addPost(post)

        self.db.commit()
class TestXmlImporter(TestBase):
    def setUp(self):
        TestBase.setUp(self)
        self.config = getConfig()
        from DB.schema_definition import DB
        self.db = DB()
        self.db.setUp()
        self.social_network_crawler = Twitter_Rest_Api(self.db)
        self.xml_importer = XMLImporter(self.db)
        self.create_author_table = CreateAuthorTables(self.db)
        self._targeted_twitter_author_ids = self.config.eval(
            'BadActorsCollector', "targeted_twitter_author_ids")

        self._targeted_twitter_post_ids = self.config.eval(
            'BadActorsCollector', "targeted_twitter_post_ids")
        self._bad_actor_collector = BadActorsCollector(self.db)

    def test_umlaut_on_xml_files(self):
        '''
        We created xmls with umlaut. We should parse it to the DB.
        After that we should extract it from DB and assert that the umlaut encoded properly.
        '''

        self.xml_importer.setUp()
        self.xml_importer.execute()

        post_guid = u"1fc90ec7e0e430839fb302f682f92cd8"
        post = self.xml_importer._db.get_post_by_id(post_guid)
        post_content = "kømr ljúga róa úll (foul) / fýla sǫkk (sank) / søkkva☺☻♥♦♣♠•◘○"
        post_content = post_content.decode('utf_8')
        post_title = "kømr ljúga róa úll (foul) / fýla sǫkk (sank) / søkkva øýúœóæá"
        post_title = post_title.decode('utf_8')

        self.assertEquals(post.content, post_content)
        self.assertEquals(post.title, post_title)
        self.db.session.close()

    def test_XML_importer_not_overwriting_bad_actor_collector(self):
        self._bad_actor_collector.crawl_bad_actors_followers()
        self._bad_actor_collector.crawl_bad_actors_retweeters()
        self.xml_importer.setUp()
        self.xml_importer.execute(getConfig().eval("DEFAULT", "start_date"))
        self.create_author_table.setUp()
        self.create_author_table.execute(getConfig().eval(
            "DEFAULT", "start_date"))
        res = self.db.get_author_by_author_guid(
            u'5371821e67b53582bffbb293b2554dda')
        author = res[0]
        self.assertTrue(author.xml_importer_insertion_date != None
                        and author.bad_actors_collector_insertion_date != None)
        self.db.session.close()

    def tearDown(self):
        self.db.session.close()
        self.db.deleteDB()
        pass
예제 #4
0
class TestBadActorCollector(unittest.TestCase):
    def setUp(self):
        self.config = getConfig()
        self.db = DB()
        self.db.setUp()
        self.social_network_crawler = Twitter_Rest_Api(self.db)
        self.xml_importer = XMLImporter(self.db)
        self.create_author_table = CreateAuthorTables(self.db)
        self._targeted_twitter_author_ids = self.config.eval(
            'BadActorsCollector', "targeted_twitter_author_ids")
        self._domain = u'Microblog'

        self._targeted_twitter_post_ids = self.config.eval(
            'BadActorsCollector', "targeted_twitter_post_ids")
        self._bad_actor_collector = BadActorsCollector(self.db)

        #The Author and Post for test_mark_missing_bad_retweeters_retrieved_from_vico
        self._author_guid1 = u'05cd2e04ffaf3c5dabd03d13b63afab6'
        author = Author()
        author.name = u'TechmarketNG'
        author.domain = self._domain
        author.protected = 0
        author.author_guid = self._author_guid1
        author.author_screen_name = u'TechmarketNG'
        author.author_full_name = u'Techmarket'
        author.statuses_count = 10
        author.author_osn_id = 149159975
        author.followers_count = 12
        author.created_at = datetime.datetime.strptime('2016-04-02 00:00:00',
                                                       '%Y-%m-%d %H:%M:%S')
        author.missing_data_complementor_insertion_date = datetime.datetime.now(
        )
        author.xml_importer_insertion_date = datetime.datetime.now()
        self.db.add_author(author)

        post = Post()
        post.post_id = u'TestPost'
        post.author = u'TechmarketNG'
        post.guid = u'TestPost'
        post.url = u'TestPost'
        tempDate = u'2016-05-05 00:00:00'
        day = datetime.timedelta(1)
        post.date = datetime.datetime.strptime(tempDate,
                                               '%Y-%m-%d %H:%M:%S') + day * 1
        post.domain = self._domain
        post.author_guid = self._author_guid1
        post.content = u"InternetTV love it RT @benny_metanya #wow"
        post.xml_importer_insertion_date = datetime.datetime.now()
        self.db.addPost(post)

        self.db.commit()

    def test_bad_actor_collector_not_overwriting_XML_importer(self):
        self.xml_importer.setUp()
        self.xml_importer.execute(getConfig().eval("DEFAULT", "start_date"))
        self.create_author_table.setUp()
        self.create_author_table.execute(getConfig().eval(
            "DEFAULT", "start_date"))
        self._bad_actor_collector.execute()
        res = self.db.get_author_by_author_guid_and_domain(
            u'5371821e67b53582bffbb293b2554dda', self._domain)
        author = res[0]
        self.assertTrue(author.xml_importer_insertion_date != None
                        and author.bad_actors_collector_insertion_date != None)
        self.db.session.close()

    def test_BadActor_collector_crawl_bad_actors_followers(self):
        self._bad_actor_collector.crawl_bad_actors_followers()
        followers = [
            connection for connection in
            self.db.get_author_connections_by_connection_type("follower")
        ]
        cursor = self.db.get_temp_author_connections()
        temp_author_connection_tuples = [
            temp_connection for temp_connection in self.db.result_iter(cursor)
        ]
        count_total_followers = len(
            self.social_network_crawler._twitter_api_requester.
            get_follower_ids_by_user_id(714689542092169216))
        self.assertEqual(
            len(followers) + len(temp_author_connection_tuples),
            count_total_followers)
        self.db.session.close()

    def test_mark_missing_bad_retweeters_retrieved_from_vico(self):
        self._bad_actor_collector.mark_missing_bad_retweeters()
        test = False
        author = self.db.get_author_by_author_guid(self._author_guid1)[0]
        if (author.mark_missing_bad_actor_retweeters_insertion_date != None
                and author.author_type == Author_Type.BAD_ACTOR):
            test = True
        self.assertTrue(test)
        self.db.session.close()

    def test_crawl_bad_actors_retweeters(self):
        self._bad_actor_collector.crawl_bad_actors_retweeters()
        retweets = self.db.get_post_retweeter_connections_by_post_id(
            self._targeted_twitter_post_ids[0])
        self.assertGreater(len(retweets), 0)
        self.db.session.close()

    def testIncorectParameters(self):
        self.db.setUp()
        test = True
        temp = self._bad_actor_collector._actions
        try:
            self._bad_actor_collector._actions = [
                'ggg', 'crawl_bad_actors_followers'
            ]
            self._bad_actor_collector.execute()
            author_guid = compute_author_guid_by_author_name(
                u'AnikaAhammed1').replace('-', '')
            author = self.db.get_author_by_author_guid(author_guid)[0]
            if (author.bad_actors_collector_insertion_date == None
                    or author.description == None):
                test = False
        except:
            test = False
        self.assertTrue(test)
        self._bad_actor_collector._actions = temp
        self.db.session.close()

    def tearDown(self):
        self.db.session.close()
        self.db.deleteDB()