class TestClaimToTopicConverter(TestCase):
    """Integration tests for ClaimToTopicConverter.

    Verifies that each claim becomes a topic whose terms are the cleaned
    words of the claim description, and that post- and author-to-topic
    mappings are derived from claim-tweet connections.
    """

    def setUp(self):
        # Fresh DB per test; inserted entities are cached locally so
        # assertions can compare against what was written.
        self._db = DB()
        self._db.setUp()
        self._posts = []
        self._claim_dictionary = {}
        self._authors = []
        self._add_author(u'test author')
        self._preprocess_visualization = ClaimToTopicConverter(self._db)

    def tearDown(self):
        self._db.session.close_all()
        self._db.deleteDB()
        self._db.session.close()

    def test_generate_topics_no_topics(self):
        # No claims inserted -> no topics should be generated.
        self._preprocess_visualization.generate_topics_tables()
        topics = self._db.get_topics()
        self.assertEqual(topics, [])

    def test_generate_topics_from_1_claim(self):
        self._add_claim(u'claim1', u'claim1 content')
        self._db.session.commit()
        self._preprocess_visualization.generate_topics_tables()
        self.assertTopicInserted(u'claim1', [u'claim1', u'content'])

    def test_generate_topics_from_5_claims(self):
        self._add_claim(u'claim1', u'claim1 content')
        self._add_claim(u'claim2', u'claim2 content')
        self._add_claim(u'claim3', u'claim3 content move')
        self._add_claim(u'claim4', u'claim4 dif data')
        self._add_claim(u'claim5', u'claim5 some boring text')
        self._db.session.commit()
        self._preprocess_visualization.generate_topics_tables()
        # Each claim gets its own topic built from its description words.
        self.assertTopicInserted(u'claim1', [u'claim1', u'content'])
        self.assertTopicInserted(u'claim2', [u'claim2', u'content'])
        self.assertTopicInserted(u'claim3', [u'claim3', u'content', u'move'])
        self.assertTopicInserted(u'claim4', [u'claim4', u'dif', u'data'])
        self.assertTopicInserted(u'claim5',
                                 [u'claim5', u'some', u'boring', u'text'])

    def test_generate_post_topic_mapping_no_claim(self):
        self._preprocess_visualization.generate_post_topic_mapping()
        mappings = self._db.get_post_topic_mapping()
        self.assertEqual(0, len(mappings))

    def test_generate_post_topic_mapping_1_claim(self):
        # Three posts connected to one claim must all map to that claim's
        # topic with probability 1.0.
        self._add_claim(u'claim1', u'claim1 content')
        self._add_post(u"test author", u'post1', u'post1 content of data',
                       u'Microblog')
        self._add_post(u"test author", u'post2', u'post2  bla bla',
                       u'Microblog')
        self._add_post(u"test author", u'post3', u'post3 noting  new',
                       u'Microblog')
        self._add_claim_tweet_connection(u'claim1', u'post1')
        self._add_claim_tweet_connection(u'claim1', u'post2')
        self._add_claim_tweet_connection(u'claim1', u'post3')
        self._db.session.commit()

        self._preprocess_visualization.generate_topics_tables()
        self._preprocess_visualization.generate_post_topic_mapping()

        mappings = self._db.get_post_topic_mapping()
        mappings = [(tm.post_id, tm.max_topic_id, tm.max_topic_dist)
                    for tm in mappings]
        topic_id = self._preprocess_visualization.get_claim_id_topic_dictionary(
        )[u'claim1']
        self.assertEqual(3, len(mappings))
        self.assertSetEqual(
            {('post1', topic_id, 1.0), ('post2', topic_id, 1.0),
             ('post3', topic_id, 1.0)}, set(mappings))

    def test_generate_post_topic_mapping_2_claim(self):
        # Posts split over two claims map to the respective claim's topic.
        self._add_claim(u'claim1', u'claim1 content')
        self._add_claim(u'claim2', u'claim1 content')
        self._add_post(u"test author", u'post1', u'post1 content of data',
                       u'Microblog')
        self._add_post(u"test author", u'post2', u'post2  bla bla',
                       u'Microblog')
        self._add_post(u"test author", u'post3', u'post3 noting  new',
                       u'Microblog')
        self._add_post(u"test author", u'post4', u'post4  bla bla',
                       u'Microblog')
        self._add_post(u"test author", u'post5', u'post5 noting  new',
                       u'Microblog')
        self._add_claim_tweet_connection(u'claim1', u'post1')
        self._add_claim_tweet_connection(u'claim1', u'post2')
        self._add_claim_tweet_connection(u'claim1', u'post3')
        self._add_claim_tweet_connection(u'claim2', u'post4')
        self._add_claim_tweet_connection(u'claim2', u'post5')
        self._db.session.commit()
        self._preprocess_visualization.generate_topics_tables()
        self._preprocess_visualization.generate_post_topic_mapping()

        mappings = self._db.get_post_topic_mapping()
        mappings = [(tm.post_id, tm.max_topic_id, tm.max_topic_dist)
                    for tm in mappings]
        topic_id1 = self._preprocess_visualization.get_claim_id_topic_dictionary(
        )[u'claim1']
        topic_id2 = self._preprocess_visualization.get_claim_id_topic_dictionary(
        )[u'claim2']
        self.assertEqual(5, len(mappings))
        self.assertSetEqual(
            {('post1', topic_id1, 1.0), ('post2', topic_id1, 1.0),
             ('post3', topic_id1, 1.0), ('post4', topic_id2, 1.0),
             ('post5', topic_id2, 1.0)}, set(mappings))

    def test__generate_author_topic_mapping_2_claim(self):
        # 'test author' has 3 posts on claim1's topic and 2 on claim2's
        # (0.6/0.4); 'test author2' has no posts at all (0/0).
        self._add_author(u'test author2')
        self._add_claim(u'claim1', u'claim1 content')
        self._add_claim(u'claim2', u'claim1 content')
        self._add_post(u"test author", u'post1', u'post1 content of data',
                       u'Microblog')
        self._add_post(u"test author", u'post2', u'post2  bla bla',
                       u'Microblog')
        self._add_post(u"test author", u'post3', u'post3 noting  new',
                       u'Microblog')
        self._add_post(u"test author", u'post4', u'post4  bla bla',
                       u'Microblog')
        self._add_post(u"test author", u'post5', u'post5 noting  new',
                       u'Microblog')
        self._add_claim_tweet_connection(u'claim1', u'post1')
        self._add_claim_tweet_connection(u'claim1', u'post2')
        self._add_claim_tweet_connection(u'claim1', u'post3')
        self._add_claim_tweet_connection(u'claim2', u'post4')
        self._add_claim_tweet_connection(u'claim2', u'post5')
        self._db.session.commit()
        self._preprocess_visualization._domain = u"Microblog"
        self._preprocess_visualization.generate_topics_tables()
        self._preprocess_visualization.generate_post_topic_mapping()
        self._preprocess_visualization.generate_author_topic_mapping()

        mapping = self._db.get_author_topic_mapping()
        self.assertEqual(2, len(mapping))
        self.assertSetEqual(
            {(u'test author', 0.6, 0.4), (u'test author2', 0, 0)},
            set(mapping))

    def test_visualization(self):
        # End-to-end run via execute(): topics, post mapping and author
        # mapping are all produced in one pass.
        self._add_author(u'test author2', u"bad_actor")
        self._add_claim(u'claim1', u'claim1 content')
        self._add_claim(u'claim2', u'claim2 content')
        self._add_post(u"test author", u'post1', u'post1 content of data',
                       u'Microblog')
        self._add_post(u"test author", u'post2', u'post2  bla bla',
                       u'Microblog')
        self._add_post(u"test author", u'post3', u'post3 noting  new',
                       u'Microblog')
        self._add_post(u"test author2", u'post4', u'post4  bla bla',
                       u'Microblog')
        self._add_post(u"test author2", u'post5', u'post5 noting  new',
                       u'Microblog')
        self._add_claim_tweet_connection(u'claim1', u'post1')
        self._add_claim_tweet_connection(u'claim1', u'post2')
        self._add_claim_tweet_connection(u'claim1', u'post4')
        self._add_claim_tweet_connection(u'claim2', u'post3')
        self._add_claim_tweet_connection(u'claim2', u'post5')
        self._db.session.commit()
        self._preprocess_visualization._domain = u"Microblog"
        self._preprocess_visualization.execute()

        author_topic_mapping = self._db.get_author_topic_mapping()
        post_topic_mappings = self._db.get_post_topic_mapping()
        post_topic_mappings = [(tm.post_id, tm.max_topic_id, tm.max_topic_dist)
                               for tm in post_topic_mappings]
        topic_id1 = self._preprocess_visualization.get_claim_id_topic_dictionary(
        )[u'claim1']
        topic_id2 = self._preprocess_visualization.get_claim_id_topic_dictionary(
        )[u'claim2']
        self.assertEqual(2, len(author_topic_mapping))
        # Fractions are rounded to 12 decimal places (2/3 and 1/3 for
        # 'test author'; 1/2 each for 'test author2').
        self.assertSetEqual(
            {(u'test author', 0.666666666667, 0.333333333333),
             (u'test author2', 0.5, 0.5)}, set(author_topic_mapping))
        self.assertSetEqual(
            {('post1', topic_id1, 1.0), ('post2', topic_id1, 1.0),
             ('post3', topic_id2, 1.0), ('post4', topic_id1, 1.0),
             ('post5', topic_id2, 1.0)}, set(post_topic_mappings))

    # def test_double_execution_visualization(self):
    #     self._add_author(u'test author2', u"bad_actor")
    #     self._add_claim(u'claim1', u'claim1 content')
    #     self._add_claim(u'claim2', u'claim2 content')
    #     self._add_post(u"test author", u'post1', u'post1 content of data', u'Microblog')
    #     self._add_post(u"test author", u'post2', u'post2  bla bla', u'Microblog')
    #     self._add_post(u"test author", u'post3', u'post3 noting  new', u'Microblog')
    #     self._add_post(u"test author2", u'post4', u'post4  bla bla', u'Microblog')
    #     self._add_post(u"test author2", u'post5', u'post5 noting  new', u'Microblog')
    #     self._add_claim_tweet_connection(u'claim1', u'post1')
    #     self._add_claim_tweet_connection(u'claim1', u'post2')
    #     self._add_claim_tweet_connection(u'claim1', u'post4')
    #     self._add_claim_tweet_connection(u'claim2', u'post3')
    #     self._add_claim_tweet_connection(u'claim2', u'post5')
    #     self._db.session.commit()
    #     self._preprocess_visualization._domain = u"Microblog"
    #     self._preprocess_visualization.execute()
    #     self._preprocess_visualization.execute()
    #
    #     author_topic_mapping = self._db.get_author_topic_mapping()
    #     post_topic_mappings = self._db.get_post_topic_mapping()
    #     post_topic_mappings = [(tm.post_id, tm.max_topic_id, tm.max_topic_dist) for tm in post_topic_mappings]
    #     topic_id1 = self._preprocess_visualization.get_claim_id_topic_dictionary()[u'claim1']
    #     topic_id2 = self._preprocess_visualization.get_claim_id_topic_dictionary()[u'claim2']
    #     self.assertEqual(2, len(author_topic_mapping))
    #     self.assertSetEqual({(u'test author2', 0.5, 0.5), (u'test author', 0.666666666667, 0.333333333333)},
    #                         set(author_topic_mapping))
    #     self.assertSetEqual(
    #         {('post1', topic_id1, 1.0), ('post2', topic_id1, 1.0), ('post3', topic_id2, 1.0), ('post4', topic_id1, 1.0),
    #          ('post5', topic_id2, 1.0)}, set(post_topic_mappings))

    def assertTopicInserted(self, claim_id, expected_terms):
        """Assert a topic exists for ``claim_id`` and that its term set equals
        both the cleaned claim description and ``expected_terms``."""
        topics = self._db.get_topics()
        terms = self._db.get_terms()
        topic_dict = defaultdict(set)
        term_dict = {term.term_id: term.description for term in terms}
        for topic_id, term_id, prob in topics:
            topic_dict[topic_id].add(term_dict[term_id])
        topic_id = self._preprocess_visualization.get_claim_id_topic_dictionary(
        )[claim_id]
        claim = self._claim_dictionary[claim_id]
        expected = set(clean_tweet(claim.description).split(' '))
        self.assertIn(topic_id, topic_dict)
        # Double-check: against the cleaned description AND against the
        # caller's explicit expectation.
        self.assertSetEqual(expected, topic_dict[topic_id])
        self.assertSetEqual(set(expected_terms), topic_dict[topic_id])

    def _add_author(self, author_guid, author_type=u"good_actor"):
        """Insert an Author row; every name field reuses the guid.

        Renamed the second parameter from ``type`` (shadowed the builtin) to
        ``author_type``; all call sites pass it positionally.
        """
        author = Author()
        author.author_guid = author_guid
        author.author_full_name = author_guid
        author.author_screen_name = author_guid
        author.name = author_guid
        author.domain = u'Microblog'
        author.author_type = author_type
        self._db.add_author(author)
        self._authors.append(author)

    def _add_post(self, author_guid, title, content, domain=u'Microblog'):
        """Insert a Post row; the title doubles as post_id and guid."""
        post = Post()
        post.author = author_guid
        post.author_guid = author_guid
        post.content = content
        post.title = title
        post.domain = domain
        post.post_id = title
        post.guid = post.post_id
        post.is_detailed = True
        post.is_LB = False
        self._db.addPost(post)
        self._posts.append(post)

    def _add_claim_tweet_connection(self, claim_id, post_id):
        """Link a claim to a post via a Claim_Tweet_Connection row."""
        connection = Claim_Tweet_Connection()
        connection.claim_id = claim_id
        connection.post_id = post_id
        self._db.add_claim_connections([connection])

    def _add_claim(self,
                   claim_id,
                   content,
                   date_str=u"2017-06-14 05:00:00",
                   keywords=u"",
                   post_type=None):
        """Insert a Claim row and remember it in _claim_dictionary.

        NOTE(review): persisted via ``addPost`` — presumably the DB helper
        accepts arbitrary mapped records; confirm against the DB API.
        """
        claim = Claim()
        claim.claim_id = claim_id
        claim.verdict = post_type
        claim.title = claim_id
        claim.description = content
        claim.verdict_date = convert_str_to_unicode_datetime(date_str)
        claim.keywords = keywords
        claim.url = u"claim url"
        self._db.addPost(claim)
        self._claim_dictionary[claim.claim_id] = claim
class TestBehaviorFeatureGenerator(TestCase):
    def setUp(self):
        # Fresh test DB per test.  Posts added by _add_post accumulate in
        # self._posts so feature methods can be called with them directly;
        # _add_author (defined later in the class) sets self._author.
        self._db = DB()
        self._db.setUp()
        self._posts = []
        self._author = None

    def tearDown(self):
        self._db.session.close()
        pass

    ######################## Average minute between posts tests ######################

    def test_average_minutes_between_posts_no_post_expected_0(self):
        # An author with no posts yields an average gap of 0 minutes.
        self._add_author(u"author_guid")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        self._behavior_feature_generator._features = [
            'average_minutes_between_posts'
        ]
        # Minimal targeted-fields config: read posts keyed by author_guid
        # with an always-true (1 = 1) where clause; no connection table.
        self._behavior_feature_generator._targeted_fields = [{
            'source': {
                "table_name": "posts",
                "id": "author_guid",
                "target_field": "content",
                "where_clauses": [{
                    "field_name": 1,
                    "value": 1
                }]
            },
            "connection": {},
            "destination": {}
        }]
        result = self._behavior_feature_generator.average_minutes_between_posts(
            **{'posts': self._posts})
        self.assertEqual(0, result)

    def test_average_minutes_between_posts_one_post_expected_0(self):
        # A single post has no gaps, so the persisted feature value is '0'.
        self._add_author(u"author_guid")
        self._add_post(u"post1", u"content 1", "2017-06-12 05:00:00")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        # Always-true (1 = 1) where clause: consider every post by the author.
        self._behavior_feature_generator._targeted_fields = [{
            'source': {
                "table_name": "posts",
                "id": "author_guid",
                "target_field": "content",
                "where_clauses": [{
                    "field_name": 1,
                    "value": 1
                }]
            },
            "connection": {},
            "destination": {}
        }]
        self._behavior_feature_generator.execute()
        # Feature values come back as strings from the author_features table.
        result_feature = self._db.get_author_feature(
            u"author_guid",
            u"BehaviorFeatureGenerator_average_minutes_between_posts")
        feature_value = getattr(result_feature, u'attribute_value')
        self.assertEqual('0', feature_value)

    def test_average_minutes_between_posts_3_post_expected_105(self):
        """Gaps of 60 and 150 minutes between consecutive posts average 105."""
        self._add_author(u"author_guid")
        post_rows = [
            (u"post1", u"content 1", "2017-06-12 05:00:00"),
            (u"post2", u"content 2", "2017-06-12 06:00:00"),
            (u"post3", u"content 3", "2017-06-12 08:30:00"),
        ]
        for title, content, created in post_rows:
            self._add_post(title, content, created)
        self._db.add_author(self._author)
        self._db.session.commit()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **self._get_params())
        result = self._behavior_feature_generator.average_minutes_between_posts(
            posts=self._posts)
        self.assertEqual(105, result)

    ######################## Average posts per day tests ######################

    def test_average_posts_per_day_active_days_no_posts_expect_0(self):
        """With no posts there are no active days, so the average is 0."""
        self._add_author(u"author_guid")
        self._db.add_author(self._author)
        self._db.session.commit()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **self._get_params())
        result = self._behavior_feature_generator.average_posts_per_day_active_days(
            posts=self._posts)
        self.assertEqual(0, result)

    def test_average_posts_per_day_1_active_days_1_post_each_expect_1(self):
        """One post on one active day gives an average of exactly 1.0."""
        self._add_author(u"author_guid")
        self._add_post(u"post1", u"content 1", "2017-06-12 05:00:00")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        result = self._behavior_feature_generator.average_posts_per_day_active_days(
            **{'posts': self._posts})
        # BUG FIX: the third positional argument of assertAlmostEqual is
        # ``places`` (an int).  Passing 0.0000001 there raises TypeError
        # whenever the values actually differ; use an explicit ``delta``.
        self.assertAlmostEqual(1.0, result, delta=0.0000001)

    def test_average_posts_per_day_3_active_days_1_post_each_expect_1(self):
        """Three active days with one post each still average 1.0 per day."""
        self._add_author(u"author_guid")
        self._add_post(u"post1", u"content 1", "2017-06-12 05:00:00")
        self._add_post(u"post2", u"content 2", "2017-06-13 05:00:00")
        self._add_post(u"post3", u"content 3", "2017-06-16 05:00:00")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        result = self._behavior_feature_generator.average_posts_per_day_active_days(
            **{'posts': self._posts})
        # BUG FIX: the third positional argument of assertAlmostEqual is
        # ``places`` (an int).  Passing 0.0000001 there raises TypeError
        # whenever the values actually differ; use an explicit ``delta``.
        self.assertAlmostEqual(1.0, result, delta=0.0000001)

    def test_average_posts_per_day_3_active_days_1_first_2_secound_3_third_expect_2(
            self):
        """1 + 2 + 3 posts over three active days average to 2.0 per day."""
        self._add_author(u"author_guid")
        post_rows = [
            (u"post1", u"content 1", "2017-06-12 05:00:00"),
            (u"post2", u"content 2", "2017-06-13 05:00:00"),
            (u"post3", u"content 3", "2017-06-13 06:00:00"),
            (u"post4", u"content 4", "2017-06-16 03:00:00"),
            (u"post5", u"content 5", "2017-06-16 04:00:00"),
            (u"post6", u"content 6", "2017-06-16 05:00:00"),
        ]
        for title, content, created in post_rows:
            self._add_post(title, content, created)
        self._db.add_author(self._author)
        self._db.session.commit()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **self._get_params())
        result = self._behavior_feature_generator.average_posts_per_day_active_days(
            posts=self._posts)
        self.assertEqual(2.0, result)

    def test_average_posts_per_day_3_active_days_1_first_2_secound_3_third_expect_2_represent_by_post(
            self):
        # Same 1/2/3-posts-per-day scenario, but the subject is the claim
        # post (post0): Microblog posts are reached through the
        # claim_tweet_connection table rather than by author_guid, and
        # post7 is excluded by the destination domain filter.
        self._add_author(u"author_guid")
        self._add_post(u"post0", u"the claim", "2017-06-10 05:00:00", u"Claim")
        self._add_post(u"post1", u"content 1", "2017-06-12 05:00:00")
        self._add_post(u"post2", u"content 2", "2017-06-13 05:00:00")
        self._add_post(u"post3", u"content 3", "2017-06-13 06:00:00")
        self._add_post(u"post4", u"content 4", "2017-06-16 03:00:00")
        self._add_post(u"post5", u"content 5", "2017-06-16 04:00:00")
        self._add_post(u"post6", u"content 6", "2017-06-16 05:00:00")
        self._add_post(u"post7", u"content 7", "2017-06-16 06:00:00",
                       u"Not Microblog")
        self._add_claim_tweet_connection(u"post0", u"post1")
        self._add_claim_tweet_connection(u"post0", u"post2")
        self._add_claim_tweet_connection(u"post0", u"post3")
        self._add_claim_tweet_connection(u"post0", u"post4")
        self._add_claim_tweet_connection(u"post0", u"post5")
        self._add_claim_tweet_connection(u"post0", u"post6")
        self._add_claim_tweet_connection(u"post0", u"post7")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        # Source: claim posts; connection: claim_tweet_connection constrained
        # to source.date <= dest.date (the claim predates every tweet here);
        # destination: Microblog posts only.
        self._behavior_feature_generator._targeted_fields = [{
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Claim"
                }]
            },
            'connection': {
                'table_name':
                'claim_tweet_connection',
                'source_id':
                'claim_id',
                'target_id':
                'post_id',
                "where_clauses": [{
                    "val1": "source.date",
                    "val2": "dest.date",
                    "op": "<="
                }]
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Microblog"
                }]
            }
        }]
        self._behavior_feature_generator.execute()
        # Features are keyed by the claim post id; values stored as strings.
        author_feature = self._db.get_author_feature(
            u'post0',
            u"BehaviorFeatureGenerator_average_posts_per_day_active_days")
        self.assertEqual(u'2.0', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            u'post0', u"BehaviorFeatureGenerator_average_posts_per_day_total")
        self.assertGreater(float(author_feature.attribute_value), 0)

    def test_retweet_count_0_posts(self):
        # No posts at all -> retweet count feature is stored as '0'.
        self._add_author(u"author_guid")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        self._behavior_feature_generator._targeted_fields = [{
            'source': {
                'table_name': 'authors',
                'id': 'author_guid',
                "target_field": "author_guid",
                "where_clauses": [{
                    "field_name": "1",
                    "value": "1"
                }]
            },
            'connection': {},
            'destination': {}
        }]
        self._behavior_feature_generator.execute()
        author_feature = self._db.get_author_feature(
            u'author_guid', u"BehaviorFeatureGenerator_retweet_count")
        self.assertEqual(u'0', author_feature.attribute_value)

    def test_retweet_count_1_retweet(self):
        # One post whose content starts with "RT @" counts as one retweet
        # (the retweet marker convention is implied by this suite's data).
        self._add_author(u"author_guid")
        self._add_post(u"post1", u"RT @content 1", "2017-06-12 05:00:00")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        self._behavior_feature_generator._targeted_fields = [{
            'source': {
                'table_name': 'authors',
                'id': 'author_guid',
                "target_field": "author_guid",
                "where_clauses": [{
                    "field_name": "1",
                    "value": "1"
                }]
            },
            'connection': {},
            'destination': {}
        }]
        self._behavior_feature_generator.execute()
        author_feature = self._db.get_author_feature(
            u'author_guid', u"BehaviorFeatureGenerator_retweet_count")
        self.assertEqual(u'1', author_feature.attribute_value)

    def test_retweet_count_3_retweet(self):
        # Of the six Microblog posts linked to the claim, three contain an
        # "RT @" marker (post3, post5, post6); post7 carries a marker too but
        # is excluded by the destination domain filter.  Hence count 3 and
        # average_retweets 3/6 = 0.5.
        self._add_author(u"author_guid")
        self._add_post(u"post0", u"the claim", "2017-06-10 05:00:00", u"Claim")
        self._add_post(u"post1", u"content 1", "2017-06-12 05:00:00")
        self._add_post(u"post2", u"content 2", "2017-06-13 05:00:00")
        self._add_post(u"post3", u"RT @content 3 RT @hi",
                       "2017-06-13 06:00:00")
        self._add_post(u"post4", u"content 4", "2017-06-16 03:00:00")
        self._add_post(u"post5", u"content 5 bla RT @bla",
                       "2017-06-16 04:00:00")
        self._add_post(u"post6", u"RT @content 6", "2017-06-16 05:00:00")
        self._add_post(u"post7", u"content 7 RT @wow", "2017-06-16 06:00:00",
                       u"Not Microblog")
        self._add_claim_tweet_connection(u"post0", u"post1")
        self._add_claim_tweet_connection(u"post0", u"post2")
        self._add_claim_tweet_connection(u"post0", u"post3")
        self._add_claim_tweet_connection(u"post0", u"post4")
        self._add_claim_tweet_connection(u"post0", u"post5")
        self._add_claim_tweet_connection(u"post0", u"post6")
        self._add_claim_tweet_connection(u"post0", u"post7")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        # Claim posts -> claim_tweet_connection -> Microblog posts.
        self._behavior_feature_generator._targeted_fields = [{
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Claim"
                }]
            },
            'connection': {
                'table_name': 'claim_tweet_connection',
                'source_id': 'claim_id',
                'target_id': 'post_id',
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Microblog"
                }]
            }
        }]
        self._behavior_feature_generator.execute()
        author_feature = self._db.get_author_feature(
            u'post0', u"BehaviorFeatureGenerator_retweet_count")
        self.assertEqual(u'3', author_feature.attribute_value)

        author_feature = self._db.get_author_feature(
            u'post0', u"BehaviorFeatureGenerator_average_retweets")
        self.assertEqual(u'0.5', author_feature.attribute_value)

    def test_received_retweets_count_0_retweets(self):
        # NOTE(review): the name says "received retweets" but the assertion
        # checks BehaviorFeatureGenerator_retweet_count (same as the
        # test_retweet_count_* cases) — confirm the intended feature key.
        self._add_author(u"author_guid")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        self._behavior_feature_generator._targeted_fields = [{
            'source': {
                'table_name': 'authors',
                'id': 'author_guid',
                "target_field": "author_guid",
                "where_clauses": [{
                    "field_name": "1",
                    "value": "1"
                }]
            },
            'connection': {},
            'destination': {}
        }]
        self._behavior_feature_generator.execute()
        author_feature = self._db.get_author_feature(
            u'author_guid', u"BehaviorFeatureGenerator_retweet_count")
        self.assertEqual(u'0', author_feature.attribute_value)

    def test_received_retweets_count_1_retweets(self):
        # One post mentioning "RT @author_guid" -> feature value '1'.
        # NOTE(review): despite the "received retweets" name this asserts the
        # generic retweet_count feature — confirm the intended feature key.
        self._add_author(u"author_guid")
        self._add_post(u"post1", u"RT @author_guid content 1",
                       "2017-06-12 05:00:00")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        self._behavior_feature_generator._targeted_fields = [{
            'source': {
                'table_name': 'authors',
                'id': 'author_guid',
                "target_field": "author_guid",
                "where_clauses": [{
                    "field_name": "1",
                    "value": "1"
                }]
            },
            'connection': {},
            'destination': {}
        }]
        self._behavior_feature_generator.execute()
        author_feature = self._db.get_author_feature(
            u'author_guid', u"BehaviorFeatureGenerator_retweet_count")
        self.assertEqual(u'1', author_feature.attribute_value)

    def test_received_retweets_count_3_retweets_only_from_microblog_tweets(
            self):
        # Three of the claim-connected Microblog posts contain "RT @"
        # (post3, post5, post6); post7 does too but is filtered out by the
        # Microblog-only destination clause, so the count is 3.
        self._add_author(u"author_guid")
        self._add_post(u"post0", u"the claim", "2017-06-10 05:00:00", u"Claim")
        self._add_post(u"post1", u"content 1", "2017-06-12 05:00:00")
        self._add_post(u"post2", u"content 2", "2017-06-13 05:00:00")
        self._add_post(u"post3", u"RT @author_guid content 3 RT @hi",
                       "2017-06-13 06:00:00")
        self._add_post(u"post4", u"content 4", "2017-06-16 03:00:00")
        self._add_post(u"post5", u"content 5 bla RT @author_guid",
                       "2017-06-16 04:00:00")
        self._add_post(u"post6", u"RT @content 6", "2017-06-16 05:00:00")
        self._add_post(u"post7", u"content 7 RT @wow", "2017-06-16 06:00:00",
                       u"Not Microblog")
        self._add_claim_tweet_connection(u"post0", u"post1")
        self._add_claim_tweet_connection(u"post0", u"post2")
        self._add_claim_tweet_connection(u"post0", u"post3")
        self._add_claim_tweet_connection(u"post0", u"post4")
        self._add_claim_tweet_connection(u"post0", u"post5")
        self._add_claim_tweet_connection(u"post0", u"post6")
        self._add_claim_tweet_connection(u"post0", u"post7")
        self._db.add_author(self._author)
        self._db.session.commit()
        params = self._get_params()
        self._behavior_feature_generator = BehaviorFeatureGenerator(
            self._db, **params)
        # Claim posts -> claim_tweet_connection -> Microblog posts.
        self._behavior_feature_generator._targeted_fields = [{
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Claim"
                }]
            },
            'connection': {
                'table_name': 'claim_tweet_connection',
                'source_id': 'claim_id',
                'target_id': 'post_id',
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Microblog"
                }]
            }
        }]
        self._behavior_feature_generator.execute()
        author_feature = self._db.get_author_feature(
            u'post0', u"BehaviorFeatureGenerator_retweet_count")
        self.assertEqual(u'3', author_feature.attribute_value)

    ######################## argument_parser tests ######################

    def test_argument_parser_connection_conditions(self):
        """A 'source.date <= dest.date' connection clause should keep only
        the Microblog posts dated at or after the claim's own date."""
        self._add_author(u"author_guid")
        self._add_post(u"post0", u"the claim", "2017-06-14 05:00:00", u"Claim")
        microblog_posts = [
            (u"post1", u"content 1", "2017-06-12 05:00:00"),
            (u"post2", u"content 2", "2017-06-13 05:00:00"),
            (u"post3", u"content 3", "2017-06-13 06:00:00"),
            (u"post4", u"content 4", "2017-06-16 03:00:00"),
            (u"post5", u"content 5", "2017-06-16 04:00:00"),
            (u"post6", u"content 6", "2017-06-16 05:00:00"),
        ]
        for title, content, date_str in microblog_posts:
            self._add_post(title, content, date_str)
        # post7 is excluded by the destination domain filter below.
        self._add_post(u"post7", u"content 7", "2017-06-16 06:00:00",
                       u"Not Microblog")
        for num in range(1, 8):
            self._add_claim_tweet_connection(u"post0", u"post{}".format(num))
        self._db.add_author(self._author)
        self._db.session.commit()

        arg_parser = ArgumentParser(self._db)
        source = {
            'table_name': 'posts',
            'id': 'post_id',
            "where_clauses": [{
                "field_name": "domain",
                "value": "Claim"
            }],
        }
        connection = {
            'table_name': 'claim_tweet_connection',
            'source_id': 'claim_id',
            'target_id': 'post_id',
            "where_clauses": [{
                "val1": "source.date",
                "val2": "dest.date",
                "op": "<="
            }],
        }
        destination = {
            'table_name': 'posts',
            'id': 'post_id',
            'target_field': 'content',
            "where_clauses": [{
                "field_name": "domain",
                "value": "Microblog"
            }],
        }
        args = {
            'source': source,
            'connection': connection,
            'destination': destination,
        }
        source_id_target_elements_dict = arg_parser._get_source_id_target_elements(
            args)
        actual = {
            element.post_id
            for element in source_id_target_elements_dict["post0"]
        }
        self.assertSetEqual(actual, {'post4', 'post5', 'post6'})

    def test_argument_parser_connection_conditions_with_timedelta(self):
        """A 'timeinterval' connection clause with delta=1 should keep only
        the Microblog posts within one day of the claim's date."""
        self._add_author(u"author_guid")
        self._add_post(u"post0", u"the claim", "2017-06-14 05:00:00", u"Claim")
        microblog_posts = [
            (u"post1", u"content 1", "2017-06-12 06:00:00"),
            (u"post2", u"content 2", "2017-06-13 05:00:00"),
            (u"post3", u"content 3", "2017-06-13 06:00:00"),
            (u"post4", u"content 4", "2017-06-16 03:00:00"),
            (u"post5", u"content 5", "2017-06-16 04:00:00"),
            (u"post6", u"content 6", "2017-06-16 05:00:00"),
        ]
        for title, content, date_str in microblog_posts:
            self._add_post(title, content, date_str)
        # post7 is excluded by the destination domain filter below.
        self._add_post(u"post7", u"content 7", "2017-06-16 06:00:00",
                       u"Not Microblog")
        for num in range(1, 8):
            self._add_claim_tweet_connection(u"post0", u"post{}".format(num))
        self._db.add_author(self._author)
        self._db.session.commit()

        arg_parser = ArgumentParser(self._db)
        source = {
            'table_name': 'posts',
            'id': 'post_id',
            "where_clauses": [{
                "field_name": "domain",
                "value": "Claim"
            }],
        }
        connection = {
            'table_name': 'claim_tweet_connection',
            'source_id': 'claim_id',
            'target_id': 'post_id',
            "where_clauses": [{
                "val1": "source.date",
                "val2": "dest.date",
                "op": "timeinterval",
                "delta": 1
            }],
        }
        destination = {
            'table_name': 'posts',
            'id': 'post_id',
            'target_field': 'content',
            "where_clauses": [{
                "field_name": "domain",
                "value": "Microblog"
            }],
        }
        args = {
            'source': source,
            'connection': connection,
            'destination': destination,
        }
        source_id_target_elements_dict = arg_parser._get_source_id_target_elements(
            args)
        actual = {
            element.post_id
            for element in source_id_target_elements_dict["post0"]
        }
        self.assertSetEqual(actual, {'post2', 'post3'})

    def test_argument_parser_connection_conditions_with_before_timedelta(self):
        """A 'before' connection clause with delta=1 should keep only the
        Microblog posts published up to one day before the claim."""
        self._add_author(u"author_guid")
        self._add_post(u"post0", u"the claim", "2017-06-14 05:00:00", u"Claim")
        microblog_posts = [
            (u"post1", u"content 1", "2017-06-12 06:00:00"),
            (u"post2", u"content 2", "2017-06-13 05:00:00"),
            (u"post3", u"content 3", "2017-06-13 06:00:00"),
            (u"post4", u"content 4", "2017-06-16 03:00:00"),
            (u"post5", u"content 5", "2017-06-16 04:00:00"),
            (u"post6", u"content 6", "2017-06-16 05:00:00"),
        ]
        for title, content, date_str in microblog_posts:
            self._add_post(title, content, date_str)
        # post7 is excluded by the destination domain filter below.
        self._add_post(u"post7", u"content 7", "2017-06-16 06:00:00",
                       u"Not Microblog")
        for num in range(1, 8):
            self._add_claim_tweet_connection(u"post0", u"post{}".format(num))
        self._db.add_author(self._author)
        self._db.session.commit()

        arg_parser = ArgumentParser(self._db)
        source = {
            'table_name': 'posts',
            'id': 'post_id',
            "where_clauses": [{
                "field_name": "domain",
                "value": "Claim"
            }],
        }
        connection = {
            'table_name': 'claim_tweet_connection',
            'source_id': 'claim_id',
            'target_id': 'post_id',
            "where_clauses": [{
                "val1": "source.date",
                "val2": "dest.date",
                "op": "before",
                "delta": 1
            }],
        }
        destination = {
            'table_name': 'posts',
            'id': 'post_id',
            'target_field': 'content',
            "where_clauses": [{
                "field_name": "domain",
                "value": "Microblog"
            }],
        }
        args = {
            'source': source,
            'connection': connection,
            'destination': destination,
        }
        source_id_target_elements_dict = arg_parser._get_source_id_target_elements(
            args)
        actual = {
            element.post_id
            for element in source_id_target_elements_dict["post0"]
        }
        self.assertSetEqual(actual, {'post2', 'post3'})

    def test_argument_parser_connection_conditions_with_after_timedelta(self):
        """An 'after' connection clause with delta=1 should keep only the
        Microblog posts published up to one day after the claim."""
        self._add_author(u"author_guid")
        self._add_post(u"post0", u"the claim", "2017-06-14 05:00:00", u"Claim")
        microblog_posts = [
            (u"post1", u"content 1", "2017-06-12 06:00:00"),
            (u"post2", u"content 2", "2017-06-13 05:00:00"),
            (u"post3", u"content 3", "2017-06-15 05:00:00"),
            (u"post4", u"content 4", "2017-06-16 03:00:00"),
            (u"post5", u"content 5", "2017-06-16 04:00:00"),
            (u"post6", u"content 6", "2017-06-16 05:00:00"),
        ]
        for title, content, date_str in microblog_posts:
            self._add_post(title, content, date_str)
        # post7 is excluded by the destination domain filter below.
        self._add_post(u"post7", u"content 7", "2017-06-16 06:00:00",
                       u"Not Microblog")
        for num in range(1, 8):
            self._add_claim_tweet_connection(u"post0", u"post{}".format(num))
        self._db.add_author(self._author)
        self._db.session.commit()

        arg_parser = ArgumentParser(self._db)
        source = {
            'table_name': 'posts',
            'id': 'post_id',
            "where_clauses": [{
                "field_name": "domain",
                "value": "Claim"
            }],
        }
        connection = {
            'table_name': 'claim_tweet_connection',
            'source_id': 'claim_id',
            'target_id': 'post_id',
            "where_clauses": [{
                "val1": "source.date",
                "val2": "dest.date",
                "op": "after",
                "delta": 1
            }],
        }
        destination = {
            'table_name': 'posts',
            'id': 'post_id',
            'target_field': 'content',
            "where_clauses": [{
                "field_name": "domain",
                "value": "Microblog"
            }],
        }
        args = {
            'source': source,
            'connection': connection,
            'destination': destination,
        }
        source_id_target_elements_dict = arg_parser._get_source_id_target_elements(
            args)
        actual = {
            element.post_id
            for element in source_id_target_elements_dict["post0"]
        }
        self.assertSetEqual(actual, {'post3'})

    def _add_author(self, author_guid):
        """Build a test Author keyed by ``author_guid`` and keep it on
        ``self._author``; it is written to the DB later by the caller."""
        author = Author()
        for field, value in [
            ('author_guid', author_guid),
            ('author_full_name', u'test author'),
            ('author_screen_name', author_guid),
            ('name', u'test'),
            ('domain', u'tests'),
            ('statuses_count', 0),
            ('created_at', u"2017-06-14 05:00:00"),
        ]:
            setattr(author, field, value)
        self._author = author

    def _add_post(self, title, content, date_str, domain=u'Microblog'):
        """Add a post authored by ``self._author``; the title doubles as
        post_id and guid, and the author's statuses_count is bumped."""
        timestamp = convert_str_to_unicode_datetime(date_str)
        post = Post()
        post.post_id = title
        post.guid = title
        post.title = title
        post.content = content
        post.domain = domain
        post.author = self._author.author_guid
        post.author_guid = self._author.author_guid
        post.date = timestamp
        post.created_at = timestamp
        self._db.addPost(post)
        self._posts.append(post)
        self._author.statuses_count += 1

    def _get_params(self):
        posts = {self._author.author_guid: self._posts}
        params = {'authors': [self._author], 'posts': posts}
        return params

    def _add_claim_tweet_connection(self, claim_id, post_id):
        """Link ``claim_id`` to ``post_id`` in the claim_tweet_connection
        table.  (The original ended with a dead ``pass`` statement.)"""
        connection = Claim_Tweet_Connection()
        connection.claim_id = claim_id
        connection.post_id = post_id
        self._db.add_claim_connections([connection])
class TestGensimWordEmbeddingsModelTrainer(TestCase):
    def setUp(self):
        self._config_parser = getConfig()
        self._db = DB()
        self._db.setUp()

        # self._Word_Embedding_Model_Creator.execute(None)
        self._is_load_wikipedia_300d_glove_model = True
        self._wikipedia_model_file_path = "data/input/glove/test_glove.6B.300d_small.txt"
        self._table_name = "wikipedia_model_300d"
        self._word_vector_dict_full_path = "data/output/word_embedding/"
        self._word_vector_dict = {}

        self._author = None
        self._set_author(u'test_user')
        self._counter = 0
        self._posts = []

    def tearDown(self):
        self._db.session.close()

    def test_add_additional_fields_to_existing_table(self):
        self._add_post(u'was', u'is')
        self._add_post(u'is', u'was')
        self._db.session.commit()
        self._word_embedding_model_creator = GensimWordEmbeddingsModelTrainer(
            self._db)

        self._word_embedding_model_creator.execute(None)
        self._word_embedding_model_creator._aggregation_functions_names = [
            'sum'
        ]
        self._word_embedding_model_creator.execute(None)

        file_output_path = self._word_embedding_model_creator._saved_models_path + self._word_embedding_model_creator._table_name + ".csv"
        data = pd.DataFrame.from_csv(file_output_path)

        word_embedding_results = data.loc[(data['author_id'] == 'test_user')
                                          & (data['table_name'] == u'posts') &
                                          (data['targeted_field_name']
                                           == u'content')]
        sum_value_df = word_embedding_results.loc[
            word_embedding_results[u'word_embedding_type'] == u'sum']
        mean_value_df = word_embedding_results.loc[
            word_embedding_results[u'word_embedding_type'] == u'np.mean']

        try:
            if len(sum_value_df.values.tolist()) > 0 and len(
                    mean_value_df.values.tolist()) > 0:
                self.assertTrue(True)
            else:
                self.fail()
        except:
            self.fail()

    def test_case_post_represent_by_posts(self):
        self._add_post(u'post1', u'the claim', u'Claim')
        self._add_post(u'post2', u'dog cat pig man')  # 2
        self._add_post(u'post3', u'TV is the best guys')  # 1
        self._add_claim_tweet_connection(u'post1', u'post2')
        self._add_claim_tweet_connection(u'post1', u'post3')
        self._db.session.commit()
        self._word_embedding_model_creator = GensimWordEmbeddingsModelTrainer(
            self._db)
        self._word_embedding_model_creator._targeted_fields_for_embedding = [{
            'source': {
                'table_name': 'posts',
                'id': 'post_id'
            },
            'connection': {
                'table_name': 'claim_tweet_connection',
                'source_id': 'claim_id',
                'target_id': 'post_id'
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                "where_clauses": []
            }
        }]

        self._word_embedding_model_creator.execute(None)
        model_name_path = self._word_embedding_model_creator._prepare_model_name_path(
        )
        model = Word2Vec.load(model_name_path)
        word_vector_dict = self._word_embedding_model_creator._get_word_embedding_dict(
            model)
        self._words = word_vector_dict
        self._words_vectors = self._get_posts_val()
        expected_val = self._calc_results()
        self._generic_test(expected_val, u'post1')

    def _setup_test(self):
        self._db.session.commit()
        self._word_embedding_model_creator = GensimWordEmbeddingsModelTrainer(
            self._db)
        self._word_embedding_model_creator.execute(None)

        self._words = self._db.get_word_embedding_dictionary()
        self._words_vectors = self._get_posts_val()

    def _generic_test(self, expected_value, source_id=u""):
        if source_id == u"":
            source_id = self._author.author_guid

        file_output_path = self._word_embedding_model_creator._saved_models_path + self._word_embedding_model_creator._table_name + ".csv"
        data = pd.DataFrame.from_csv(file_output_path)

        word_embedding_results = data.loc[(data['author_id'] == source_id)
                                          & (data['table_name'] == u'posts') &
                                          (data['targeted_field_name']
                                           == u'content')]

        self.assert_word_embedding(word_embedding_results, expected_value,
                                   u'min')
        self.assert_word_embedding(word_embedding_results, expected_value,
                                   u'max')
        self.assert_word_embedding(word_embedding_results, expected_value,
                                   u'np.mean')

    def assert_word_embedding(self, db_results, expected_value, type):
        result_value = db_results.loc[db_results[u'word_embedding_type'] ==
                                      type, '0':].values.tolist()[0]
        self.assertEquals(list(expected_value[type]), result_value)

    def _generic_non_equal_test(self, expected_value):
        db_results = self._db.get_author_word_embedding(
            self._author.author_guid, u'posts', u'content')
        self.assertNotEqual(expected_value[u'min'], db_results[u'min'])
        self.assertNotEqual(expected_value[u'max'], db_results[u'max'])
        self.assertNotEqual(expected_value[u'np.mean'], db_results[u'np.mean'])

    def _set_author(self, author_guid):
        author = Author()
        author.author_guid = author_guid
        author.author_full_name = u'name' + author_guid
        author.name = u'name' + author_guid
        author.domain = u'test'
        self._db.add_author(author)
        self._author = author

    def _add_post(self, title, content, _domain=u'Microblog'):
        post = Post()
        post.author = self._author.author_full_name
        post.author_guid = self._author.author_guid
        post.content = content
        post.title = title
        post.domain = _domain
        post.post_id = title
        post.guid = title
        self._db.addPost(post)
        self._posts.append(post)

    def _get_posts_val(
            self):  # return the vectors for all the words in the added posts
        vals = {}
        for post in self._posts:
            for word in post.content.split():
                if word in self._words.keys():
                    vals[word] = self._words[word]
        return vals.values()

    def _calc_mean(self, vectors):
        vectors = self._get_posts_val()
        if len(vectors) == 0:
            return (0, ) * 300
        ziped_vec = zip(*vectors)
        result = map(eval('np.mean'), ziped_vec)
        return tuple(result)

    def _calc_min(self, vectors):
        vectors = self._get_posts_val()
        if len(vectors) == 0:
            return (0, ) * 300
        ziped_vec = zip(*vectors)
        result = map(eval('min'), ziped_vec)
        return tuple(result)

    def _calc_max(self, vectors):
        vectors = self._get_posts_val()
        if len(vectors) == 0:
            return (0, ) * 300
        ziped_vec = zip(*vectors)
        result = map(eval('max'), ziped_vec)
        return tuple(result)

    def _calc_results(self):
        vectors = self._words_vectors
        results = {}
        results[u'min'] = self._calc_min(vectors)
        results[u'max'] = self._calc_max(vectors)
        results[u'np.mean'] = self._calc_mean(vectors)
        return results

    def _add_target_article(self, post_id, title, description, author_guid):
        target_article = Target_Article()
        target_article.author_guid = author_guid
        target_article.post_id = post_id
        target_article.title = title
        target_article.description = description
        target_article.keywords = 'temp, lala, fafa'
        self._db.add_target_articles([target_article])

    def _add_target_article_item(self, post_id, type, content, author_guid):
        article_item = Target_Article_Item()
        article_item.post_id = post_id
        article_item.type = type
        article_item.item_number = 3
        article_item.content = content
        article_item.author_guid = author_guid
        self._db.addPosts([article_item])

    def _add_claim_tweet_connection(self, claim_id, post_id):
        connection = Claim_Tweet_Connection()
        connection.claim_id = claim_id
        connection.post_id = post_id
        self._db.add_claim_connections([connection])
        pass
class RedditFeatureGeneratorTest(TestCase):
    def setUp(self):
        """Create a fresh DB populated with authors, posts and claims, then
        build the two Reddit feature generators under test."""
        self._db = DB()
        self._db.setUp()
        self._posts = []
        self._author = None
        # Fixture population order matters: authors first, then their
        # posts, then the claims they connect to.
        self._init_authors()
        self._init_posts()
        self._init_claims()
        self._reddit_post_by_claim_feature_generator = \
            RedditPostByClaimFeatureGenerator(self._db, **self._get_params())
        self._reddit_author_by_claim_feature_generator = \
            RedditAuthorByClaimFeatureGenerator(self._db, **self._get_params())

    def tearDown(self):
        self._db.session.close()
        pass

    def test_karma_by_submission_and_comment(self):
        """Aggregations of karma over submissions and comments, per claim."""
        claim_ids = [
            'cd2e1978-4dfa-3a40-b62f-71153001629c',
            'a4beae51-463f-33fc-bbf6-20eca5104afe',
            '9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
        ]
        # (feature name, expected value per claim id, in claim_ids order)
        expected_by_test = [
            ('min_karma_by_submission_and_comment', [-13, -321, 1]),
            ('max_karma_by_submission_and_comment', [52312, 102, 234]),
            ('mean_karma_by_submission_and_comment',
             [5904.222222, -19.55555556, 38.5]),
            ('median_karma_by_submission_and_comment', [27, 7, 5]),
            ('skew_karma_by_submission_and_comment',
             [2.998904337, -2.525365088, 2.234762661]),
            ('kurtosis_karma_by_submission_and_comment',
             [8.995080203, 7.357797068, 4.503581242]),
        ]
        test_cases = [{
            'claim_id': claim_id,
            'test_name': test_name,
            'expected': expected
        } for test_name, values in expected_by_test
                      for claim_id, expected in zip(claim_ids, values)]

        generator = self._reddit_post_by_claim_feature_generator
        generator._measure_names = ['karma_by_submission_and_comment']
        generator._aggregation_functions = [
            'min', 'max', 'mean', 'median', 'skew', 'kurtosis'
        ]
        generator.execute()

        for test_case in test_cases:
            self.assert_author_feature_test_case(test_case,
                                                 generator.__class__.__name__)

    def test_karma_by_submission(self):
        """Aggregations of karma over submissions only, per claim."""
        claim_ids = [
            'cd2e1978-4dfa-3a40-b62f-71153001629c',
            'a4beae51-463f-33fc-bbf6-20eca5104afe',
            '9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
        ]
        # (feature name, expected value per claim id, in claim_ids order)
        expected_by_test = [
            ('min_karma_by_submission', [738, -321, 123]),
            ('max_karma_by_submission', [52312, 102, 234]),
            ('mean_karma_by_submission', [26525, -109.5, 178.5]),
            ('median_karma_by_submission', [26525, -109.5, 178.5]),
        ]
        test_cases = [{
            'claim_id': claim_id,
            'test_name': test_name,
            'expected': expected
        } for test_name, values in expected_by_test
                      for claim_id, expected in zip(claim_ids, values)]

        generator = self._reddit_post_by_claim_feature_generator
        generator._measure_names = ['karma_by_submission']
        generator._aggregation_functions = [
            'min', 'max', 'mean', 'median', 'skew', 'kurtosis'
        ]
        generator.execute()

        for test_case in test_cases:
            self.assert_author_feature_test_case(test_case,
                                                 generator.__class__.__name__)

    def test_upvotes_by_submission(self):
        """Aggregations of upvotes over submissions, per claim."""
        claim_ids = [
            'cd2e1978-4dfa-3a40-b62f-71153001629c',
            'a4beae51-463f-33fc-bbf6-20eca5104afe',
            '9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
        ]
        # (feature name, expected value per claim id, in claim_ids order)
        expected_by_test = [
            ('min_upvotes_by_submission', [762, 112, 369]),
            ('max_upvotes_by_submission', [74593, 241, 2067]),
            ('mean_upvotes_by_submission', [37677.5, 176.5, 1218]),
            ('median_upvotes_by_submission', [37677.5, 176.5, 1218]),
        ]
        test_cases = [{
            'claim_id': claim_id,
            'test_name': test_name,
            'expected': expected
        } for test_name, values in expected_by_test
                      for claim_id, expected in zip(claim_ids, values)]

        generator = self._reddit_post_by_claim_feature_generator
        generator._measure_names = ['upvotes_by_submission']
        generator._aggregation_functions = [
            'min', 'max', 'mean', 'median', 'skew', 'kurtosis'
        ]
        generator.execute()

        for test_case in test_cases:
            self.assert_author_feature_test_case(test_case,
                                                 generator.__class__.__name__)

    def test_downvotes_by_submission(self):
        """Aggregations of downvotes over submissions, per claim."""
        claim_ids = [
            'cd2e1978-4dfa-3a40-b62f-71153001629c',
            'a4beae51-463f-33fc-bbf6-20eca5104afe',
            '9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
        ]
        # (feature name, expected value per claim id, in claim_ids order)
        expected_by_test = [
            ('min_downvotes_by_submission', [24, 10, 246]),
            ('max_downvotes_by_submission', [22281, 562, 1833]),
            ('mean_downvotes_by_submission', [11152.5, 286, 1039.5]),
            ('median_downvotes_by_submission', [11152.5, 286, 1039.5]),
        ]
        test_cases = [{
            'claim_id': claim_id,
            'test_name': test_name,
            'expected': expected
        } for test_name, values in expected_by_test
                      for claim_id, expected in zip(claim_ids, values)]

        generator = self._reddit_post_by_claim_feature_generator
        generator._measure_names = ['downvotes_by_submission']
        generator._aggregation_functions = [
            'min', 'max', 'mean', 'median', 'skew', 'kurtosis'
        ]
        generator.execute()

        for test_case in test_cases:
            self.assert_author_feature_test_case(test_case,
                                                 generator.__class__.__name__)

    def test_author_comment_karma(self):
        """Verify every aggregation of the comment_karma measure per claim."""
        claim_ids = [
            'cd2e1978-4dfa-3a40-b62f-71153001629c',
            'a4beae51-463f-33fc-bbf6-20eca5104afe',
            '9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
        ]
        # (feature name, expected value per claim id — same order as claim_ids)
        expectations = [
            ('min_comment_karma', (2261, 2842, 2842)),
            ('max_comment_karma', (37027, 35111, 30880)),
            ('mean_comment_karma', (19096.66667, 18031, 11833.5)),
            ('median_comment_karma', (22588, 16555, 6806)),
            ('skew_comment_karma', (-0.018614054, 0.128211429, 1.862860226)),
            ('kurtosis_comment_karma', (-1.992620739, -2.723581645, 3.595027437)),
        ]
        generator = self._reddit_author_by_claim_feature_generator
        generator._measure_names = ['comment_karma']
        generator._aggregation_functions = [
            'min', 'max', 'mean', 'median', 'skew', 'kurtosis'
        ]
        generator.execute()

        class_name = generator.__class__.__name__
        for test_name, values in expectations:
            for claim_id, expected in zip(claim_ids, values):
                self.assert_author_feature_test_case(
                    {'claim_id': claim_id,
                     'test_name': test_name,
                     'expected': expected}, class_name)

    def test_author_link_karma(self):
        """Verify every aggregation of the link_karma measure per claim."""
        claim_ids = [
            'cd2e1978-4dfa-3a40-b62f-71153001629c',
            'a4beae51-463f-33fc-bbf6-20eca5104afe',
            '9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
        ]
        # (feature name, expected value per claim id — same order as claim_ids)
        expectations = [
            ('min_link_karma', (1, 1, 90)),
            ('max_link_karma', (171576, 171576, 5897)),
            ('mean_link_karma', (20565.77778, 29840.16667, 1866)),
            ('median_link_karma', (1341, 738.5, 738.5)),
            ('skew_link_karma', (2.991811692, 2.443747273, 1.751305522)),
            ('kurtosis_link_karma', (8.963145712, 5.977609271, 3.018013716)),
        ]
        generator = self._reddit_author_by_claim_feature_generator
        generator._measure_names = ['link_karma']
        generator._aggregation_functions = [
            'min', 'max', 'mean', 'median', 'skew', 'kurtosis'
        ]
        generator.execute()

        class_name = generator.__class__.__name__
        for test_name, values in expectations:
            for claim_id, expected in zip(claim_ids, values):
                self.assert_author_feature_test_case(
                    {'claim_id': claim_id,
                     'test_name': test_name,
                     'expected': expected}, class_name)

    def test_author_total_karma(self):
        """Verify every aggregation of the total_karma measure per claim."""
        claim_ids = [
            'cd2e1978-4dfa-3a40-b62f-71153001629c',
            'a4beae51-463f-33fc-bbf6-20eca5104afe',
            '9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
        ]
        # (feature name, expected value per claim id — same order as claim_ids)
        expectations = [
            ('min_total_karma', (2435, 6379, 6379)),
            ('max_total_karma', (206687, 206687, 32221)),
            ('mean_total_karma', (39662.44444, 47871.16667, 13699.5)),
            ('median_total_karma', (22589, 17240.5, 8099)),
            ('skew_total_karma', (2.767953592, 2.349097328, 1.963784833)),
            ('kurtosis_total_karma', (7.954685555, 5.605190323, 3.878351431)),
        ]
        generator = self._reddit_author_by_claim_feature_generator
        generator._measure_names = ['total_karma']
        generator._aggregation_functions = [
            'min', 'max', 'mean', 'median', 'skew', 'kurtosis'
        ]
        generator.execute()

        class_name = generator.__class__.__name__
        for test_name, values in expectations:
            for claim_id, expected in zip(claim_ids, values):
                self.assert_author_feature_test_case(
                    {'claim_id': claim_id,
                     'test_name': test_name,
                     'expected': expected}, class_name)

    def test_author_count_is_gold(self):
        """Verify the count of gold authors computed per claim."""
        expected_per_claim = [
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 3),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 3),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 3),
        ]
        generator = self._reddit_author_by_claim_feature_generator
        generator._measure_names = ['count_is_gold']
        generator._aggregation_functions = []
        generator.execute()

        for claim_id, expected in expected_per_claim:
            self.assert_author_feature_test_case(
                {'claim_id': claim_id,
                 'test_name': '_count_is_gold',
                 'expected': expected}, generator.__class__.__name__)

    def test_author_count_is_moderator(self):
        """Verify the count of moderator authors computed per claim."""
        expected_per_claim = [
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 2),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 1),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 0),
        ]
        generator = self._reddit_author_by_claim_feature_generator
        generator._measure_names = ['count_is_moderator']
        generator._aggregation_functions = []
        generator.execute()

        for claim_id, expected in expected_per_claim:
            self.assert_author_feature_test_case(
                {'claim_id': claim_id,
                 'test_name': '_count_is_moderator',
                 'expected': expected}, generator.__class__.__name__)

    def test_author_count_is_employee(self):
        """Verify the count of employee authors computed per claim."""
        expected_per_claim = [
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 3),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 1),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 1),
        ]
        generator = self._reddit_author_by_claim_feature_generator
        generator._measure_names = ['count_is_employee']
        generator._aggregation_functions = []
        generator.execute()

        for claim_id, expected in expected_per_claim:
            self.assert_author_feature_test_case(
                {'claim_id': claim_id,
                 'test_name': '_count_is_employee',
                 'expected': expected}, generator.__class__.__name__)

    def test_author_ratio_is_gold(self):
        """Verify the fraction of gold authors computed per claim."""
        expected_per_claim = [
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 0.333333333),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 0.5),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 0.75),
        ]
        generator = self._reddit_author_by_claim_feature_generator
        generator._measure_names = ['ratio_is_gold']
        generator._aggregation_functions = []
        generator.execute()

        for claim_id, expected in expected_per_claim:
            self.assert_author_feature_test_case(
                {'claim_id': claim_id,
                 'test_name': '_ratio_is_gold',
                 'expected': expected}, generator.__class__.__name__)

    def test_author_ratio_is_moderator(self):
        """Verify the fraction of moderator authors computed per claim."""
        expected_per_claim = [
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 0.222222222),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 0.166666667),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 0),
        ]
        generator = self._reddit_author_by_claim_feature_generator
        generator._measure_names = ['ratio_is_moderator']
        generator._aggregation_functions = []
        generator.execute()

        for claim_id, expected in expected_per_claim:
            self.assert_author_feature_test_case(
                {'claim_id': claim_id,
                 'test_name': '_ratio_is_moderator',
                 'expected': expected}, generator.__class__.__name__)

    def test_author_ratio_is_employee(self):
        """Verify the fraction of employee authors computed per claim."""
        expected_per_claim = [
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 0.333333333),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 0.166666667),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 0.25),
        ]
        generator = self._reddit_author_by_claim_feature_generator
        generator._measure_names = ['ratio_is_employee']
        generator._aggregation_functions = []
        generator.execute()

        for claim_id, expected in expected_per_claim:
            self.assert_author_feature_test_case(
                {'claim_id': claim_id,
                 'test_name': '_ratio_is_employee',
                 'expected': expected}, generator.__class__.__name__)

    def assert_author_feature_test_case(self, test_case, class_name):
        """Check one expected-feature case; the stored attribute name is
        '<generator class name>_<test name>'."""
        attribute_name = "{}_{}".format(class_name, test_case['test_name'])
        self.assert_author_feature_number(test_case['claim_id'],
                                          attribute_name,
                                          test_case['expected'])

    def assert_author_feature_number(self, author_guid, attribute_name,
                                     expected):
        """Assert the stored feature value is numerically close (2 decimal
        places) to the expected value."""
        feature = self._db.get_author_feature(author_guid, attribute_name)
        actual = float(getattr(feature, 'attribute_value'))
        self.assertAlmostEqual(float(expected), actual, places=2)

    def _add_author(self,
                    name=None,
                    link_karma=None,
                    comment_karma=None,
                    is_employee=0,
                    is_mod=0,
                    is_gold=0,
                    author_osn_id=None):
        """Insert an Author row plus its matching RedditAuthor row."""
        guid = compute_author_guid_by_author_name(name)

        author = Author()
        author.name = name
        author.author_screen_name = name
        author.author_guid = guid
        author.domain = 'reddit'
        author.author_osn_id = author_osn_id
        author.author_full_name = name
        author.url = 'https://www.reddit.com/user/' + name

        reddit_author = RedditAuthor()
        reddit_author.name = name
        reddit_author.author_guid = guid
        reddit_author.comments_count = None
        reddit_author.comment_karma = comment_karma
        reddit_author.link_karma = link_karma
        reddit_author.is_gold = is_gold
        reddit_author.is_moderator = is_mod
        reddit_author.is_employee = is_employee

        self._db.add_authors([author])
        self._db.add_reddit_authors([reddit_author])

    def _add_post(self, author, date, post_osn_id, score=0, upvote_ratio=-1):
        """Insert a Post plus its RedditPost twin and return both.

        A sentinel ``upvote_ratio`` of -1 marks a comment (no vote breakdown);
        any other ratio marks a submission, for which ups/downs are derived
        from score and ratio.  Returns the (post, reddit_post) pair.
        """
        post = Post()
        post.post_osn_id = post_osn_id
        post.author = str(author)
        post.author_guid = compute_author_guid_by_author_name(post.author)
        # NOTE: 'formate' is the parameter name of the project's str_to_date
        # helper (kept as-is).
        post.created_at = str_to_date(date, formate="%d/%m/%Y %H:%M")
        post.url = 'https://www.reddit.com{}'.format(
            post.author)  # just for test
        post.guid = compute_post_guid(post.url, post.post_osn_id,
                                      date_to_str(post.created_at))
        post.domain = 'reddit_comment'
        post.post_type = 'reddit_comment'
        post.post_id = post.guid

        reddit_post = RedditPost()
        reddit_post.post_id = post.post_id
        reddit_post.guid = post.guid
        reddit_post.score = score
        if upvote_ratio != -1:
            # A real upvote ratio means this is a submission, not a comment.
            post.domain = 'reddit_post'
            post.post_type = 'reddit_post'
            reddit_post.upvote_ratio = upvote_ratio
            # From score = ups - downs and ratio = ups / (ups + downs):
            # ups = ratio * score / (2 * ratio - 1).  At ratio == 0.5 that
            # denominator is zero, so score / 2 is used instead.
            reddit_post.ups = int(
                round((reddit_post.upvote_ratio * reddit_post.score) /
                      (2 * reddit_post.upvote_ratio - 1)) if
                reddit_post.upvote_ratio != 0.5 else round(reddit_post.score /
                                                           2))
            reddit_post.downs = reddit_post.ups - reddit_post.score
        else:
            # Comments carry no vote breakdown; keep the -1 sentinels.
            reddit_post.ups = -1
            reddit_post.downs = -1
            reddit_post.upvote_ratio = -1

        self._db.addPosts([post, reddit_post])
        return post, reddit_post

    def _add_claim_tweet_connection(self, claim_id, post_id):
        """Link one post to one claim via a Claim_Tweet_Connection row."""
        link = Claim_Tweet_Connection()
        link.claim_id = claim_id
        link.post_id = post_id
        self._db.add_claim_connections([link])

    def _add_claim(self, claim_id):
        """Persist a minimal Claim holding only its id."""
        new_claim = Claim()
        new_claim.claim_id = claim_id
        # addPosts acts as a generic "insert records" helper in this DB layer.
        self._db.addPosts([new_claim])

    def _init_authors(self):
        """Populate the ten reddit authors the feature tests rely on."""
        # (name, comment_karma, link_karma, is_gold, is_mod, is_employee)
        author_rows = [
            ('Smile_lifeisgood', 30880, 1341, 1, 0, 0),
            ('Cunty_Balls', 7369, 90, 1, 0, 0),
            ('I_kick_fuck_nuns', 2842, 5897, 1, 0, 0),
            ('TheRiseofMindhawk', 2261, 174, 1, 1, 0),
            ('dialog2011', 37027, 4582, 0, 0, 1),
            ('chrmanyaki', 22588, 1, 0, 0, 1),
            ('Undertakerjoe', 9177, 1384, 0, 0, 0),
            ('Lmb2298', 25741, 1, 0, 0, 0),
            ('azzazaz', 35111, 171576, 0, 1, 0),
            ('juanwonone1', 6243, 136, 0, 0, 1),
        ]
        for name, comment_karma, link_karma, gold, mod, employee in author_rows:
            self._add_author(name,
                             comment_karma=comment_karma,
                             link_karma=link_karma,
                             is_gold=gold,
                             is_mod=mod,
                             is_employee=employee)

    def _init_posts(self):
        """Create every fixture post and attach it to its claim.

        An upvote ratio of -1 means "comment" (see _add_post).
        """
        # (claim_id, author, created_at, post_osn_id, score, upvote_ratio)
        post_rows = [
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 'juanwonone1',
             '15/10/2017 21:44', '76ksr4', 738, 0.97),
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 'Lmb2298',
             '01/10/2017 22:24', 'dferfgh', 52312, 0.77),
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 'dialog2011',
             '12/06/2017 23:45', '6gv0vk', 27, -1),
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 'chrmanyaki',
             '15/10/2017 21:58', 'doeq8ke', 27, -1),
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 'azzazaz',
             '12/06/2018 10:50', 'e0j4zkz', 32, -1),
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 'Smile_lifeisgood',
             '12/06/2018 20:08', 'e0in2zm', 11, -1),
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 'Undertakerjoe',
             '15/10/2017 22:17', 'doerbqu', -13, -1),
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 'I_kick_fuck_nuns',
             '18/06/2017 3:39', 'dj1qid5', 2, -1),
            ('cd2e1978-4dfa-3a40-b62f-71153001629c', 'TheRiseofMindhawk',
             '13/06/2017 8:17', 'ditymrc', 2, -1),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 'I_kick_fuck_nuns',
             '11/06/2018 18:49', '8qal3m', 102, 0.92),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 'juanwonone1',
             '16/10/2017 2:23', 'dof4fen', -321, 0.3),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 'Smile_lifeisgood',
             '13/06/2017 0:29', 'dditbt8r', 11, -1),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 'Lmb2298',
             '15/10/2017 22:38', 'doeslie', 11, -1),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 'azzazaz',
             '16/10/2017 0:30', 'doeyvtb', 9, -1),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 'juanwonone1',
             '15/10/2017 22:50', 'doetc6j', 7, -1),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 'Cunty_Balls',
             '16/10/2017 1:52', 'dof2x1x', 2, -1),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 'Cunty_Balls',
             '16/10/2017 2:43', 'dof5cpo', 2, -1),
            ('a4beae51-463f-33fc-bbf6-20eca5104afe', 'juanwonone1',
             '16/10/2017 3:45', 'dof84f8', 1, -1),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 'Cunty_Balls',
             '15/10/2017 22:24', 'doerqsj', 234, 0.53),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 'I_kick_fuck_nuns',
             '16/10/2017 21:44', '76ksr2', 123, 0.6),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 'Smile_lifeisgood',
             '13/06/2017 7:04', 'ditvpox', 7, -1),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 'Smile_lifeisgood',
             '13/06/2017 0:51', 'ditcy28', 5, -1),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 'juanwonone1',
             '15/10/2017 23:36', 'doevzsq', 5, -1),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 'juanwonone1',
             '16/10/2017 0:26', 'doeynrr', 5, -1),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 'I_kick_fuck_nuns',
             '11/06/2018 21:55', 'e0hy5he', 1, -1),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 'I_kick_fuck_nuns',
             '11/06/2018 22:04', 'e0hyrhi', 1, -1),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 'I_kick_fuck_nuns',
             '12/06/2018 1:31', 'e0icveq', 1, -1),
            ('9e875999-9a3e-3357-bfa6-ede4fe67c1c9', 'Cunty_Balls',
             '13/06/2017 7:55', 'ditxua6', 3, -1),
        ]
        for claim_id, author, created_at, post_osn_id, score, ratio in post_rows:
            post, _ = self._add_post(author, created_at, post_osn_id, score,
                                     ratio)
            self._add_claim_tweet_connection(claim_id, post.guid)

    def _init_claims(self):
        """Register the three claim ids referenced by the post fixtures."""
        for claim_id in ('cd2e1978-4dfa-3a40-b62f-71153001629c',
                         'a4beae51-463f-33fc-bbf6-20eca5104afe',
                         '9e875999-9a3e-3357-bfa6-ede4fe67c1c9'):
            self._add_claim(claim_id)

    def _get_params(self):
        return {'authors': [], 'posts': []}


# test_cases = [
#             {
#                 'claim_id': u'cd2e1978-4dfa-3a40-b62f-71153001629c',
#                 'test_name': 'min_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'a4beae51-463f-33fc-bbf6-20eca5104afe',
#                 'test_name': 'min_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
#                 'test_name': 'min_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'cd2e1978-4dfa-3a40-b62f-71153001629c',
#                 'test_name': 'max_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'a4beae51-463f-33fc-bbf6-20eca5104afe',
#                 'test_name': 'max_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
#                 'test_name': 'max_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'cd2e1978-4dfa-3a40-b62f-71153001629c',
#                 'test_name': 'mean_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'a4beae51-463f-33fc-bbf6-20eca5104afe',
#                 'test_name': 'mean_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
#                 'test_name': 'mean_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'cd2e1978-4dfa-3a40-b62f-71153001629c',
#                 'test_name': 'median_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'a4beae51-463f-33fc-bbf6-20eca5104afe',
#                 'test_name': 'median_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
#                 'test_name': 'median_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'cd2e1978-4dfa-3a40-b62f-71153001629c',
#                 'test_name': 'skew_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'a4beae51-463f-33fc-bbf6-20eca5104afe',
#                 'test_name': 'skew_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
#                 'test_name': 'skew_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'cd2e1978-4dfa-3a40-b62f-71153001629c',
#                 'test_name': 'kurtosis_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'a4beae51-463f-33fc-bbf6-20eca5104afe',
#                 'test_name': 'kurtosis_',
#                 'expected':
#             },
#             {
#                 'claim_id': u'9e875999-9a3e-3357-bfa6-ede4fe67c1c9',
#                 'test_name': 'kurtosis_',
#                 'expected':
#             }
#         ]
Beispiel #5
0
class TestFakeNewsFeatureGenerator(TestCase):
    """Integration tests for FakeNewsFeatureGenerator.

    Each test populates a fresh DB with authors, claims, posts, and
    claim-tweet connections, runs the generator, and checks the author
    features it writes: per-dictionary-word counts and fractions, their
    sums, and (for the Claim domain) the normalized claim verdict.
    """

    def setUp(self):
        # Fresh DB per test; the fixture helpers below populate it.
        self._db = DB()

        self._db.setUp()
        self._posts = []
        self._author = None

    def tearDown(self):
        # NOTE(review): only the session is closed; unlike sibling test
        # classes in this file there is no deleteDB() call -- confirm the
        # DB fixture is per-test/in-memory so state cannot leak.
        self._db.session.close()

    def test_get_word_count_1_claim_1_comments_no_words(self):
        """One comment with no dictionary words -> all counts 0, sums '0'/'0.0'."""
        self._add_author('author_guid')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00")
        self._add_post("post1", "no bad words at all", "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post0", "post1")
        self._db.session.commit()

        self.fake_news_feature_generator = FakeNewsFeatureGenerator(self._db)
        self.fake_news_feature_generator.setUp()
        self.fake_news_feature_generator.execute()

        self.assert_word_dictionary_count('post0', {})

        self.assert_word_dictionary_fraction('post0', {})

        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_words_count_sum')
        self.assertEqual('0', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_words_fraction_sum')
        self.assertEqual('0.0', author_feature.attribute_value)

    def test_get_word_count_1_claim_1_comments_1_words(self):
        """One comment containing 'liar' once -> count 1, fraction 1.0."""
        self._add_author('author_guid')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00")
        self._add_post("post1", "1 bad word liar", "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post0", "post1")
        self._db.session.commit()

        self.fake_news_feature_generator = FakeNewsFeatureGenerator(self._db)
        self.fake_news_feature_generator.setUp()
        self.fake_news_feature_generator.execute()

        self.assert_word_dictionary_count('post0', {'liar': '1'})

        self.assert_word_dictionary_fraction('post0', {'liar': '1.0'})

        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_words_count_sum')
        self.assertEqual('1', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_words_fraction_sum')
        self.assertEqual('1.0', author_feature.attribute_value)

    def test_get_word_count_1_claim_4_comments_1_words(self):
        """'liar' once over 4 comments -> count 1, fraction 1/4 = 0.25."""
        self._add_author('author_guid')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00")
        self._add_post("post1", "1 bad word liar", "2017-06-12 05:00:00")
        self._add_post("post2", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post3", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post4", "no bad words at all", "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post0", "post1")
        self._add_claim_tweet_connection("post0", "post2")
        self._add_claim_tweet_connection("post0", "post3")
        self._add_claim_tweet_connection("post0", "post4")
        self._db.session.commit()

        self.fake_news_feature_generator = FakeNewsFeatureGenerator(self._db)
        self.fake_news_feature_generator.setUp()
        self.fake_news_feature_generator.execute()

        self.assert_word_dictionary_count('post0', {'liar': '1'})

        self.assert_word_dictionary_fraction('post0', {'liar': '0.25'})

        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_words_count_sum')
        self.assertEqual('1', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_words_fraction_sum')
        self.assertEqual('0.25', author_feature.attribute_value)

    def test_get_word_count_1_claim_4_comments_8_words(self):
        """'liar' 8 times over 4 comments -> count 8, fraction 8/4 = 2.0."""
        self._add_author('author')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00")
        self._add_post("post1", "1 liar bad word liar", "2017-06-12 05:00:00")
        self._add_post("post2", "no bad words liar at all liar",
                       "2017-06-12 05:00:00")
        self._add_post("post3", "no liar bad words at all liar",
                       "2017-06-12 05:00:00")
        self._add_post("post4", " liar no liar bad words at all",
                       "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post0", "post1")
        self._add_claim_tweet_connection("post0", "post2")
        self._add_claim_tweet_connection("post0", "post3")
        self._add_claim_tweet_connection("post0", "post4")
        self._db.session.commit()

        self.fake_news_feature_generator = FakeNewsFeatureGenerator(self._db)
        self.fake_news_feature_generator.setUp()
        self.fake_news_feature_generator.execute()

        self.assert_word_dictionary_count('post0', {'liar': '8'})

        self.assert_word_dictionary_fraction('post0', {'liar': '2.0'})

        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_words_count_sum')
        self.assertEqual('8', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_words_fraction_sum')
        self.assertEqual('2.0', author_feature.attribute_value)

    def test_get_word_count_1_claim_4_comments_8_different_words(self):
        """Several distinct dictionary words; sums reflect the top word only."""
        self._add_author('author')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00")
        self._add_post("post1", "1 liar bad word joke", "2017-06-12 05:00:00")
        self._add_post("post2", "no bad words untrue at all liar",
                       "2017-06-12 05:00:00")
        self._add_post("post3", "no joke bad words at all laugh",
                       "2017-06-12 05:00:00")
        self._add_post("post4", " liar no didnt actually bad words at all",
                       "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post0", "post1")
        self._add_claim_tweet_connection("post0", "post2")
        self._add_claim_tweet_connection("post0", "post3")
        self._add_claim_tweet_connection("post0", "post4")
        self._db.session.commit()

        self.fake_news_feature_generator = FakeNewsFeatureGenerator(self._db)
        self.fake_news_feature_generator.setUp()
        self.fake_news_feature_generator.execute()

        self.assert_word_dictionary_count(
            'post0', {
                'liar': '3',
                'joke': '2',
                'didnt actually': '1',
                'untrue': '1',
                'laugh': '1'
            })

        self.assert_word_dictionary_fraction(
            'post0', {
                'liar': '0.75',
                'joke': '0.5',
                'didnt actually': '0.25',
                'untrue': '0.25',
                'laugh': '0.25'
            })

        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_words_count_sum')
        self.assertEqual('3', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_words_fraction_sum')
        self.assertEqual('0.75', author_feature.attribute_value)

    def test_get_claim_type_4_claim(self):
        """Verdict normalization: false-family -> 'False', true-family -> 'True',
        unknown/half verdicts -> no feature written at all."""
        self._add_author('author_guid')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00")
        self._add_claim('post1', 'the claim', "2017-06-10 05:00:00", 'FALSE')
        self._add_claim('post2', 'the claim', "2017-06-10 05:00:00",
                        'pants-fire')
        self._add_claim('post3', 'the claim', "2017-06-10 05:00:00",
                        'mostly-false')
        self._add_claim('post4', 'the claim', "2017-06-10 05:00:00", 'TRUE')
        self._add_claim('post5', 'the claim', "2017-06-10 05:00:00",
                        'mostly-true')
        self._add_claim('post6', 'the claim', "2017-06-10 05:00:00",
                        'half_true')
        self._add_claim('post7', 'the claim', "2017-06-10 05:00:00",
                        'unproven')
        self._db.session.commit()

        self.fake_news_feature_generator = FakeNewsFeatureGenerator(self._db)
        self.fake_news_feature_generator._domain = 'Claim'
        self.fake_news_feature_generator.setUp()
        self.fake_news_feature_generator.execute()

        author_feature = self._db.get_author_feature(
            'post0', 'FakeNewsFeatureGenerator_claim_verdict')
        self.assertEqual(None, author_feature)
        author_feature = self._db.get_author_feature(
            'post1', 'FakeNewsFeatureGenerator_claim_verdict')
        self.assertEqual('False', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            'post2', 'FakeNewsFeatureGenerator_claim_verdict')
        self.assertEqual('False', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            'post3', 'FakeNewsFeatureGenerator_claim_verdict')
        self.assertEqual('False', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            'post4', 'FakeNewsFeatureGenerator_claim_verdict')
        self.assertEqual('True', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            'post5', 'FakeNewsFeatureGenerator_claim_verdict')
        self.assertEqual('True', author_feature.attribute_value)
        author_feature = self._db.get_author_feature(
            'post6', 'FakeNewsFeatureGenerator_claim_verdict')
        self.assertEqual(None, author_feature)
        author_feature = self._db.get_author_feature(
            'post7', 'FakeNewsFeatureGenerator_claim_verdict')
        self.assertEqual(None, author_feature)

    def assert_word_dictionary_count(self, author_guid, values):
        """Assert per-word *count* features; words absent from `values` must be '0'."""
        self.assert_dictionary_words(author_guid,
                                     'FakeNewsFeatureGenerator_{0}_count', '0',
                                     values)

    def assert_word_dictionary_fraction(self, author_guid, values):
        """Assert per-word *fraction* features; words absent from `values` must be '0.0'."""
        self.assert_dictionary_words(author_guid,
                                     'FakeNewsFeatureGenerator_{0}_fraction',
                                     '0.0', values)

    def assert_dictionary_words(self, author_guid, count_template,
                                default_value, values):
        """Check one feature per dictionary word against `values` (or the default).

        Multi-word dictionary entries are normalized with '-' before being
        formatted into the feature name (e.g. 'didnt actually' ->
        'didnt-actually'). NOTE(review): the normalized form is also used
        for the `values` lookup, so a caller key containing a space (as in
        test_get_word_count_1_claim_4_comments_8_different_words) can never
        match and falls through to the default -- confirm this is intended.
        """
        fake_news_dictionary_words = self.fake_news_feature_generator._fake_news_dictionary
        for word in fake_news_dictionary_words:
            word = word.strip().replace(' ', '-')

            author_feature = self._db.get_author_feature(
                author_guid, count_template.format(word))
            if word in values:
                self.assertEqual(values[word], author_feature.attribute_value)
            else:
                self.assertEqual(default_value, author_feature.attribute_value)

    def _add_author(self, author_guid):
        """Persist a minimal Author and remember it as the current author."""
        author = Author()
        author.author_guid = author_guid
        author.author_full_name = 'test author'
        author.author_screen_name = author_guid
        author.name = 'test'
        author.domain = 'tests'
        author.statuses_count = 0
        author.created_at = "2017-06-14 05:00:00"
        self._db.add_author(author)
        self._author = author

    def _add_post(self,
                  title,
                  content,
                  date_str,
                  domain='Microblog',
                  post_type=None):
        """Persist a Post by the current author; `title` doubles as the post id."""
        post = Post()
        post.author = self._author.author_guid
        post.author_guid = self._author.author_guid
        post.content = content
        post.title = title
        post.domain = domain
        post.post_id = title
        post.guid = post.post_id
        post.date = convert_str_to_unicode_datetime(date_str)
        post.created_at = post.date
        post.post_type = post_type
        self._db.addPost(post)
        self._posts.append(post)

    def _get_params(self):
        """Return the standard {'authors', 'posts'} params dict for module runs."""
        posts = {self._author.author_guid: self._posts}
        # Fixed: was `params = params = {...}` (redundant double assignment).
        params = {'authors': [self._author], 'posts': posts}
        return params

    def _add_claim_tweet_connection(self, claim_id, post_id):
        """Link a claim to one of its discussing posts."""
        connection = Claim_Tweet_Connection()
        connection.claim_id = claim_id
        connection.post_id = post_id
        self._db.add_claim_connections([connection])

    def _add_claim(self, claim_id, content, date_str, post_type=None):
        """Persist a Claim; `post_type` becomes its verdict (may be None).

        NOTE(review): persisted via addPost(), which presumably accepts any
        mapped record -- confirm this is the intended API for Claim rows.
        """
        claim = Claim()
        claim.claim_id = claim_id
        claim.verdict = post_type
        claim.title = claim_id
        claim.description = content
        claim.verdict_date = convert_str_to_unicode_datetime(date_str)
        claim.url = "claim url"
        self._db.addPost(claim)
# Beispiel #6 (example separator from the scraped page; score: 0)
class TestFakeNewsClassifier(TestCase):
    """Integration tests for FakeNewsClassifier's dictionary-based verdicts.

    Each test builds claims with known verdicts plus supporting posts,
    runs the classifier, and checks the FN/FP/accuracy/AUC columns of the
    CSV results file it writes.
    """

    def setUp(self):
        # Fresh DB per test; the fixture helpers below populate it.
        self._db = DB()

        self._db.setUp()
        self._posts = []
        self._author = None

    def tearDown(self):
        # NOTE(review): only the session is closed; no deleteDB() here,
        # unlike sibling test classes -- confirm the DB is per-test.
        self._db.session.close()

    def test_classify_by_dictionary_1_FN_1_FP(self):
        """TRUE claim with bad words + FALSE claim without -> 1 FN, 1 FP."""
        self._add_author('author')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00", 'TRUE')
        self._add_post("post1", "1 liar bad word joke", "2017-06-12 05:00:00")
        self._add_post("post2", "no bad words untrue at all liar",
                       "2017-06-12 05:00:00")
        self._add_post("post3", "no joke bad words at all laugh",
                       "2017-06-12 05:00:00")
        self._add_post("post4", " liar no didnt actually bad words at all",
                       "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post0", "post1")
        self._add_claim_tweet_connection("post0", "post2")
        self._add_claim_tweet_connection("post0", "post3")
        self._add_claim_tweet_connection("post0", "post4")

        self._add_author('author_guid')
        self._add_claim('post5', 'the claim', "2017-06-10 05:00:00", 'FALSE')
        self._add_post("post6", "1 bad word at all", "2017-06-12 05:00:00")
        self._add_post("post7", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post8", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post9", "no bad words at all", "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post5", "post6")
        self._add_claim_tweet_connection("post5", "post7")
        self._add_claim_tweet_connection("post5", "post8")
        self._add_claim_tweet_connection("post5", "post9")
        self._db.session.commit()

        output_data = self._run_classifier_and_read_results()
        self.assertAlmostEqual(float(output_data['FN (think good but bad)']),
                               1)
        self.assertAlmostEqual(float(output_data['FP (think bad but good)']),
                               1)
        self.assertAlmostEqual(float(output_data['accuracy']), 0.0)
        self.assertAlmostEqual(float(output_data['AUC']), 0.0)

    def test_classify_by_dictionary_1_FN_1_FP_and_ignore_1(self):
        """Same as above plus an 'unknown'-verdict claim that must be ignored."""
        self._add_author('author')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00", 'TRUE')
        self._add_post("post1", "1 liar bad word joke", "2017-06-12 05:00:00")
        self._add_post("post2", "no bad words untrue at all liar",
                       "2017-06-12 05:00:00")
        self._add_post("post3", "no joke bad words at all laugh",
                       "2017-06-12 05:00:00")
        self._add_post("post4", " liar no didnt actually bad words at all",
                       "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post0", "post1")
        self._add_claim_tweet_connection("post0", "post2")
        self._add_claim_tweet_connection("post0", "post3")
        self._add_claim_tweet_connection("post0", "post4")

        self._add_author('author_guid')
        self._add_claim('post5', 'the claim', "2017-06-10 05:00:00", 'FALSE')
        self._add_post("post6", "1 bad word at all", "2017-06-12 05:00:00")
        self._add_post("post7", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post8", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post9", "no bad words at all", "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post5", "post6")
        self._add_claim_tweet_connection("post5", "post7")
        self._add_claim_tweet_connection("post5", "post8")
        self._add_claim_tweet_connection("post5", "post9")

        self._add_claim('post10', 'the claim', "2017-06-10 05:00:00",
                        'unknown')
        self._add_post("post11", "1 liar bad word joke", "2017-06-12 05:00:00")
        self._add_post("post12", "no bad words untrue at all liar",
                       "2017-06-12 05:00:00")
        self._add_post("post13", "no joke bad words at all laugh",
                       "2017-06-12 05:00:00")
        self._add_post("post14", " liar no didnt actually bad words at all",
                       "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post10", "post11")
        self._add_claim_tweet_connection("post10", "post12")
        self._add_claim_tweet_connection("post10", "post13")
        self._add_claim_tweet_connection("post10", "post14")

        self._db.session.commit()

        output_data = self._run_classifier_and_read_results()
        self.assertAlmostEqual(float(output_data['FN (think good but bad)']),
                               1)
        self.assertAlmostEqual(float(output_data['FP (think bad but good)']),
                               1)
        self.assertAlmostEqual(float(output_data['accuracy']), 0.0)
        self.assertAlmostEqual(float(output_data['AUC']), 0.0)

    def test_classify_by_dictionary_0_FN_0_FP(self):
        """FALSE claim with bad words + TRUE claim without -> perfect score."""
        self._add_author('author')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00", 'FALSE')
        self._add_post("post1", "1 liar bad word joke", "2017-06-12 05:00:00")
        self._add_post("post2", "no bad words untrue at all liar",
                       "2017-06-12 05:00:00")
        self._add_post("post3", "no joke bad words at all laugh",
                       "2017-06-12 05:00:00")
        self._add_post("post4", " liar no didnt actually bad words at all",
                       "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post0", "post1")
        self._add_claim_tweet_connection("post0", "post2")
        self._add_claim_tweet_connection("post0", "post3")
        self._add_claim_tweet_connection("post0", "post4")

        self._add_author('author_guid')
        self._add_claim('post5', 'the claim', "2017-06-10 05:00:00", 'TRUE')
        self._add_post("post6", "1 bad word at all", "2017-06-12 05:00:00")
        self._add_post("post7", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post8", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post9", "no bad words at all", "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post5", "post6")
        self._add_claim_tweet_connection("post5", "post7")
        self._add_claim_tweet_connection("post5", "post8")
        self._add_claim_tweet_connection("post5", "post9")
        self._db.session.commit()

        output_data = self._run_classifier_and_read_results()
        self.assertAlmostEqual(float(output_data['FN (think good but bad)']),
                               0)
        self.assertAlmostEqual(float(output_data['FP (think bad but good)']),
                               0)
        self.assertAlmostEqual(float(output_data['accuracy']), 1.0)
        self.assertAlmostEqual(float(output_data['AUC']), 1.0)

    def test_classify_by_dictionary_1_FN_0_FP_3_claims(self):
        """Three claims, one FALSE claim missed -> 1 FN, accuracy 2/3, AUC 0.75."""
        self._add_author('author')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00", 'FALSE')
        self._add_post("post1", "1 liar bad word joke", "2017-06-12 05:00:00")
        self._add_post("post2", "no bad words untrue at all liar",
                       "2017-06-12 05:00:00")
        self._add_post("post3", "no joke bad words at all laugh",
                       "2017-06-12 05:00:00")
        self._add_post("post4", " liar no didnt actually bad words at all",
                       "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post0", "post1")
        self._add_claim_tweet_connection("post0", "post2")
        self._add_claim_tweet_connection("post0", "post3")
        self._add_claim_tweet_connection("post0", "post4")

        self._add_author('author_guid')
        self._add_claim('post5', 'the claim', "2017-06-10 05:00:00", 'TRUE')
        self._add_post("post6", "1 bad word at all", "2017-06-12 05:00:00")
        self._add_post("post7", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post8", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post9", "no bad words at all", "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post5", "post6")
        self._add_claim_tweet_connection("post5", "post7")
        self._add_claim_tweet_connection("post5", "post8")
        self._add_claim_tweet_connection("post5", "post9")

        self._add_author('author_guid')
        self._add_claim('post10', 'the claim', "2017-06-10 05:00:00", 'FALSE')
        self._add_post("post11", "1 bad word at all", "2017-06-12 05:00:00")
        self._add_post("post12", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post13", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post14", "no bad words at all", "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post10", "post11")
        self._add_claim_tweet_connection("post10", "post12")
        self._add_claim_tweet_connection("post10", "post13")
        self._add_claim_tweet_connection("post10", "post14")
        self._db.session.commit()

        output_data = self._run_classifier_and_read_results()
        self.assertAlmostEqual(float(output_data['FN (think good but bad)']),
                               1)
        self.assertAlmostEqual(float(output_data['FP (think bad but good)']),
                               0)
        self.assertAlmostEqual(float(output_data['accuracy']),
                               0.666666,
                               places=4)
        self.assertAlmostEqual(float(output_data['AUC']), 0.75)

    def test_classify_by_dictionary_0_FN_1_FP_3_claims(self):
        """Three claims, one TRUE claim flagged -> 1 FP, accuracy 2/3, AUC 0.75."""
        self._add_author('author')
        self._add_claim('post0', 'the claim', "2017-06-10 05:00:00", 'FALSE')
        self._add_post("post1", "1 liar bad word joke", "2017-06-12 05:00:00")
        self._add_post("post2", "no bad words untrue at all liar",
                       "2017-06-12 05:00:00")
        self._add_post("post3", "no joke bad words at all laugh",
                       "2017-06-12 05:00:00")
        self._add_post("post4", " liar no didnt actually bad words at all",
                       "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post0", "post1")
        self._add_claim_tweet_connection("post0", "post2")
        self._add_claim_tweet_connection("post0", "post3")
        self._add_claim_tweet_connection("post0", "post4")

        self._add_claim('post5', 'the claim', "2017-06-10 05:00:00", 'TRUE')
        self._add_post("post6", "1 bad word at all", "2017-06-12 05:00:00")
        self._add_post("post7", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post8", "no bad words at all", "2017-06-12 05:00:00")
        self._add_post("post9", "no bad words at all", "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post5", "post6")
        self._add_claim_tweet_connection("post5", "post7")
        self._add_claim_tweet_connection("post5", "post8")
        self._add_claim_tweet_connection("post5", "post9")

        self._add_claim('post10', 'the claim', "2017-06-10 05:00:00", 'TRUE')
        self._add_post("post11", "1 liar bad word joke", "2017-06-12 05:00:00")
        self._add_post("post12", "no bad words untrue at all liar",
                       "2017-06-12 05:00:00")
        self._add_post("post13", "no joke bad words at all laugh",
                       "2017-06-12 05:00:00")
        self._add_post("post14", " liar no didnt actually bad words at all",
                       "2017-06-12 05:00:00")
        self._add_claim_tweet_connection("post10", "post11")
        self._add_claim_tweet_connection("post10", "post12")
        self._add_claim_tweet_connection("post10", "post13")
        self._add_claim_tweet_connection("post10", "post14")
        self._db.session.commit()

        output_data = self._run_classifier_and_read_results()
        self.assertAlmostEqual(float(output_data['FN (think good but bad)']),
                               0)
        self.assertAlmostEqual(float(output_data['FP (think bad but good)']),
                               1)
        self.assertAlmostEqual(float(output_data['accuracy']),
                               0.666666,
                               places=4)
        self.assertAlmostEqual(float(output_data['AUC']), 0.75)

    def _run_classifier_and_read_results(self):
        """Run the classifier over the current DB and return the first CSV row.

        Extracted from the five tests above, which each duplicated this
        sequence. Fixed: the results file was opened and never closed
        (file-handle leak); `with` now guarantees it is released.
        """
        self.fake_news_feature_classifier = FakeNewsClassifier(self._db)
        self.fake_news_feature_classifier.setUp()
        self.fake_news_feature_classifier.execute()
        output_file_path = self.fake_news_feature_classifier._output_path + '/fake_news_classifier_results.csv'
        with open(output_file_path, 'r') as output_file:
            reader = csv.DictReader(output_file)
            return next(reader)

    def _add_author(self, author_guid):
        """Persist a minimal Author and remember it as the current author."""
        author = Author()
        author.author_guid = author_guid
        author.author_full_name = 'test author'
        author.author_screen_name = author_guid
        author.name = 'test'
        author.domain = 'tests'
        author.statuses_count = 0
        author.created_at = "2017-06-14 05:00:00"
        self._db.add_author(author)
        self._author = author

    def _add_post(self,
                  title,
                  content,
                  date_str,
                  domain='Microblog',
                  post_type=None):
        """Persist a Post by the current author; `title` doubles as the post id."""
        post = Post()
        post.author = self._author.author_guid
        post.author_guid = self._author.author_guid
        post.content = content
        post.title = title
        post.domain = domain
        post.post_id = title
        post.guid = post.post_id
        post.date = convert_str_to_unicode_datetime(date_str)
        post.created_at = post.date
        post.post_type = post_type
        self._db.addPost(post)
        self._posts.append(post)

    def _add_claim_tweet_connection(self, claim_id, post_id):
        """Link a claim to one of its discussing posts."""
        connection = Claim_Tweet_Connection()
        connection.claim_id = claim_id
        connection.post_id = post_id
        self._db.add_claim_connections([connection])

    def _add_claim(self, claim_id, content, date_str, post_type=None):
        """Persist a Claim; `post_type` becomes its verdict (may be None).

        NOTE(review): persisted via addPost(), which presumably accepts any
        mapped record -- confirm this is the intended API for Claim rows.
        """
        claim = Claim()
        claim.claim_id = claim_id
        claim.verdict = post_type
        claim.title = claim_id
        claim.description = content
        claim.verdict_date = convert_str_to_unicode_datetime(date_str)
        claim.url = "claim url"
        self._db.addPost(claim)
# Beispiel #7 (example separator from the scraped page; score: 0)
class TestEntityToTopicConverter(TestCase):
    def setUp(self):
        """Create a fresh test DB, a default author, and the converter under test."""
        self._db = DB()
        self._db.setUp()
        self._posts = []  # every Post inserted through _add_post
        self._post_dictionary = {}  # post_id -> Post, consumed by assertTopicInserted
        self._authors = []  # every Author inserted through _add_author
        self._add_author('test author')
        self._preprocess_visualization = EntityToTopicConverter(self._db)

    def tearDown(self):
        """Dispose of all open sessions and drop the test database."""
        # NOTE(review): Session.close_all() is deprecated in modern
        # SQLAlchemy in favor of sqlalchemy.orm.close_all_sessions() —
        # confirm against the project's pinned SQLAlchemy version.
        self._db.session.close_all()
        self._db.deleteDB()
        self._db.session.close()

    def test_generate_topics_no_topics(self):
        """Without any Claim-domain posts, topic generation produces no rows."""
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain', 'value': 'Claim'}],
            },
            'connection': {},
            'destination': {},
        }
        converter = self._preprocess_visualization
        id_to_elements = converter._get_source_id_target_elements(arg)
        converter.generate_topics_tables(id_to_elements, arg)
        self.assertEqual(self._db.get_topics(), [])

    def test_generate_topics_from_1_claim(self):
        """A single claim post yields one topic whose terms match its content."""
        self._add_post("test author", 'claim1', 'claim1 content', 'Claim')
        self._db.session.commit()
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain', 'value': 'Claim'}],
            },
            'connection': {},
            'destination': {},
        }
        converter = self._preprocess_visualization
        id_to_elements = converter._get_source_id_target_elements(arg)
        converter.generate_topics_tables(id_to_elements, arg)
        converter.save_topic_entities()

        self.assertTopicInserted('claim1')

    def test_generate_topics_from_1_claim_and_remove_stop_words(self):
        """With stop-word removal enabled, the generated topic still matches."""
        self._add_post("test author", 'claim1', 'claim1 go to the house',
                       'Claim')
        self._db.session.commit()
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain', 'value': 'Claim'}],
            },
            'connection': {},
            'destination': {},
        }
        converter = self._preprocess_visualization
        converter._remove_stop_words = True
        id_to_elements = converter._get_source_id_target_elements(arg)
        converter.generate_topics_tables(id_to_elements, arg)
        converter.save_topic_entities()

        self.assertTopicInserted('claim1')

    def test_generate_topics_from_5_claims(self):
        """Each of five claim posts gets its own matching topic."""
        claim_contents = {
            'claim1': 'claim1 content',
            'claim2': 'claim2 content',
            'claim3': 'claim3 content move',
            'claim4': 'claim4 dif data',
            'claim5': 'claim5 some boring text',
        }
        for claim_id, text in claim_contents.items():
            self._add_post("test author", claim_id, text, 'Claim')
        self._db.session.commit()
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain', 'value': 'Claim'}],
            },
            'connection': {},
            'destination': {},
        }
        converter = self._preprocess_visualization
        id_to_elements = converter._get_source_id_target_elements(arg)
        converter.generate_topics_tables(id_to_elements, arg)
        converter.save_topic_entities()

        for claim_id in claim_contents:
            self.assertTopicInserted(claim_id)

    def test_generate_post_topic_mapping_no_claim(self):
        """With no claims in the DB, no post-topic mappings are produced."""
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain', 'value': 'Claim'}],
            },
            'connection': {},
            'destination': {},
        }
        converter = self._preprocess_visualization
        id_to_elements = converter._get_source_id_target_elements(arg)
        converter.generate_post_topic_mapping(id_to_elements, arg)
        self.assertEqual(0, len(self._db.get_post_topic_mapping()))

    def test_generate_post_topic_mapping_1_claim(self):
        """Three posts connected to one claim all map to that claim's topic."""
        self._add_post("test author", 'claim1', 'claim1 content', 'Claim')
        self._add_post("test author", 'post1', 'post1 content of data',
                       'Microblog')
        self._add_post("test author", 'post2', 'post2  bla bla', 'Microblog')
        self._add_post("test author", 'post3', 'post3 noting  new',
                       'Microblog')
        for post_id in ('post1', 'post2', 'post3'):
            self._add_claim_tweet_connection('claim1', post_id)
        self._db.session.commit()
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain', 'value': 'Claim'}],
            },
            'connection': {
                'table_name': 'claim_tweet_connection',
                'source_id': 'claim_id',
                'target_id': 'post_id',
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain',
                                   'value': 'Microblog'}],
            },
        }
        converter = self._preprocess_visualization
        id_to_elements = converter._get_source_id_target_elements(arg)
        converter.generate_topics_tables(id_to_elements, arg)
        converter.generate_post_topic_mapping(id_to_elements, arg)
        converter.save_topic_entities()

        rows = self._db.get_post_topic_mapping()
        mappings = [(row.post_id, row.max_topic_id, row.max_topic_dist)
                    for row in rows]
        topic_id = converter.get_source_id_topic_dictionary()['claim1']
        self.assertEqual(3, len(mappings))
        expected = {(pid, topic_id, 1.0)
                    for pid in ('post1', 'post2', 'post3')}
        self.assertSetEqual(expected, set(mappings))

    def test_generate_post_topic_mapping_2_claim(self):
        """Posts connected to two different claims map to their own topics."""
        self._add_post("test author", 'claim1', 'claim1 content', 'Claim')
        self._add_post("test author", 'claim2', 'claim1 content', 'Claim')
        self._add_post("test author", 'post1', 'post1 content of data',
                       'Microblog')
        self._add_post("test author", 'post2', 'post2  bla bla', 'Microblog')
        self._add_post("test author", 'post3', 'post3 noting  new',
                       'Microblog')
        self._add_post("test author", 'post4', 'post4  bla bla', 'Microblog')
        self._add_post("test author", 'post5', 'post5 noting  new',
                       'Microblog')
        for claim_id, post_id in [('claim1', 'post1'), ('claim1', 'post2'),
                                  ('claim1', 'post3'), ('claim2', 'post4'),
                                  ('claim2', 'post5')]:
            self._add_claim_tweet_connection(claim_id, post_id)
        self._db.session.commit()
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain', 'value': 'Claim'}],
            },
            'connection': {
                'table_name': 'claim_tweet_connection',
                'source_id': 'claim_id',
                'target_id': 'post_id',
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain',
                                   'value': 'Microblog'}],
            },
        }
        converter = self._preprocess_visualization
        id_to_elements = converter._get_source_id_target_elements(arg)
        converter.generate_topics_tables(id_to_elements, arg)
        converter.generate_post_topic_mapping(id_to_elements, arg)
        converter.save_topic_entities()

        rows = self._db.get_post_topic_mapping()
        mappings = [(row.post_id, row.max_topic_id, row.max_topic_dist)
                    for row in rows]
        source_topics = converter.get_source_id_topic_dictionary()
        topic_id1 = source_topics['claim1']
        topic_id2 = source_topics['claim2']
        self.assertEqual(5, len(mappings))
        expected = {('post1', topic_id1, 1.0), ('post2', topic_id1, 1.0),
                    ('post3', topic_id1, 1.0), ('post4', topic_id2, 1.0),
                    ('post5', topic_id2, 1.0)}
        self.assertSetEqual(expected, set(mappings))

    def test__generate_author_topic_mapping_2_claim(self):
        """Author topic distributions reflect which author wrote the posts."""
        self._add_author('test author2')
        self._add_post("test author", 'claim1', 'claim1 content', 'Claim')
        self._add_post("test author2", 'claim2', 'claim1 content', 'Claim')
        self._add_post("test author", 'post1', 'post1 content of data',
                       'Microblog')
        self._add_post("test author", 'post2', 'post2  bla bla', 'Microblog')
        self._add_post("test author", 'post3', 'post3 noting  new',
                       'Microblog')
        self._add_post("test author", 'post4', 'post4  bla bla', 'Microblog')
        self._add_post("test author", 'post5', 'post5 noting  new',
                       'Microblog')
        for claim_id, post_id in [('claim1', 'post1'), ('claim1', 'post2'),
                                  ('claim1', 'post3'), ('claim2', 'post4'),
                                  ('claim2', 'post5')]:
            self._add_claim_tweet_connection(claim_id, post_id)
        self._db.session.commit()
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain', 'value': 'Claim'}],
            },
            'connection': {
                'table_name': 'claim_tweet_connection',
                'source_id': 'claim_id',
                'target_id': 'post_id',
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                'where_clauses': [{'field_name': 'domain',
                                   'value': 'Microblog'}],
            },
        }
        converter = self._preprocess_visualization
        converter._domain = "Microblog"
        id_to_elements = converter._get_source_id_target_elements(arg)
        converter.generate_topics_tables(id_to_elements, arg)
        converter.generate_post_topic_mapping(id_to_elements, arg)
        converter.generate_author_topic_mapping()
        converter.save_topic_entities()

        mapping = self._db.get_author_topic_mapping()
        self.assertEqual(2, len(mapping))
        self.assertSetEqual({('test author', 0.6, 0.4),
                             ('test author2', 0, 0)}, set(mapping))

    def test_visualization(self):
        """End-to-end execute() populates author and post topic mappings."""
        self._add_author('test author2', "bad_actor")
        self._add_post("test author", 'claim1', 'claim1 content', 'Claim')
        self._add_post("test author2", 'claim2', 'claim2 content', 'Claim')
        self._add_post("test author", 'post1', 'post1 content of data',
                       'Microblog')
        self._add_post("test author", 'post2', 'post2  bla bla', 'Microblog')
        self._add_post("test author", 'post3', 'post3 noting  new',
                       'Microblog')
        self._add_post("test author2", 'post4', 'post4  bla bla', 'Microblog')
        self._add_post("test author2", 'post5', 'post5 noting  new',
                       'Microblog')
        for claim_id, post_id in [('claim1', 'post1'), ('claim1', 'post2'),
                                  ('claim1', 'post4'), ('claim2', 'post3'),
                                  ('claim2', 'post5')]:
            self._add_claim_tweet_connection(claim_id, post_id)
        self._db.session.commit()
        converter = self._preprocess_visualization
        converter._domain = "Microblog"
        converter.execute()

        author_topic_mapping = self._db.get_author_topic_mapping()
        post_rows = self._db.get_post_topic_mapping()
        post_topic_mappings = [(row.post_id, row.max_topic_id,
                                row.max_topic_dist) for row in post_rows]
        source_topics = converter.get_source_id_topic_dictionary()
        topic_id1 = source_topics['claim1']
        topic_id2 = source_topics['claim2']
        self.assertEqual(2, len(author_topic_mapping))
        self.assertSetEqual(
            {('test author', 0.666666666667, 0.333333333333),
             ('test author2', 0.5, 0.5)}, set(author_topic_mapping))
        self.assertSetEqual(
            {('post1', topic_id1, 1.0), ('post2', topic_id1, 1.0),
             ('post3', topic_id2, 1.0), ('post4', topic_id1, 1.0),
             ('post5', topic_id2, 1.0)}, set(post_topic_mappings))

    def test_double_execution_visualization(self):
        """Running execute() twice is idempotent: mappings are not duplicated."""
        self._add_author('test author2', "bad_actor")
        self._add_post("test author", 'claim1', 'claim1 content', 'Claim')
        self._add_post("test author2", 'claim2', 'claim2 content', 'Claim')
        self._add_post("test author", 'post1', 'post1 content of data',
                       'Microblog')
        self._add_post("test author", 'post2', 'post2  bla bla', 'Microblog')
        self._add_post("test author", 'post3', 'post3 noting  new',
                       'Microblog')
        self._add_post("test author2", 'post4', 'post4  bla bla', 'Microblog')
        self._add_post("test author2", 'post5', 'post5 noting  new',
                       'Microblog')
        for claim_id, post_id in [('claim1', 'post1'), ('claim1', 'post2'),
                                  ('claim1', 'post4'), ('claim2', 'post3'),
                                  ('claim2', 'post5')]:
            self._add_claim_tweet_connection(claim_id, post_id)
        self._db.session.commit()
        converter = self._preprocess_visualization
        converter._domain = "Microblog"
        converter.execute()
        converter.execute()

        author_topic_mapping = self._db.get_author_topic_mapping()
        post_rows = self._db.get_post_topic_mapping()
        post_topic_mappings = [(row.post_id, row.max_topic_id,
                                row.max_topic_dist) for row in post_rows]
        source_topics = converter.get_source_id_topic_dictionary()
        topic_id1 = source_topics['claim1']
        topic_id2 = source_topics['claim2']
        self.assertEqual(2, len(author_topic_mapping))
        self.assertSetEqual(
            {('test author', 0.666666666667, 0.333333333333),
             ('test author2', 0.5, 0.5)}, set(author_topic_mapping))
        self.assertSetEqual(
            {('post1', topic_id1, 1.0), ('post2', topic_id1, 1.0),
             ('post3', topic_id2, 1.0), ('post4', topic_id1, 1.0),
             ('post5', topic_id2, 1.0)}, set(post_topic_mappings))

    def assertTopicInserted(self, claim_id):
        """Assert a topic exists for *claim_id* and its terms equal the cleaned post words.

        Fix: the original unconditionally computed the ``clean_tweet``
        expectation and then discarded it whenever ``_remove_stop_words``
        was set; the cleaning choice is now a single if/else.
        """
        topics = self._db.get_topics()
        terms = self._db.get_terms()
        term_dict = {term.term_id: term.description for term in terms}
        # Group term descriptions by topic id.
        topic_dict = defaultdict(set)
        for topic_id, term_id, prob in topics:
            topic_dict[topic_id].add(term_dict[term_id])
        topic_id = self._preprocess_visualization.get_source_id_topic_dictionary(
        )[claim_id]
        self.assertIn(topic_id, topic_dict)
        content = self._post_dictionary[claim_id].content
        # Mirror the converter's configured cleaning step.
        if self._preprocess_visualization._remove_stop_words:
            cleaned = clean_content_by_nltk_stopwords(content)
        else:
            cleaned = clean_tweet(content)
        self.assertSetEqual(set(cleaned.split(' ')), topic_dict[topic_id])

    def _add_author(self, author_guid, type="good_actor"):
        """Insert a fixture Author whose name fields all equal *author_guid*.

        NOTE(review): the ``type`` parameter shadows the builtin; renaming it
        would change the keyword interface for callers, so it is kept as-is.
        """
        author = Author()
        author.author_guid = author_guid
        author.author_full_name = author_guid
        author.author_screen_name = author_guid
        author.name = author_guid
        author.domain = 'Microblog'
        author.author_type = type
        self._db.add_author(author)
        self._authors.append(author)

    def _add_post(self, author_guid, title, content, domain='Microblog'):
        """Insert a fixture Post and track it in self._posts / self._post_dictionary.

        The *title* doubles as post_id and guid.
        """
        new_post = Post()
        new_post.title = title
        new_post.content = content
        new_post.domain = domain
        new_post.author = author_guid
        new_post.author_guid = author_guid
        new_post.post_id = title
        new_post.guid = new_post.post_id
        new_post.is_detailed = True
        new_post.is_LB = False
        self._db.addPost(new_post)
        self._posts.append(new_post)
        self._post_dictionary[new_post.post_id] = new_post

    def _add_claim_tweet_connection(self, claim_id, post_id):
        """Link *claim_id* to *post_id* via a Claim_Tweet_Connection row.

        Fix: removed a dead trailing ``pass`` statement left after the
        method body.
        """
        connection = Claim_Tweet_Connection()
        connection.claim_id = claim_id
        connection.post_id = post_id
        self._db.add_claim_connections([connection])