def test_num_of_known_words_in_empty_sentence(self):
    """Check that an empty sentence produces a known-word count of 0.

    The generator is built from the fixture parameters; this test does not
    load any word list before counting.
    """
    generator_kwargs = self._get_params()
    generator = Known_Words_Number_Feature_generator(self._db,
                                                     **generator_kwargs)
    # The instance is also stored on the test case, as in the other tests.
    self._known_words_number_feature_generator = generator
    empty_sentence_count = generator._num_of_known_words_in_sentence(u"")
    self.assertEqual(0, empty_sentence_count)
    def _execute_module(self):
        """Commit pending session changes, then build and run the generator."""
        session = self._db.session
        session.commit()

        generator_kwargs = self._get_params()
        feature_generator = Known_Words_Number_Feature_generator(
            self._db, **generator_kwargs)
        # Store the instance on the test case before running it, so the
        # attribute is set even if execute() raises.
        self._known_words_number_feature_generator = feature_generator
        feature_generator.execute()
 def test_get_post_id_to_post_content_case_only_destination_in_args(self):
     """Look up target elements when only the 'source' section is filled in.

     Two posts are stored, one without an explicit domain and one with the
     "Claim" domain. The source filter (domain == "Claim") is expected to
     leave only 'post3', mapped to a list holding its own Post object.
     """
     self._add_post(u'post2', u'dog cat pig man')  # 2
     self._add_post(u'post3', u'TV is the best guys', u"Claim")  # 1
     self._db.session.commit()

     generator_kwargs = self._get_params()
     generator = Known_Words_Number_Feature_generator(self._db,
                                                      **generator_kwargs)
     self._known_words_number_feature_generator = generator
     generator._load_known_words_to_dict(u'subjects')

     # Only the source carries a table/field description; connection and
     # destination are intentionally left empty.
     claim_only_filter = [{"field_name": "domain", "value": "Claim"}]
     source_spec = {
         'table_name': 'posts',
         'id': 'post_id',
         'target_field': 'content',
         "where_clauses": claim_only_filter,
     }
     arg = {'source': source_spec, 'connection': {}, 'destination': {}}

     actual = generator._get_source_id_target_elements(arg)
     expected = {u'post3': [self._post_dictionary[u'post3']]}
     self.assertSourceIdElementsDict(actual, expected)
 def test_case_post_represent_by_posts(self):
     """Map a claim post to the posts connected to it.

     'post1' is stored with the "Claim" domain and linked through
     claim_tweet_connection to 'post2' and 'post3'. The lookup is expected
     to return both linked Post objects under the key 'post1'.
     """
     self._add_post(u'post1', u'the claim', u'Claim')
     self._add_post(u'post2', u'dog cat pig man')  # 2
     self._add_post(u'post3', u'TV is the best guys')  # 1
     for linked_post_id in (u'post2', u'post3'):
         self._add_claim_tweet_connection(u'post1', linked_post_id)
     self._db.session.commit()

     generator_kwargs = self._get_params()
     generator = Known_Words_Number_Feature_generator(self._db,
                                                      **generator_kwargs)
     self._known_words_number_feature_generator = generator
     generator._load_known_words_to_dict(u'subjects')

     source_spec = {'table_name': 'posts', 'id': 'post_id'}
     connection_spec = {
         'table_name': 'claim_tweet_connection',
         'source_id': 'claim_id',
         'target_id': 'post_id',
     }
     destination_spec = {
         'table_name': 'posts',
         'id': 'post_id',
         'target_field': 'content',
     }
     arg = {
         'source': source_spec,
         'connection': connection_spec,
         'destination': destination_spec,
     }

     actual = generator._get_source_id_target_elements(arg)
     linked_posts = [
         self._post_dictionary[post_id] for post_id in (u'post2', u'post3')
     ]
     expected = {u'post1': linked_posts}
     self.assertSourceIdElementsDict(actual, expected)
 def test_load_known_words_from_not_exists_file_to_dict(self):
     """Loading an unknown word-list name must leave the words dict empty."""
     generator_kwargs = self._get_params()
     generator = Known_Words_Number_Feature_generator(self._db,
                                                      **generator_kwargs)
     self._known_words_number_feature_generator = generator
     generator._load_known_words_to_dict(u'no_such_file_exists')

     # Nothing should have been loaded for a name that has no word list.
     loaded_words = generator._words_dict
     self.assertEqual(loaded_words, {})
 def test_num_of_known_words_in_sentence_with_3_known_word(self):
     """A sentence with three words from the 'subjects' list counts as 3."""
     generator_kwargs = self._get_params()
     generator = Known_Words_Number_Feature_generator(self._db,
                                                      **generator_kwargs)
     self._known_words_number_feature_generator = generator
     generator._load_known_words_to_dict(u'subjects')

     sentence = u"man parent they sentence"
     known_words_found = generator._num_of_known_words_in_sentence(sentence)
     self.assertEqual(3, known_words_found)
 def tes_load_known_words_from_file_to_dict(self):
     """Check that the 'subjects' word list contains a few sample words.

     NOTE(review): the name starts with ``tes_`` rather than ``test_``, so
     unittest discovery does not collect this method. Confirm whether that
     is a typo or a deliberate way of disabling the test before renaming.
     """
     params = self._get_params()
     self._known_words_number_feature_generator = Known_Words_Number_Feature_generator(
         self._db, **params)
     self._known_words_number_feature_generator._load_known_words_to_dict(
         u'subjects')
     words_loaded = self._known_words_number_feature_generator._words_dict
     # assertIn reports the missing word and the container on failure,
     # unlike assertTrue(x in y) which only reports "False is not true".
     for expected_word in ('dog', 'guy', 'people', 'you'):
         self.assertIn(expected_word, words_loaded)
 def test_count_avg_known_words_for_5_sentences(self):
     """Average known-word count over five sentences must be 10 / 5.

     The 'subjects' word list is loaded first; the expected total of ten
     known words across the five sentences comes from the original test.
     """
     params = self._get_params()
     self._known_words_number_feature_generator = Known_Words_Number_Feature_generator(
         self._db, **params)
     self._known_words_number_feature_generator._load_known_words_to_dict(
         'subjects')
     sentences = [
         "man man man sentence", "man parent they sentence",
         "man thoughts hello sentence", "ways what sentence", "hi sentence"
     ]
     avg_known_words = self._known_words_number_feature_generator._count_avg_known_words(
         sentences)
     # The tolerance must be passed as ``delta``: the third positional
     # argument of assertAlmostEqual is ``places`` (a number of decimal
     # digits), so a float there is not an absolute tolerance.
     self.assertAlmostEqual(10.0 / 5.0, avg_known_words, delta=0.000001)
 def test_case_post_from_claim_represent_by_posts_from_microblog(self):
     """Unfiltered sources mapped to connected posts in the Microblog domain.

     'post1' (domain "Claim") is linked to 'post2', 'post3' and 'post4';
     'post4' (domain "Microblog") is linked to 'post3'. Only the
     destination carries a domain filter, so both 'post1' and 'post4' are
     expected as keys.
     """
     self._add_post('post1', 'the claim1', 'Claim')
     self._add_post('post2', 'dog cat pig man')  # 2
     self._add_post('post4', 'the claim4', 'Microblog')
     self._add_post('post3', 'TV is the best guys')  # 1
     connections = [
         ('post1', 'post2'),
         ('post1', 'post3'),
         ('post1', 'post4'),
         ('post4', 'post3'),
     ]
     for claim_id, post_id in connections:
         self._add_claim_tweet_connection(claim_id, post_id)
     self._db.session.commit()

     generator_kwargs = self._get_params()
     generator = Known_Words_Number_Feature_generator(self._db,
                                                      **generator_kwargs)
     self._known_words_number_feature_generator = generator
     generator._load_known_words_to_dict('subjects')

     source_spec = {'table_name': 'posts', 'id': 'post_id'}
     connection_spec = {
         'table_name': 'claim_tweet_connection',
         'source_id': 'claim_id',
         'target_id': 'post_id',
     }
     microblog_filter = [{"field_name": "domain", "value": "Microblog"}]
     destination_spec = {
         'table_name': 'posts',
         'id': 'post_id',
         'target_field': 'content',
         "where_clauses": microblog_filter,
     }
     arg = {
         'source': source_spec,
         'connection': connection_spec,
         'destination': destination_spec,
     }

     actual = generator._get_source_id_target_elements(arg)
     posts_by_id = self._post_dictionary
     expected = {
         'post1': [posts_by_id[pid] for pid in ('post2', 'post3', 'post4')],
         'post4': [posts_by_id['post3']],
     }
     self.assertSourceIdElementsDict(actual, expected)
 def test_get_author_id_to_text_case_one_word_in_each_post(self):
     """Map an author to the posts reached through the posts table.

     The source is the authors table filtered on domain == "tests"; the
     posts table serves both as the connection and as the destination.
     Both stored posts are expected under 'this is a test author'.
     """
     self._add_post('post2', 'dog cat pig man')  # 2
     self._add_post('post3', 'TV is the best guys')  # 1
     self._db.session.commit()

     generator_kwargs = self._get_params()
     generator = Known_Words_Number_Feature_generator(self._db,
                                                      **generator_kwargs)
     self._known_words_number_feature_generator = generator
     generator._load_known_words_to_dict('subjects')

     tests_domain_filter = [{"field_name": "domain", "value": "tests"}]
     source_spec = {
         'table_name': 'authors',
         'id': 'author_guid',
         "where_clauses": tests_domain_filter,
     }
     connection_spec = {
         'table_name': 'posts',
         'source_id': 'author_guid',
         'target_id': 'post_id',
     }
     destination_spec = {
         'table_name': 'posts',
         'id': 'post_id',
         'target_field': 'content',
     }
     arg = {
         'source': source_spec,
         'connection': connection_spec,
         'destination': destination_spec,
     }

     actual = generator._get_source_id_target_elements(arg)
     author_posts = [
         self._post_dictionary[post_id] for post_id in ('post2', 'post3')
     ]
     expected = {'this is a test author': author_posts}
     self.assertSourceIdElementsDict(actual, expected)
class Known_Words_Number_Feature_generator_Unittests(unittest.TestCase):
    """Unit tests for Known_Words_Number_Feature_generator.

    setUp creates a DB object and registers a single author; tearDown
    deletes the database. Posts and claim/tweet connections are added per
    test through the private helpers at the bottom of the class.
    """

    def setUp(self):
        """Create the database, the single test author and empty post stores."""
        self._config_parser = getConfig()
        self._db = DB()
        self._db.setUp()
        self._posts = []
        self._add_author(u'this is a test author')
        self._post_dictionary = {}

    def tearDown(self):
        """Close the sessions and delete the database used by the test."""
        self._db.session.close_all()
        self._db.deleteDB()
        self._db.session.close()

    # ------------------------------------------------------------------
    # End-to-end tests: run execute() and read the stored author features.
    # ------------------------------------------------------------------

    def test_case_simple(self):
        """Two posts holding contractions only: expect (1.5, 0)."""
        self._add_post(u'post1', u'when\'s why\'ll')
        self._add_post(u'post2', u'you\'re too good to be true')
        self._execute_module()

        self.assert_both_fields(1.5, 0)

    def test_case_no_words_in_one_post(self):
        """One post with words and one empty post: expect (0.5, 0.5)."""
        self._add_post(u'post1', u'wont dog')
        self._add_post(u'post2', u'')
        self._execute_module()
        self.assert_both_fields(0.5, 0.5)

    def test_case_no_words(self):
        """Whitespace-only and empty posts: expect (0, 0)."""
        self._add_post(u'post1', u' ')
        self._add_post(u'post2', u'')
        self._execute_module()
        self.assert_both_fields(0, 0)

    def test_case_one_word_in_each_post(self):
        """One contraction post and one subject post: expect (0.5, 0.5)."""
        self._add_post(u'post1', u'won\'t')
        self._add_post(u'post2', u'dog')
        self._execute_module()
        self.assert_both_fields(0.5, 0.5)

    def test_case_many_subjects(self):
        """A single post with five subject words: expect (0, 5)."""
        self._add_post(u'post1', u'kid reasons who girl guy')
        self._execute_module()
        self.assert_both_fields(0, 5)

    def test_case_many_constractions(self):
        """A single post with punctuation-laden contractions: expect (6, 0)."""
        self._add_post(u'post1', u'wont wouldnt, wouldve youd, youl!l, youre')
        self._execute_module()
        self.assert_both_fields(6, 0)

    # ------------------------------------------------------------------
    # Word-list loading and counting.
    # ------------------------------------------------------------------

    def tes_load_known_words_from_file_to_dict(self):
        """Check that the 'subjects' word list contains a few sample words.

        NOTE(review): the name starts with ``tes_`` rather than ``test_``, so
        unittest discovery does not collect this method. Confirm whether that
        is a typo or a deliberate way of disabling the test before renaming.
        """
        generator = self._create_generator(u'subjects')
        words_loaded = generator._words_dict
        # assertIn reports the missing word and the container on failure.
        for expected_word in ('dog', 'guy', 'people', 'you'):
            self.assertIn(expected_word, words_loaded)

    def test_load_known_words_from_not_exists_file_to_dict(self):
        """Loading an unknown word-list name must leave the words dict empty."""
        generator = self._create_generator(u'no_such_file_exists')
        words_loaded = generator._words_dict
        self.assertEqual(words_loaded, {})

    def test_num_of_known_words_in_empty_sentence(self):
        """An empty sentence counts zero known words (no list is loaded)."""
        generator = self._create_generator()
        known_words_count = generator._num_of_known_words_in_sentence(u"")
        self.assertEqual(0, known_words_count)

    def test_num_of_known_words_in_sentence_with_no_known_word(self):
        """A sentence without any 'subjects' word counts zero."""
        generator = self._create_generator(u'subjects')
        known_words_count = generator._num_of_known_words_in_sentence(
            u"test sentence")
        self.assertEqual(0, known_words_count)

    def test_num_of_known_words_in_sentence_with_3_known_word(self):
        """Three distinct 'subjects' words in one sentence count as 3."""
        generator = self._create_generator(u'subjects')
        known_words_count = generator._num_of_known_words_in_sentence(
            u"man parent they sentence")
        self.assertEqual(3, known_words_count)

    def test_num_of_known_words_in_sentence_with_1_known_word_3_times(self):
        """The same 'subjects' word repeated three times counts as 3."""
        generator = self._create_generator(u'subjects')
        known_words_count = generator._num_of_known_words_in_sentence(
            u"man man man sentence")
        self.assertEqual(3, known_words_count)

    def test_count_avg_known_words_for_5_sentences(self):
        """Average known-word count over five sentences must be 10 / 5."""
        generator = self._create_generator(u'subjects')
        sentences = [
            u"man man man sentence", u"man parent they sentence",
            u"man thoughts hello sentence", u"ways what sentence",
            u"hi sentence"
        ]
        avg_known_words = generator._count_avg_known_words(sentences)
        # The tolerance must be passed as ``delta``: the third positional
        # argument of assertAlmostEqual is ``places`` (a number of decimal
        # digits), so a float there is not an absolute tolerance.
        self.assertAlmostEqual(10.0 / 5.0, avg_known_words, delta=0.000001)

    # ------------------------------------------------------------------
    # _get_source_id_target_elements with different source/connection/
    # destination descriptions.
    # ------------------------------------------------------------------

    def test_case_post_represent_by_posts(self):
        """A claim post maps to the two posts connected to it."""
        self._add_post(u'post1', u'the claim', u'Claim')
        self._add_post(u'post2', u'dog cat pig man')  # 2
        self._add_post(u'post3', u'TV is the best guys')  # 1
        self._add_claim_tweet_connection(u'post1', u'post2')
        self._add_claim_tweet_connection(u'post1', u'post3')
        self._db.session.commit()
        generator = self._create_generator(u'subjects')
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id'
            },
            'connection': {
                'table_name': 'claim_tweet_connection',
                'source_id': 'claim_id',
                'target_id': 'post_id',
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content'
            }
        }
        author_id_texts_dict = generator._get_source_id_target_elements(arg)
        expected = {
            u'post1':
            [self._post_dictionary[u'post2'], self._post_dictionary[u'post3']]
        }
        self.assertSourceIdElementsDict(author_id_texts_dict, expected)

    def test_case_post_from_claim_represent_by_posts(self):
        """With a Microblog source filter only 'post4' remains as a key."""
        self._add_post(u'post1', u'the claim1', u'Claim')
        self._add_post(u'post4', u'the claim4', u'Microblog')
        self._add_post(u'post2', u'dog cat pig man')  # 2
        self._add_post(u'post3', u'TV is the best guys')  # 1
        self._add_claim_tweet_connection(u'post1', u'post2')
        self._add_claim_tweet_connection(u'post1', u'post3')
        self._add_claim_tweet_connection(u'post4', u'post2')
        self._add_claim_tweet_connection(u'post4', u'post3')
        self._db.session.commit()
        generator = self._create_generator(u'subjects')
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Microblog"
                }]
            },
            'connection': {
                'table_name': 'claim_tweet_connection',
                'source_id': 'claim_id',
                'target_id': 'post_id'
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content'
            }
        }
        author_id_texts_dict = generator._get_source_id_target_elements(arg)
        expected = {
            u'post4':
            [self._post_dictionary[u'post2'], self._post_dictionary[u'post3']]
        }
        self.assertSourceIdElementsDict(author_id_texts_dict, expected)

    def assertSourceIdElementsDict(self, author_id_texts_dict, expected):
        """Assert that both mappings have the same string form.

        The comparison goes through ``str(dict(...))``, so it depends on the
        key order and on the repr of the contained elements.
        """
        self.assertEqual(str(dict(author_id_texts_dict)), str(dict(expected)))

    def test_case_post_represent_by_posts_from_microblog(self):
        """Microblog filter on both source and destination: 'post4' -> 'post3'."""
        self._add_post(u'post1', u'the claim1', u'Claim')
        self._add_post(u'post2', u'dog cat pig man')  # 2
        self._add_post(u'post4', u'the claim4', u'Microblog')
        self._add_post(u'post3', u'TV is the best guys')  # 1
        self._add_claim_tweet_connection(u'post1', u'post2')
        self._add_claim_tweet_connection(u'post1', u'post3')
        self._add_claim_tweet_connection(u'post1', u'post4')
        self._add_claim_tweet_connection(u'post4', u'post3')
        self._db.session.commit()
        generator = self._create_generator(u'subjects')
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Microblog"
                }]
            },
            'connection': {
                'table_name': 'claim_tweet_connection',
                'source_id': 'claim_id',
                'target_id': 'post_id'
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Microblog"
                }]
            }
        }
        author_id_texts_dict = generator._get_source_id_target_elements(arg)
        expected = {u'post4': [self._post_dictionary[u'post3']]}
        self.assertSourceIdElementsDict(author_id_texts_dict, expected)

    def test_case_post_from_claim_represent_by_posts_from_microblog(self):
        """Microblog filter on the destination only: both sources are keys."""
        self._add_post(u'post1', u'the claim1', u'Claim')
        self._add_post(u'post2', u'dog cat pig man')  # 2
        self._add_post(u'post4', u'the claim4', u'Microblog')
        self._add_post(u'post3', u'TV is the best guys')  # 1
        self._add_claim_tweet_connection(u'post1', u'post2')
        self._add_claim_tweet_connection(u'post1', u'post3')
        self._add_claim_tweet_connection(u'post1', u'post4')
        self._add_claim_tweet_connection(u'post4', u'post3')
        self._db.session.commit()
        generator = self._create_generator(u'subjects')
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
            },
            'connection': {
                'table_name': 'claim_tweet_connection',
                'source_id': 'claim_id',
                'target_id': 'post_id'
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Microblog"
                }]
            }
        }
        author_id_texts_dict = generator._get_source_id_target_elements(arg)
        expected = {
            u'post1': [
                self._post_dictionary[u'post2'],
                self._post_dictionary[u'post3'],
                self._post_dictionary[u'post4']
            ],
            u'post4': [self._post_dictionary[u'post3']]
        }
        self.assertSourceIdElementsDict(author_id_texts_dict, expected)

    def test_get_author_id_to_text_case_one_word_in_each_post(self):
        """An author (domain "tests") maps to both of its posts."""
        self._add_post(u'post2', u'dog cat pig man')  # 2
        self._add_post(u'post3', u'TV is the best guys')  # 1
        self._db.session.commit()
        generator = self._create_generator(u'subjects')
        arg = {
            'source': {
                'table_name': 'authors',
                'id': 'author_guid',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "tests"
                }]
            },
            'connection': {
                'table_name': 'posts',
                'source_id': 'author_guid',
                'target_id': 'post_id'
            },
            'destination': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content'
            }
        }
        author_id_texts_dict = generator._get_source_id_target_elements(arg)
        expected = {
            u'this is a test author':
            [self._post_dictionary[u'post2'], self._post_dictionary[u'post3']]
        }
        self.assertSourceIdElementsDict(author_id_texts_dict, expected)

    def test_get_post_id_to_post_content_case_only_destination_in_args(self):
        """Only the source is described: the "Claim" post maps to itself."""
        self._add_post(u'post2', u'dog cat pig man')  # 2
        self._add_post(u'post3', u'TV is the best guys', u"Claim")  # 1
        self._db.session.commit()
        generator = self._create_generator(u'subjects')
        arg = {
            'source': {
                'table_name': 'posts',
                'id': 'post_id',
                'target_field': 'content',
                "where_clauses": [{
                    "field_name": "domain",
                    "value": "Claim"
                }]
            },
            'connection': {},
            'destination': {}
        }
        author_id_texts_dict = generator._get_source_id_target_elements(arg)
        expected = {u'post3': [self._post_dictionary[u'post3']]}
        self.assertSourceIdElementsDict(author_id_texts_dict, expected)

    # ------------------------------------------------------------------
    # Assertion and fixture helpers.
    # ------------------------------------------------------------------

    def assert_both_fields(self, contrcations, subjects):
        """Check the stored contractions and subjects features of the author.

        Args:
            contrcations: expected value of the "..._from_contractions"
                author feature (parameter name kept as in the original).
            subjects: expected value of the "..._from_subjects" feature.
        """
        self._generic_test(
            "Known_Words_Number_Feature_generator_count_avg_known_word_from_contractions",
            contrcations)
        self._generic_test(
            "Known_Words_Number_Feature_generator_count_avg_known_word_from_subjects",
            subjects)

    def _generic_test(self, attribute, expected_value):
        """Compare one stored author feature with ``expected_value`` as floats."""
        db_val = self._db.get_author_feature(self._author.author_guid,
                                             attribute).attribute_value
        self.assertAlmostEqual(float(db_val), float(expected_value))

    def _create_generator(self, known_words_name=None):
        """Build the generator, store it on the test case and return it.

        Args:
            known_words_name: when not None, passed to
                ``_load_known_words_to_dict`` right after construction.
        """
        params = self._get_params()
        generator = Known_Words_Number_Feature_generator(self._db, **params)
        self._known_words_number_feature_generator = generator
        if known_words_name is not None:
            generator._load_known_words_to_dict(known_words_name)
        return generator

    def _execute_module(self):
        """Commit pending changes, then build and execute the generator."""
        self._db.session.commit()
        self._create_generator().execute()

    def _get_params(self):
        """Return the keyword arguments used to construct the generator."""
        posts = {self._author.author_guid: self._posts}
        return {'authors': [self._author], 'posts': posts}

    def _add_author(self, author_guid):
        """Store an author with the given guid and remember it on the test."""
        author = Author()
        author.author_guid = author_guid
        author.author_full_name = u'test author'
        author.name = u'test'
        author.domain = u'tests'
        self._db.add_author(author)
        self._author = author

    def _add_post(self, title, content, domain=u'Microblog'):
        """Store a post by the test author and index it by its post id.

        ``title`` doubles as the post id and guid, so tests can refer to the
        post through ``self._post_dictionary[title]``.
        """
        post = Post()
        post.author = self._author.author_guid
        post.author_guid = self._author.author_guid
        post.content = content
        post.title = title
        post.domain = domain
        post.post_id = title
        post.guid = post.post_id
        post.is_detailed = True
        post.is_LB = False
        self._db.addPost(post)
        self._posts.append(post)
        self._post_dictionary[post.post_id] = post

    def _add_claim_tweet_connection(self, claim_id, post_id):
        """Store a claim -> post connection in the database."""
        connection = Claim_Tweet_Connection()
        connection.claim_id = claim_id
        connection.post_id = post_id
        self._db.add_claim_connections([connection])