def setUp(self):
    """Build a TfIdf model and load the expected values for the current test.

    The running test is identified through shortDescription(); only the
    reference data that this particular test compares against is copied
    from ``res`` onto the test case.  A description that matches none of
    the known tests gets the common set-up only.
    """
    description = self.shortDescription()
    super(TfIdfTest, self).setUp()

    # Common fixture: a TfIdf instance fed with the reference text set.
    # NOTE(review): self.test_data is presumably created by the parent
    # setUp() -- confirm against TestBodyAttribute.
    self.mock_obj = tf_idf = TfIdf()
    tf_idf._text_set = self.test_data.r_D_text_set
    self.token_list = res.token_list
    tf_idf.build_model()

    if description == "Test routine build_model() in TfIdf":
        print("setting up for testing build_model()")
        return

    if description == "Test routine build_df_model() in TfIdf":
        print("setting up for testing build_df_model()")
        self.document_frequency = res.document_frequency
        return

    if description == "Test routine build_tf_model() in TfIdf":
        print("setting up for testing build_tf_model()")
        self.term_frequency = res.term_frequency
        return

    if description == "Test routine build_tf_idf_model()/compute() in TfIdf":
        print("setting up for testing compute()/build_tf_idf_model()")
        self.test_data.attach_feature("tf_idf")
        self.tf_idf_weight = res.tf_idf_weight
def init_attribute(self, attribute_name):
    """Instantiate the attribute that implements a feature.

    Parameters
    ----------
    attribute_name : string
        Name of the feature.  Recognised names are "bag_of_words",
        "tf_idf", "readability", "variety", "perfect_tense",
        "nested_sentence", "passive", "adjective", "sentence_start",
        "bag_of_pos" and "modal_verb".

    Returns
    -------
    A freshly constructed instance of the attribute class registered
    for ``attribute_name``.  The bag-of-words, tf-idf and bag-of-pos
    attributes are constructed with ``self.bow_model``.

    Raises
    ------
    FeatureNotExistException
        If ``attribute_name`` is not one of the recognised names.
    """
    # Every entry is a zero-argument factory, so only the requested class
    # is looked up and instantiated, and self.bow_model is read only by
    # the attributes that take it.
    factories = {
        "bag_of_words": lambda: BagOfWords(self.bow_model),
        "tf_idf": lambda: TfIdf(self.bow_model),
        "readability": lambda: Readability(),
        "variety": lambda: Variety(),
        "perfect_tense": lambda: PerfectTense(),
        "nested_sentence": lambda: NestedSentence(),
        "passive": lambda: Passive(),
        "adjective": lambda: Adjective(),
        "sentence_start": lambda: SentenceStart(),
        "bag_of_pos": lambda: BagOfPos(self.bow_model),
        "modal_verb": lambda: ModalVerb(),
    }

    try:
        build = factories[attribute_name]
    except (KeyError, TypeError):
        # TypeError covers unhashable names, which simply compare unequal
        # to every known feature name and are therefore unknown as well.
        raise FeatureNotExistException(attribute_name)
    return build()
class TfIdfTest(TestBodyAttribute):
    """Tests for TfIdf: model keys, document/term frequency and weights.

    setUp() and tearDown() select per-test fixtures by looking up
    shortDescription() in _FIXTURES.  The keys of that table must stay
    identical to the one-line docstrings of the test methods below.
    """

    # shortDescription() of a test -> (set-up message, clean-up message,
    # name of the expected-value attribute taken from `res`, or None).
    _FIXTURES = {
        "Test routine build_model() in TfIdf": (
            "setting up for testing build_model()",
            "cleaning up for testing build_model()",
            None,
        ),
        "Test routine build_df_model() in TfIdf": (
            "setting up for testing build_df_model()",
            "cleaning up for testing build_df_model()",
            "document_frequency",
        ),
        "Test routine build_tf_model() in TfIdf": (
            "setting up for testing build_tf_model()",
            "cleaning up for testing build_tf_model()",
            "term_frequency",
        ),
        "Test routine build_tf_idf_model()/compute() in TfIdf": (
            "setting up for testing compute()/build_tf_idf_model()",
            "cleaning up for testing compute()/build_tf_idf_model()",
            "tf_idf_weight",
        ),
    }

    _SEPARATOR = "--------------------------------------------------------------"

    # Attributes reset to None after every test, in this order.
    _COMMON_ATTRIBUTES = (
        "korpus_file",
        "anno_file",
        "test_korpus",
        "test_data",
        "token_list",
        "mock_obj",
    )

    @classmethod
    def setUpClass(cls):
        """Print the banner that opens this test class' output."""
        print("#################### Begin Testing TfIdf Class ####################" + "\n")

    @classmethod
    def tearDownClass(cls):
        """Print the banner that closes this test class' output."""
        print("\n" + "###################### End Testing TfIdf Class ######################")

    def setUp(self):
        """Build the TfIdf model and load the current test's expected data."""
        description = self.shortDescription()
        super(TfIdfTest, self).setUp()

        self.mock_obj = tf_idf = TfIdf()
        tf_idf._text_set = self.test_data.r_D_text_set
        self.token_list = res.token_list
        tf_idf.build_model()

        fixture = self._FIXTURES.get(description)
        if fixture is None:
            # Unknown description: only the common fixture is prepared.
            return

        setup_message, _, expected_attr = fixture
        print(setup_message)
        if expected_attr == "tf_idf_weight":
            # The compute() test reads features["tf_idf"] from the texts.
            self.test_data.attach_feature("tf_idf")
        if expected_attr is not None:
            setattr(self, expected_attr, getattr(res, expected_attr))

    def tearDown(self):
        """Drop the common fixture, then the current test's expected data."""
        description = self.shortDescription()
        for attribute in self._COMMON_ATTRIBUTES:
            setattr(self, attribute, None)

        fixture = self._FIXTURES.get(description)
        if fixture is None:
            return

        _, cleanup_message, expected_attr = fixture
        print(cleanup_message)
        print(self._SEPARATOR)
        if expected_attr is not None:
            setattr(self, expected_attr, None)

    # NOTE: the docstrings of the test methods are matched verbatim (after
    # stripping) by setUp()/tearDown(); keep them on one line and unchanged.

    def test__tf_idf__build_model(self):
        """ Test routine build_model() in TfIdf """
        expected_tokens = sorted(self.token_list)
        model_tokens = sorted(self.mock_obj.model.keys())
        self.assertListEqual(expected_tokens, model_tokens)

    def test__tf_idf__build_df_model(self):
        """ Test routine build_df_model() in TfIdf """
        # Sort by key so the values line up with the reference list.
        sorted_items = sorted(self.mock_obj.build_df_model().items())
        df_model = collections.OrderedDict(sorted_items)
        self.assertListEqual(list(df_model.values()), self.document_frequency)

    def test__tf_idf__build_tf_model(self):
        """ Test routine build_tf_model() in TfIdf """
        for text in self.mock_obj.text_set:
            raw_model = self.mock_obj.build_tf_model(text.tokenlist)
            tf_model = collections.OrderedDict(sorted(raw_model.items()))
            self.assertListEqual(list(tf_model.values()),
                                 self.term_frequency[text.id])

    def test__tf_idf__compute(self):
        """ Test routine build_tf_idf_model()/compute() in TfIdf """
        for pair in self.test_data.real_data.values():
            first = pair.text1
            self.assertListEqual(first.features["tf_idf"],
                                 self.tf_idf_weight[first.id])
            second = pair.text2
            self.assertListEqual(second.features["tf_idf"],
                                 self.tf_idf_weight[second.id])
class TfIdfTest(TestBodyAttribute):
    """Test case covering the model-building routines of TfIdf.

    Which expected values are loaded (and later released) depends on the
    running test, recognised by its shortDescription().  The description
    constants below mirror the one-line docstrings of the test methods
    and have to be changed together with them.
    """

    # Values shortDescription() yields for the individual tests.
    _BUILD_MODEL = "Test routine build_model() in TfIdf"
    _BUILD_DF_MODEL = "Test routine build_df_model() in TfIdf"
    _BUILD_TF_MODEL = "Test routine build_tf_model() in TfIdf"
    _COMPUTE = "Test routine build_tf_idf_model()/compute() in TfIdf"

    @classmethod
    def setUpClass(cls):
        """Announce the start of the TfIdf tests on stdout."""
        banner = "#################### Begin Testing TfIdf Class ####################"
        print(banner + "\n")

    @classmethod
    def tearDownClass(cls):
        """Announce the end of the TfIdf tests on stdout."""
        banner = "###################### End Testing TfIdf Class ######################"
        print("\n" + banner)

    @staticmethod
    def _report_cleanup(message):
        """Print a clean-up message followed by the separator line."""
        print(message)
        print("--------------------------------------------------------------")

    def setUp(self):
        """Create the model under test plus the data the current test needs."""
        current = self.shortDescription()
        super(TfIdfTest, self).setUp()

        # Shared by all tests: TfIdf built on the reference text set.
        self.mock_obj = TfIdf()
        self.mock_obj._text_set = self.test_data.r_D_text_set
        self.token_list = res.token_list
        self.mock_obj.build_model()

        # Test-specific expected values.
        if current == self._BUILD_MODEL:
            print("setting up for testing build_model()")
        elif current == self._BUILD_DF_MODEL:
            print("setting up for testing build_df_model()")
            self.document_frequency = res.document_frequency
        elif current == self._BUILD_TF_MODEL:
            print("setting up for testing build_tf_model()")
            self.term_frequency = res.term_frequency
        elif current == self._COMPUTE:
            print("setting up for testing compute()/build_tf_idf_model()")
            self.test_data.attach_feature("tf_idf")
            self.tf_idf_weight = res.tf_idf_weight

    def tearDown(self):
        """Release the shared fixture and the current test's expected data."""
        current = self.shortDescription()

        # Chained assignment binds the targets left to right.
        self.korpus_file = self.anno_file = self.test_korpus = None
        self.test_data = self.token_list = self.mock_obj = None

        if current == self._BUILD_MODEL:
            self._report_cleanup("cleaning up for testing build_model()")
        elif current == self._BUILD_DF_MODEL:
            self._report_cleanup("cleaning up for testing build_df_model()")
            self.document_frequency = None
        elif current == self._BUILD_TF_MODEL:
            self._report_cleanup("cleaning up for testing build_tf_model()")
            self.term_frequency = None
        elif current == self._COMPUTE:
            self._report_cleanup(
                "cleaning up for testing compute()/build_tf_idf_model()")
            self.tf_idf_weight = None

    # NOTE: setUp()/tearDown() compare the stripped first docstring line of
    # each test with the constants above -- do not reword or re-wrap the
    # docstrings of the test methods.

    def test__tf_idf__build_model(self):
        """ Test routine build_model() in TfIdf """
        self.assertListEqual(
            sorted(self.token_list),
            sorted(self.mock_obj.model.keys()),
        )

    def test__tf_idf__build_df_model(self):
        """ Test routine build_df_model() in TfIdf """
        # Values in the order of their sorted keys.
        items = sorted(self.mock_obj.build_df_model().items())
        observed = [value for _, value in items]
        self.assertListEqual(observed, self.document_frequency)

    def test__tf_idf__build_tf_model(self):
        """ Test routine build_tf_model() in TfIdf """
        for text in self.mock_obj.text_set:
            items = sorted(self.mock_obj.build_tf_model(text.tokenlist).items())
            observed = [value for _, value in items]
            self.assertListEqual(observed, self.term_frequency[text.id])

    def test__tf_idf__compute(self):
        """ Test routine build_tf_idf_model()/compute() in TfIdf """

        def check(text):
            # Attached feature vector must equal the reference weights.
            self.assertListEqual(text.features["tf_idf"],
                                 self.tf_idf_weight[text.id])

        for textpair in self.test_data.real_data.values():
            check(textpair.text1)
            check(textpair.text2)