def test_normal_fit_transform(self):
    """Check that PMI ``fit_transform`` with ``n_jobs=1`` returns a csr_matrix.

    Reads ``self.csr_matrix_`` and ``self.n_docs_distribution``; these are
    presumably prepared by the fixture setup outside this method.
    """
    pmi_result = PMI_python3.PMI().fit_transform(
        X=self.csr_matrix_,
        n_jobs=1,
        n_docs_distribution=self.n_docs_distribution,
    )
    assert isinstance(pmi_result, csr_matrix)
def test_multi_process_fit_transform(self):
    """Check that PMI ``fit_transform`` with ``n_jobs=5`` returns a csr_matrix.

    Same input as the single-job case, but requests five jobs and turns on
    ``verbose``. Reads ``self.csr_matrix_`` and ``self.n_docs_distribution``;
    these are presumably prepared by the fixture setup outside this method.
    """
    pmi_result = PMI_python3.PMI().fit_transform(
        X=self.csr_matrix_,
        n_jobs=5,
        n_docs_distribution=self.n_docs_distribution,
        verbose=True,
    )
    assert isinstance(pmi_result, csr_matrix)
def test_output_result_pmi(self):
    """Check that PMI scores can be exported as a list of score records.

    Fits PMI on the fixture matrix with ``n_jobs=5``, wraps the scored
    matrix in a ``ScoredResultObject`` and converts it with
    ``convert_score_matrix2score_record(outformat='items')``; the converted
    value is expected to be a list.

    NOTE(review): another method named ``test_output_result_pmi`` appears
    later in this file. If both live in the same class, the later
    definition replaces this one and this test never runs -- confirm.
    """
    pmi_object = PMI_python3.PMI()
    scored_matrix = pmi_object.fit_transform(
        X=self.csr_matrix_,
        n_jobs=5,
        n_docs_distribution=self.n_docs_distribution)
    # Use unittest assertions throughout: unlike a bare ``assert`` they are
    # not stripped under ``python -O`` and they report the offending type.
    self.assertIsInstance(scored_matrix, csr_matrix)

    score_records = ScoredResultObject(
        scored_matrix=scored_matrix,
        label2id_dict=self.label2id_dict,
        feature2id_dict=self.vocabulary,
    ).convert_score_matrix2score_record(outformat='items')
    self.assertIsInstance(score_records, list)
def test_output_result_pmi(self):
    """Check that PMI scores convert to a list and pretty-print the result.

    Fits PMI on the fixture matrix with ``n_jobs=5``, wraps the scored
    matrix in a ``ScoredResultObject`` and converts it with
    ``ScoreMatrix2ScoreDictionary(outformat='items')``; the converted value
    is expected to be a list.

    NOTE(review): an earlier method in this file is also named
    ``test_output_result_pmi``. If both live in the same class, this
    definition replaces the earlier one -- confirm.
    """
    scorer = PMI_python3.PMI()
    pmi_matrix = scorer.fit_transform(
        X=self.csr_matrix_,
        n_jobs=5,
        n_docs_distribution=self.n_docs_distribution,
    )
    assert isinstance(pmi_matrix, csr_matrix)

    result_holder = ScoredResultObject(
        scored_matrix=pmi_matrix,
        label2id_dict=self.label2id_dict,
        feature2id_dict=self.vocabulary,
    )
    score_items = result_holder.ScoreMatrix2ScoreDictionary(outformat='items')
    assert isinstance(score_items, list)

    # Dump the converted records to stdout for manual inspection.
    import pprint
    pprint.pprint(score_items)
def test_get_pmi_feature_dictionary(self):
    """Check that a PMI-scored matrix converts to 'dict' and 'items' output.

    Builds a document-frequency matrix from ``self.input_dict``, scores it
    with PMI, then converts the scored matrix with
    ``ScoreMatrix2ScoreDictionary`` and checks the container type returned
    for each ``outformat``.

    :return:
    """
    converter = data_converter.DataConverter()
    doc_freq = converter.labeledMultiDocs2DocFreqMatrix(
        labeled_documents=self.input_dict,
        ngram=1,
        n_jobs=5,
    )
    assert isinstance(doc_freq.csr_matrix_, csr_matrix)
    assert isinstance(doc_freq.label2id_dict, dict)
    assert isinstance(doc_freq.vocabulary, dict)

    scorer = PMI_python3.PMI()
    pmi_scored_matrix = scorer.fit_transform(
        X=doc_freq.csr_matrix_,
        n_jobs=5,
        n_docs_distribution=doc_freq.n_docs_distribution,
    )

    # main part of test
    # Each entry is (outformat, expected container type); sort_desc is
    # always True.
    # NOTE(review): the four runs appear intended to cover cut_zero=True and
    # cut_zero=False, but no cut_zero argument is passed anywhere, so the
    # last two runs repeat the first two -- confirm the intended arguments.
    conversion_cases = (
        ('dict', dict),
        ('items', list),
        ('dict', dict),
        ('items', list),
    )
    for outformat, expected_type in conversion_cases:
        scored = ScoredResultObject(
            scored_matrix=pmi_scored_matrix,
            label2id_dict=doc_freq.label2id_dict,
            feature2id_dict=doc_freq.vocabulary,
        ).ScoreMatrix2ScoreDictionary(
            outformat=outformat, sort_desc=True, n_jobs=5)
        assert isinstance(scored, expected_type)
        if outformat == 'items':
            # The 'items' format must be a list whose elements are dicts.
            for record in scored:
                assert isinstance(record, dict)
        logging.debug(scored)