def test_normal_fit_transform(self):
    """Run ``PMI.fit_transform`` with ``n_jobs=1`` and check the result type.

    Uses the ``csr_matrix_`` and ``n_docs_distribution`` attributes of the
    test instance as input; the only check is that a ``csr_matrix`` instance
    comes back.
    """
    fit_kwargs = dict(
        X=self.csr_matrix_,
        n_jobs=1,
        n_docs_distribution=self.n_docs_distribution,
    )
    result = PMI_python3.PMI().fit_transform(**fit_kwargs)
    assert isinstance(result, csr_matrix)
 def test_multi_process_fit_transform(self):
     """Run ``PMI.fit_transform`` with ``n_jobs=5`` and ``verbose=True``.

     Uses the ``csr_matrix_`` and ``n_docs_distribution`` attributes of the
     test instance as input; the only check is that a ``csr_matrix`` instance
     comes back.
     """
     fit_kwargs = dict(
         X=self.csr_matrix_,
         n_jobs=5,
         n_docs_distribution=self.n_docs_distribution,
         verbose=True,
     )
     result = PMI_python3.PMI().fit_transform(**fit_kwargs)
     assert isinstance(result, csr_matrix)
    def test_output_result_pmi(self):
        """Score with PMI and convert the scored matrix into score records.

        Calls ``PMI_python3.PMI().fit_transform`` with ``n_jobs=5`` on the
        instance's ``csr_matrix_`` / ``n_docs_distribution``, then feeds the
        result to ``ScoredResultObject.convert_score_matrix2score_record``
        with ``outformat='items'``. Only the result types are checked.
        """
        pmi_object = PMI_python3.PMI()
        scored_matrix = pmi_object.fit_transform(
            X=self.csr_matrix_,
            n_jobs=5,
            n_docs_distribution=self.n_docs_distribution)
        # unittest assertions are not stripped under ``python -O`` and report
        # the offending object on failure, unlike a bare ``assert``.
        self.assertIsInstance(scored_matrix, csr_matrix)

        pmi_scored_dict = ScoredResultObject(
            scored_matrix=scored_matrix,
            label2id_dict=self.label2id_dict,
            feature2id_dict=self.vocabulary).convert_score_matrix2score_record(
                outformat='items')
        self.assertIsInstance(pmi_scored_dict, list)
    def test_output_result_pmi(self):
        """Score with PMI, convert the result to 'items' format and print it.

        Calls ``PMI_python3.PMI().fit_transform`` with ``n_jobs=5``, passes
        the scored matrix to ``ScoredResultObject.ScoreMatrix2ScoreDictionary``
        with ``outformat='items'``, checks the result types and pretty-prints
        the converted result.
        """
        import pprint

        matrix = PMI_python3.PMI().fit_transform(
            X=self.csr_matrix_,
            n_jobs=5,
            n_docs_distribution=self.n_docs_distribution,
        )
        assert isinstance(matrix, csr_matrix)

        result_object = ScoredResultObject(
            scored_matrix=matrix,
            label2id_dict=self.label2id_dict,
            feature2id_dict=self.vocabulary,
        )
        score_items = result_object.ScoreMatrix2ScoreDictionary(
            outformat='items')

        assert isinstance(score_items, list)
        pprint.pprint(score_items)
예제 #5
0
    def test_get_pmi_feature_dictionary(self):
        """Check that a PMI-scored matrix converts to score dictionaries.

        Builds a document-frequency matrix from ``self.input_dict`` via
        ``DataConverter.labeledMultiDocs2DocFreqMatrix``, scores it with
        ``PMI_python3.PMI().fit_transform`` and converts the scored matrix
        with ``ScoredResultObject.ScoreMatrix2ScoreDictionary`` once per
        ``outformat`` ('dict' and 'items'), always with ``sort_desc=True``.

        No ``cut_zero`` argument is passed to the converter, so there is a
        single case per output format.
        """
        data_csr_object = data_converter.DataConverter(
        ).labeledMultiDocs2DocFreqMatrix(labeled_documents=self.input_dict,
                                         ngram=1,
                                         n_jobs=5)

        assert isinstance(data_csr_object.csr_matrix_, csr_matrix)
        assert isinstance(data_csr_object.label2id_dict, dict)
        assert isinstance(data_csr_object.vocabulary, dict)

        pmi_scored_matrix = PMI_python3.PMI().fit_transform(
            X=data_csr_object.csr_matrix_,
            n_jobs=5,
            n_docs_distribution=data_csr_object.n_docs_distribution)

        # main part of test: (outformat, expected container type)
        format_cases = (('dict', dict), ('items', list))
        for outformat, expected_type in format_cases:
            pmi_scored_dictionary_objects = ScoredResultObject(
                scored_matrix=pmi_scored_matrix,
                label2id_dict=data_csr_object.label2id_dict,
                feature2id_dict=data_csr_object.vocabulary
            ).ScoreMatrix2ScoreDictionary(outformat=outformat,
                                          sort_desc=True,
                                          n_jobs=5)
            assert isinstance(pmi_scored_dictionary_objects, expected_type)
            if outformat == 'items':
                # 'items' output is expected to be a list of dict records
                for d in pmi_scored_dictionary_objects:
                    assert isinstance(d, dict)
            logging.debug(pmi_scored_dictionary_objects)