Beispiel #1
0
    def test_infer_vector(self):
        """Check that a back-mapped vector lands near a directly inferred one.

        A back-mapping translation matrix is fitted on the first five training
        documents. The target model's stored vector for the sixth document is
        then passed through it, and the result is compared with the vector the
        source model infers from that same document's words.

        With a corpus this small the comparison is of limited significance; it
        stands in for an earlier test that had become increasingly meaningless.

        See <https://github.com/RaRe-Technologies/gensim/issues/2977> for the
        discussion of whether the class exercised here should survive once the
        TranslationMatrix functionality is better documented.
        """
        fit_docs = self.train_docs[:5]
        mapper = translation_matrix.BackMappingTranslationMatrix(
            self.source_doc_vec, self.target_doc_vec, fit_docs)
        mapper.train(fit_docs)

        # The sixth document was not part of the fitting slice above.
        held_out = self.train_docs[5]
        mapped = mapper.infer_vector(self.target_doc_vec.dv[held_out.tags[0]])
        self.assertEqual(mapped.shape, (8, ))

        directly_inferred = self.source_doc_vec.infer_vector(held_out.words)

        # Both routes should give nearly the same vector.
        self.assertLessEqual(cosine(mapped, directly_inferred), 0.1)
Beispiel #2
0
 def test_translation_matrix(self):
     """Check that training yields an 8x8 translation matrix.

     The back-mapping model is built from, and trained on, the first five
     training documents; the value returned by `train` must have shape (8, 8).
     """
     fit_docs = self.train_docs[:5]
     mapper = translation_matrix.BackMappingTranslationMatrix(
         self.source_doc_vec, self.target_doc_vec, fit_docs)
     self.assertEqual(mapper.train(fit_docs).shape, (8, 8))
Beispiel #3
0
    def test_infer_vector(self):
        """Check the shape of a back-mapped vector and its `cosine` value.

        The translation matrix is fitted on the first five training documents
        and then applied to the target model's vector for the sixth one. The
        output must have shape (100,), and `cosine` between the original target
        vector and the mapped vector must lie within 1e-6 of a hard-coded
        reference value.
        """
        mapper = translation_matrix.BackMappingTranslationMatrix(
            self.train_docs[:5],
            self.source_doc_vec,
            self.target_doc_vec,
        )
        mapper.train(self.train_docs[:5])

        mapped_vec = mapper.infer_vector(
            self.target_doc_vec.docvecs[self.train_docs[5].tags])
        self.assertEqual(mapped_vec.shape, (100, ))

        # Reference value is pinned to the fixture models; any retraining of
        # those models would presumably require updating it.
        reference_value = 0.6453547135
        tolerance = 1e-6
        target_vec = self.target_doc_vec.docvecs[self.train_docs[5].tags]
        deviation = math.fabs(cosine(target_vec, mapped_vec) - reference_value)
        self.assertLessEqual(deviation, tolerance)
Beispiel #4
0
    def test_infer_vector(self):
        """Compare back-mapped inference against the source model's inference.

        Fits the translation matrix on the first five training documents, maps
        the target model's vector for the sixth document back through it, and
        requires the result to be close to what the source model infers from
        that document's words.

        The data set is tiny, so this is more of a sanity check than a strong
        guarantee; it supersedes an earlier test that was not meaningful.
        """
        fit_docs = self.train_docs[:5]
        mapper = translation_matrix.BackMappingTranslationMatrix(
            self.source_doc_vec,
            self.target_doc_vec,
            fit_docs,
        )
        mapper.train(fit_docs)

        held_out = self.train_docs[5]
        # NOTE(review): `tags` is passed whole as the lookup key; if the
        # fixture stores tags as a list this may not yield a 1-D vector --
        # confirm against the fixture that builds `train_docs`.
        target_vec = self.target_doc_vec.dv[held_out.tags]
        mapped = mapper.infer_vector(target_vec)
        self.assertEqual(mapped.shape, (8, ))

        directly_inferred = self.source_doc_vec.infer_vector(held_out.words)

        gap = cosine(mapped, directly_inferred)
        self.assertLessEqual(gap, 0.1)