def test_cy_equal_np_ft_random(self):
        """Compare ``train_average_np`` and ``train_average_cy`` on FastText.

        The FastText model only has its vocabulary built; it is never
        trained here. Two separate ``Average`` models are prepared from it,
        each is run through one of the two routines on the first two
        sentences, and both the returned values and the resulting sentence
        vectors are required to agree.
        """
        fasttext = FastText(size=20, min_count=1)
        fasttext.build_vocab(SENTENCES)

        def prepared_model():
            # Build an Average model on the shared FastText instance and run
            # the same preparation steps for both code paths.
            model = Average(fasttext)
            model.prep.prepare_vectors(
                sv=model.sv,
                total_sentences=len(self.sentences),
                update=False,
            )
            model._pre_train_calls()
            return model

        # --- NumPy path ---
        np_model = prepared_model()

        from fse.models.average_inner import MAX_NGRAMS_IN_BATCH
        # batch_ngrams is assigned before the working memory is requested.
        np_model.batch_ngrams = MAX_NGRAMS_IN_BATCH
        np_mem = np_model._get_thread_working_mem()
        np_result = train_average_np(
            np_model, self.sentences[:2], np_model.sv.vectors, np_mem
        )

        # --- Cython path ---
        cy_model = prepared_model()
        cy_mem = cy_model._get_thread_working_mem()

        from fse.models.average_inner import train_average_cy
        cy_result = train_average_cy(
            cy_model, self.sentences[:2], cy_model.sv.vectors, cy_mem
        )

        self.assertEqual(np_result, cy_result)
        vectors_match = np.allclose(
            np_model.sv.vectors, cy_model.sv.vectors, atol=1e-6
        )
        self.assertTrue(vectors_match)
Esempio n. 2
0
    def test_cy_equal_np_w2v_random(self):
        """Compare ``train_average_np`` and ``train_average_cy`` on Word2Vec.

        The Word2Vec model only has its vocabulary built (no training), so
        its vectors are whatever the initialization produced. Two separate
        ``Average`` models are prepared from it and each is run through one
        of the two routines on ``self.sentences``. Both the values returned
        by the routines and the resulting sentence vectors must agree.
        """
        w2v = Word2Vec(min_count=1, size=DIM)
        # Random initialization
        w2v.build_vocab(SENTENCES)

        m1 = Average(w2v)
        m1.prep.prepare_vectors(
            sv=m1.sv, total_sentences=len(self.sentences), update=False
        )
        m1._pre_train_calls()
        mem1 = m1._get_thread_working_mem()
        o1 = train_average_np(m1, self.sentences, m1.sv.vectors, mem1)

        m2 = Average(w2v)
        m2.prep.prepare_vectors(
            sv=m2.sv, total_sentences=len(self.sentences), update=False
        )
        m2._pre_train_calls()
        mem2 = m2._get_thread_working_mem()

        from fse.models.average_inner import train_average_cy

        o2 = train_average_cy(m2, self.sentences, m2.sv.vectors, mem2)

        # The return values were previously computed but never checked;
        # compare them as the sibling cy-vs-np tests do.
        self.assertEqual(o1, o2)
        self.assertTrue(np.allclose(m1.sv.vectors, m2.sv.vectors, atol=1e-6))
Esempio n. 3
0
 def test_average_train_np_w2v(self):
     """Run ``train_average_np`` on ``self.model`` and check fixed results.

     The model's sentence vectors are replaced by a float32 zero array of
     the same shape before the call. The test then checks the returned
     value and the contents of the first three sentence vectors.
     """
     model = self.model
     model.sv.vectors = np.zeros_like(model.sv.vectors, dtype=np.float32)
     work_mem = model._get_thread_working_mem()

     result = train_average_np(model, self.sentences, model.sv.vectors, work_mem)
     self.assertEqual((4, 7), result)

     # Rows 0 and 1 must be filled entirely with a single constant.
     for row, constant in ((0, 183), (1, 164.5)):
         self.assertTrue((constant == model.sv[row]).all())

     # Row 2 must equal the vocabulary index of "go" in every component.
     go_index = model.wv.vocab["go"].index
     self.assertTrue((go_index == model.sv[2]).all())
Esempio n. 4
0
 def test_average_train_np_ft(self):
     """Run ``train_average_np`` on a FastText-backed ``Average`` model.

     The FastText model only has its vocabulary built. Its word vectors
     (``vectors`` and ``vectors_vocab``) are overwritten with ones and its
     ngram vectors with twos, so the expected sentence vectors are the
     fixed constants asserted below.
     """
     fasttext = FastText(min_count=1, size=DIM)
     fasttext.build_vocab(SENTENCES)

     model = Average(fasttext)
     model.prep.prepare_vectors(
         sv=model.sv,
         total_sentences=len(self.sentences),
         update=False,
     )
     model._pre_train_calls()

     # vectors and vectors_vocab share one all-ones array, as in a chained
     # assignment; the ngram vectors are filled with 2.
     ones = np.ones_like(model.wv.vectors, dtype=np.float32)
     model.wv.vectors = ones
     model.wv.vectors_vocab = ones
     model.wv.vectors_ngrams = np.full_like(
         model.wv.vectors_ngrams, 2, dtype=np.float32
     )

     work_mem = model._get_thread_working_mem()
     result = train_average_np(model, self.sentences, model.sv.vectors, work_mem)
     self.assertEqual((4, 10), result)

     # Each checked row must hold one constant value in every component.
     for row, constant in ((0, 1.0), (2, 1.5), (3, 2)):
         self.assertTrue((constant == model.sv[row]).all())
    def test_cy_equal_np_w2v(self):
        """Compare ``train_average_np`` and ``train_average_cy`` on ``W2V``.

        Two ``Average`` models are prepared from the module-level ``W2V``
        object and each is run through one of the two routines on
        ``self.sentences``. The returned values must be equal and the
        sentence vectors must match element for element (exact equality,
        no tolerance).
        """

        def prepared_model():
            # Same preparation sequence for both code paths.
            model = Average(W2V)
            model.prep.prepare_vectors(
                sv=model.sv,
                total_sentences=len(self.sentences),
                update=False,
            )
            model._pre_train_calls()
            return model

        # --- NumPy path ---
        np_model = prepared_model()
        np_mem = np_model._get_thread_working_mem()
        np_result = train_average_np(
            np_model, self.sentences, np_model.sv.vectors, np_mem
        )

        # --- Cython path ---
        cy_model = prepared_model()
        cy_mem = cy_model._get_thread_working_mem()
        cy_result = train_average_cy(
            cy_model, self.sentences, cy_model.sv.vectors, cy_mem
        )

        self.assertEqual(np_result, cy_result)
        identical = np_model.sv.vectors == cy_model.sv.vectors
        self.assertTrue(identical.all())