def _do_train_job(data_iterable, target, memory):
     """Run one averaging job through ``train_average_cy``.

     The model passed to the routine is ``se``, which is not a parameter of
     this function and is resolved from the surrounding scope.

     Args:
         data_iterable: Forwarded as ``indexed_sentences``.
         target: Forwarded as ``target``.
         memory: Forwarded as ``memory``.

     Returns:
         A two-item tuple with the values returned by ``train_average_cy``
         (effective sentences, effective words).
     """
     job_result = train_average_cy(
         model=se,
         indexed_sentences=data_iterable,
         target=target,
         memory=memory,
     )
     # Unpack explicitly so anything other than a two-item result fails here.
     sentence_count, word_count = job_result
     return sentence_count, word_count
    def test_cy_equal_np_ft_random(self):
        """Check that the Cython and NumPy averaging routines agree (FastText).

        A FastText model for which only ``build_vocab`` has been called backs
        two separate ``Average`` models. The first two sentences are processed
        once by ``train_average_np`` and once by ``train_average_cy``; the
        returned values must be equal and the sentence vectors must match
        within ``atol=1e-6``.
        """
        fasttext = FastText(size=20, min_count=1)
        fasttext.build_vocab(SENTENCES)

        def prepared_model():
            # Each routine trains its own Average instance.
            model = Average(fasttext)
            model.prep.prepare_vectors(
                sv=model.sv,
                total_sentences=len(self.sentences),
                update=False,
            )
            model._pre_train_calls()
            return model

        # NumPy side: batch_ngrams is set explicitly on this model only.
        np_model = prepared_model()
        from fse.models.average_inner import MAX_NGRAMS_IN_BATCH

        np_model.batch_ngrams = MAX_NGRAMS_IN_BATCH
        np_mem = np_model._get_thread_working_mem()
        np_out = train_average_np(
            np_model, self.sentences[:2], np_model.sv.vectors, np_mem
        )

        # Cython side.
        cy_model = prepared_model()
        cy_mem = cy_model._get_thread_working_mem()
        from fse.models.average_inner import train_average_cy

        cy_out = train_average_cy(
            cy_model, self.sentences[:2], cy_model.sv.vectors, cy_mem
        )

        self.assertEqual(np_out, cy_out)
        vectors_match = np.allclose(
            np_model.sv.vectors, cy_model.sv.vectors, atol=1e-6
        )
        self.assertTrue(vectors_match)
예제 #3
0
    def test_cy_equal_np_w2v_random(self):
        """Check that the Cython and NumPy averaging routines agree (Word2Vec).

        A Word2Vec model for which only ``build_vocab`` has been called backs
        two separate ``Average`` models. All of ``self.sentences`` is
        processed once by ``train_average_np`` and once by
        ``train_average_cy``; the returned values must be equal and the
        sentence vectors must match within ``atol=1e-6``.
        """
        w2v = Word2Vec(min_count=1, size=DIM)
        # Random initialization
        w2v.build_vocab(SENTENCES)

        m1 = Average(w2v)
        m1.prep.prepare_vectors(
            sv=m1.sv, total_sentences=len(self.sentences), update=False
        )
        m1._pre_train_calls()
        mem1 = m1._get_thread_working_mem()
        o1 = train_average_np(m1, self.sentences, m1.sv.vectors, mem1)

        m2 = Average(w2v)
        m2.prep.prepare_vectors(
            sv=m2.sv, total_sentences=len(self.sentences), update=False
        )
        m2._pre_train_calls()
        mem2 = m2._get_thread_working_mem()

        from fse.models.average_inner import train_average_cy

        o2 = train_average_cy(m2, self.sentences, m2.sv.vectors, mem2)

        # The routines' return values must agree too, not only the vectors.
        self.assertEqual(o1, o2)
        self.assertTrue(np.allclose(m1.sv.vectors, m2.sv.vectors, atol=1e-6))
 def test_average_train_cy_w2v(self):
     """Run ``train_average_cy`` on ``self.model`` starting from zero vectors.

     Expects the routine to return ``(4, 7)`` and every component of the
     sentence vectors at indices 0 and 1 to equal 183 and 164.5. The vector
     at index 2 must equal the vocabulary index of the word "go".
     """
     model = self.model
     # Replace the sentence vectors with float32 zeros of the same shape.
     model.sv.vectors = np.zeros_like(model.sv.vectors, dtype=np.float32)

     working_mem = model._get_thread_working_mem()
     result = train_average_cy(
         model, self.sentences, model.sv.vectors, working_mem
     )
     self.assertEqual((4, 7), result)

     for row, expected in ((0, 183), (1, 164.5)):
         self.assertTrue((expected == model.sv[row]).all())

     go_index = model.wv.vocab["go"].index
     self.assertTrue((go_index == model.sv[2]).all())
 def test_average_train_cy_ft(self):
     """Run ``train_average_cy`` on a FastText-backed ``Average`` model.

     Word vectors are overwritten with ones and n-gram vectors with twos
     before the call. Expects the routine to return ``(4, 10)`` and every
     component of the sentence vectors at indices 0, 2 and 3 to equal 1.0,
     1.5 and 2 respectively.
     """
     fasttext = FastText(min_count=1, size=DIM)
     fasttext.build_vocab(SENTENCES)

     model = Average(fasttext)
     model.prep.prepare_vectors(
         sv=model.sv, total_sentences=len(self.sentences), update=False
     )
     model._pre_train_calls()

     # One all-ones float32 array is shared by both word-vector attributes.
     word_ones = np.ones_like(model.wv.vectors, dtype=np.float32)
     model.wv.vectors = word_ones
     model.wv.vectors_vocab = word_ones
     # N-gram vectors are filled with the constant 2.
     model.wv.vectors_ngrams = np.full_like(
         model.wv.vectors_ngrams, 2, dtype=np.float32
     )

     working_mem = model._get_thread_working_mem()
     result = train_average_cy(
         model, self.sentences, model.sv.vectors, working_mem
     )
     self.assertEqual((4, 10), result)

     for row, expected in ((0, 1.0), (2, 1.5), (3, 2)):
         self.assertTrue((expected == model.sv[row]).all())
    def test_cy_equal_np_w2v(self):
        """Check that the Cython and NumPy averaging routines agree on ``W2V``.

        Two separate ``Average`` models are built on the module-level ``W2V``
        model. All of ``self.sentences`` is processed once by
        ``train_average_np`` and once by ``train_average_cy``; the returned
        values must be equal and the sentence vectors must be exactly equal
        element-wise.
        """

        def prepared_model():
            # Each routine trains its own Average instance.
            model = Average(W2V)
            model.prep.prepare_vectors(
                sv=model.sv,
                total_sentences=len(self.sentences),
                update=False,
            )
            model._pre_train_calls()
            return model

        np_model = prepared_model()
        np_mem = np_model._get_thread_working_mem()
        np_out = train_average_np(
            np_model, self.sentences, np_model.sv.vectors, np_mem
        )

        cy_model = prepared_model()
        cy_mem = cy_model._get_thread_working_mem()
        cy_out = train_average_cy(
            cy_model, self.sentences, cy_model.sv.vectors, cy_mem
        )

        self.assertEqual(np_out, cy_out)
        # Exact comparison here, no tolerance.
        identical = (np_model.sv.vectors == cy_model.sv.vectors).all()
        self.assertTrue(identical)