Beispiel #1
0
    def test_extend_vocab_1(self):
        """Check that ``extend_vocab`` grows the vocabulary and its vectors.

        Builds the vocabulary of a ``MatchingField`` with no datasets,
        attaches a 300-column placeholder vector tensor, extends the
        vocabulary from ``sample_table_small.csv`` using the local
        ``fasttext_sample.vec`` vectors, and then expects 6 vocabulary
        entries backed by a ``(6, 300)`` vector tensor.
        """
        vectors_cache_dir = '.cache'
        # Presumably this guards against a stale vector cache left by an
        # earlier run; the directory is wiped before vectors are loaded.
        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir)
        # Remove the cache directory again once the test is over, whether it
        # passes or fails, so the test leaves no artifacts behind.
        self.addCleanup(shutil.rmtree, vectors_cache_dir, ignore_errors=True)

        # The same text field instance is shared by the left and right
        # attribute columns; the id field is reused for 'id' and 'label'.
        mf = MatchingField()
        lf = MatchingField(id=True, sequential=False)
        fields = [('id', lf), ('left_a', mf), ('right_a', mf), ('label', lf)]
        col_naming = {
            'id': 'id',
            'label': 'label',
            'left': 'left_',
            'right': 'right_'
        }

        # Point the vectors loader at a local file through a file: URL.
        pathdir = os.path.abspath(os.path.join(test_dir_path, 'test_datasets'))
        filename = 'fasttext_sample.vec'
        vec_path = os.path.join(pathdir, filename)
        url_base = urljoin('file:', pathname2url(vec_path))
        vecs = Vectors(name=filename, cache=vectors_cache_dir, url=url_base)

        data_path = os.path.join(test_dir_path, 'test_datasets',
                                 'sample_table_small.csv')
        md = MatchingDataset(fields, col_naming, path=data_path)

        mf.build_vocab()
        # Placeholder tensor: the assertions below only inspect its shape.
        mf.vocab.vectors = torch.Tensor(len(mf.vocab.itos), 300)
        mf.extend_vocab(md, vectors=vecs)
        self.assertEqual(len(mf.vocab.itos), 6)
        self.assertEqual(mf.vocab.vectors.size(), torch.Size([6, 300]))
Beispiel #2
0
 def test_build_vocab_2(self):
     """Build a vocab from the wiki test vectors found in ``test_datasets``.

     ``build_vocab`` is given the vectors file by name plus the directory
     to use as its cache. Its return value is expected to be ``None``, the
     same expectation the sibling crawl-vectors test places on this call.
     """
     mf = MatchingField()
     vector_file_name = 'fasttext.wiki_test.vec'
     cache_dir = os.path.join(test_dir_path, 'test_datasets')
     vec_data = mf.build_vocab(vectors=vector_file_name, cache=cache_dir)
     # Previously the result was captured but never checked.
     self.assertIsNone(vec_data)
Beispiel #3
0
 def test_build_vocab_3(self):
     """Build a vocab from the crawl test vectors; the call returns None."""
     field = MatchingField()
     datasets_dir = os.path.join(test_dir_path, 'test_datasets')
     # Vectors are referenced by file name and resolved via the cache dir.
     outcome = field.build_vocab(
         vectors='fasttext.crawl_test.vec',
         cache=datasets_dir,
     )
     self.assertIsNone(outcome)
Beispiel #4
0
 def test_build_vocab_1(self):
     """Smoke test: ``build_vocab`` with no arguments must not raise."""
     # There is nothing to assert; completing without an exception is the check.
     MatchingField().build_vocab()