def test_extend_vocab_1(self):
    """Extend an already-built vocab from a small dataset plus local vectors.

    Builds an empty-argument vocab on a ``MatchingField``, attaches a
    300-column vectors tensor, then calls ``extend_vocab`` with the sample
    table and the sample fastText vectors. The vocab is expected to end up
    with 6 entries and a ``[6, 300]`` vectors tensor.
    """
    # Start from a clean vectors cache so the vectors are (re)loaded from
    # the local sample file rather than from a stale cache.
    cache_dir = '.cache'
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)

    text_field = MatchingField()
    meta_field = MatchingField(id=True, sequential=False)
    fields = [
        ('id', meta_field),
        ('left_a', text_field),
        ('right_a', text_field),
        ('label', meta_field),
    ]
    col_naming = {
        'id': 'id',
        'label': 'label',
        'left': 'left_',
        'right': 'right_',
    }

    # Point the Vectors loader at the local sample file through a file: URL.
    vec_name = 'fasttext_sample.vec'
    vec_path = os.path.abspath(
        os.path.join(test_dir_path, 'test_datasets', vec_name))
    vec_url = urljoin('file:', pathname2url(vec_path))
    vecs = Vectors(name=vec_name, cache=cache_dir, url=vec_url)

    table_path = os.path.join(
        test_dir_path, 'test_datasets', 'sample_table_small.csv')
    md = MatchingDataset(fields, col_naming, path=table_path)

    embed_dim = 300
    text_field.build_vocab()
    # Give the freshly built vocab a vectors tensor before extending it.
    text_field.vocab.vectors = torch.Tensor(
        len(text_field.vocab.itos), embed_dim)
    text_field.extend_vocab(md, vectors=vecs)

    self.assertEqual(len(text_field.vocab.itos), 6)
    self.assertEqual(
        text_field.vocab.vectors.size(), torch.Size([6, embed_dim]))
def test_build_vocab_2(self):
    """Build a vocab using the ``fasttext.wiki_test.vec`` vectors file.

    The vectors are looked up by file name with ``test_datasets`` as the
    cache directory. ``build_vocab`` is expected to return ``None``.
    """
    mf = MatchingField()
    vector_file_name = 'fasttext.wiki_test.vec'
    cache_dir = os.path.join(test_dir_path, 'test_datasets')
    vec_data = mf.build_vocab(vectors=vector_file_name, cache=cache_dir)
    # Previously the result was captured but never checked; assert on it the
    # same way the sibling crawl-vectors test does.
    self.assertIsNone(vec_data)
def test_build_vocab_3(self):
    """Build a vocab using the ``fasttext.crawl_test.vec`` vectors file.

    The vectors are looked up by file name with ``test_datasets`` as the
    cache directory, and ``build_vocab`` must return ``None``.
    """
    field = MatchingField()
    datasets_dir = os.path.join(test_dir_path, 'test_datasets')
    result = field.build_vocab(
        vectors='fasttext.crawl_test.vec',
        cache=datasets_dir,
    )
    self.assertIsNone(result)
def test_build_vocab_1(self):
    """Smoke test: ``build_vocab`` with no arguments must not raise."""
    field = MatchingField()
    field.build_vocab()