def test_fit(self):
    # Uncompressed training should return the path to the .bin model file.
    model_path = fth.fasttext_fit(self.train_path, {'-bucket': 1000},
                                  self.ft_path,
                                  model_path=self.model_path,
                                  thread=1,
                                  compress_model=False)
    expected_model_path = self.model_path + '.bin'
    assert model_path == expected_model_path
    assert os.path.isfile(model_path)
    os.remove(model_path)
def test_predict(self):
    model_path = fth.fasttext_fit(self.train_path, {'-bucket': 1000},
                                  self.ft_path,
                                  model_path=self.model_path,
                                  thread=1,
                                  compress_model=False)
    fth.fasttext_predict(model_path, self.test_path, self.ft_path,
                         self.probability_path)
    assert os.path.isfile(self.probability_path)
    os.remove(model_path)
    os.remove(self.probability_path)
def test_fit_pretrained_vectors(self):
    # Training with pretrained vectors should still produce a .bin model.
    model_path = fth.fasttext_fit(
        self.train_path, {'-bucket': 1000},
        self.ft_path,
        model_path=self.model_path,
        thread=1,
        compress_model=False,
        pretrained_vectors_path=self.pretrained_vectors_path)
    expected_model_path = self.model_path + '.bin'
    assert model_path == expected_model_path
    assert os.path.isfile(model_path)
    os.remove(model_path)
def test_fit_compressed(self):
    # compress_model=True quantizes the model and returns a .ftz path instead.
    model_path = fth.fasttext_fit(self.train_path,
                                  {'-bucket': 1000, '-wordNgrams': 2},
                                  self.ft_path,
                                  model_path=self.model_path,
                                  thread=1,
                                  compress_model=True)
    expected_model_path = self.model_path + '.ftz'
    assert model_path == expected_model_path
    assert os.path.isfile(model_path)
    os.remove(model_path)
def test_fasttext_class_probabilities(self):
    model_path = fth.fasttext_fit(self.train_path, {'-bucket': 1000},
                                  self.ft_path,
                                  model_path=self.model_path,
                                  thread=1,
                                  compress_model=False)
    fth.fasttext_predict(model_path, self.test_path, self.ft_path,
                         self.probability_path)
    probabilities = fth.load_fasttext_class_probabilities(
        self.probability_path)
    # Expect one probability per test example: high for the first 20,
    # low for the last 20.
    assert len(probabilities) == 40
    assert all(x > 0.75 for x in probabilities[:20])
    assert all(x < 0.25 for x in probabilities[20:])
    os.remove(model_path)
    os.remove(self.probability_path)
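# A minimal usage sketch of the helpers exercised above: train, predict, and
# load class probabilities. Kept as comments since it is illustrative only;
# the paths ('train.txt', 'test.txt', 'probs.txt', 'fasttext') are
# hypothetical placeholders, not fixtures from this test suite.
#
#     model_path = fth.fasttext_fit('train.txt', {'-bucket': 1000}, 'fasttext',
#                                   model_path='model', thread=1,
#                                   compress_model=False)  # returns 'model.bin'
#     fth.fasttext_predict(model_path, 'test.txt', 'fasttext', 'probs.txt')
#     probabilities = fth.load_fasttext_class_probabilities('probs.txt')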
    compression='infer', header=None, index_col=None)
test_df.columns = [
    'pmid', 'paragraph', 'sentence', 'entity1', 'entity2', 'text', 'class',
    'distance', 'pairs', 'key'
]
del test_df['key']
del test_df['pairs']

perfomance = list()
for i in range(repeats):
    model_file = fth.fasttext_fit(
        ft_train_path,
        ft_params,
        fasttext_path,
        thread=ft_threads,
        compress_model=True,
        model_path=tmp_model_path + str(i),
        pretrained_vectors_path=pretrained_embeddings)
    # With compress_model=True, fasttext_fit returns the quantized .ftz path.
    fth.fasttext_predict(model_file, ft_test_path, fasttext_path, prob_path)
    probabilities = fth.load_fasttext_class_probabilities(prob_path)
    test_df = test_df.assign(predicted=probabilities)
    # Write the predictions alongside the test data to a temporary gzipped TSV.
    _, tmp_file_path = tempfile.mkstemp(text=True, suffix='.gz')
    with gzip.open(tmp_file_path, 'wt') as test_out:
        test_df.to_csv(test_out, sep='\t', header=False, index=False,