コード例 #1
0
 def test_fit(self):
     """Fit an uncompressed model and check that the model file is written.

     ``fth.fasttext_fit`` is called with ``compress_model=False``; the
     returned path must equal ``self.model_path`` plus a ``.bin`` suffix
     and must point at an existing file.
     """
     model_path = fth.fasttext_fit(self.train_path, {'-bucket': 1000},
                                   self.ft_path,
                                   model_path=self.model_path,
                                   thread=1,
                                   compress_model=False)
     try:
         expected_model_path = self.model_path + '.bin'
         assert model_path == expected_model_path
         assert os.path.isfile(model_path)
     finally:
         # Remove the artifact even when an assertion fails so it cannot
         # leak into other tests; the existence guard keeps a missing file
         # from masking the original assertion error.
         if os.path.isfile(model_path):
             os.remove(model_path)
コード例 #2
0
 def test_predict(self):
     """Fit a model, run prediction, and check the output file exists.

     The model returned by ``fth.fasttext_fit`` is passed to
     ``fth.fasttext_predict`` together with ``self.test_path``; the test
     passes when a file is present at ``self.probability_path`` afterwards.
     """
     model_path = fth.fasttext_fit(self.train_path, {'-bucket': 1000},
                                   self.ft_path,
                                   model_path=self.model_path,
                                   thread=1,
                                   compress_model=False)
     try:
         fth.fasttext_predict(model_path, self.test_path, self.ft_path,
                              self.probability_path)
         assert os.path.isfile(self.probability_path)
     finally:
         # Always clean up both artifacts, including when prediction raises
         # or the assertion fails; skip files that were never created so
         # the cleanup itself cannot hide the real failure.
         for artifact in (model_path, self.probability_path):
             if os.path.isfile(artifact):
                 os.remove(artifact)
コード例 #3
0
 def test_fit_pretrained_vectors(self):
     """Fit an uncompressed model with ``pretrained_vectors_path`` set.

     Same checks as a plain fit: the returned path must equal
     ``self.model_path`` plus ``.bin`` and the file must exist.
     """
     model_path = fth.fasttext_fit(
         self.train_path, {'-bucket': 1000},
         self.ft_path,
         model_path=self.model_path,
         thread=1,
         compress_model=False,
         pretrained_vectors_path=self.pretrained_vectors_path)
     try:
         expected_model_path = self.model_path + '.bin'
         self.assertEqual(model_path, expected_model_path)
         self.assertTrue(os.path.isfile(model_path))
     finally:
         # Remove the artifact even when an assertion fails; the existence
         # guard keeps a missing file from masking the assertion error.
         if os.path.isfile(model_path):
             os.remove(model_path)
コード例 #4
0
 def test_fit_compressed(self):
     """Fit with ``compress_model=True`` and check the ``.ftz`` output.

     The returned path must equal ``self.model_path`` plus a ``.ftz``
     suffix and must point at an existing file.
     """
     ft_params = {
         '-bucket': 1000,
         '-wordNgrams': 2,
     }
     model_path = fth.fasttext_fit(self.train_path,
                                   ft_params,
                                   self.ft_path,
                                   model_path=self.model_path,
                                   thread=1,
                                   compress_model=True)
     try:
         expected_model_path = self.model_path + '.ftz'
         assert model_path == expected_model_path
         assert os.path.isfile(model_path)
     finally:
         # Remove the artifact even when an assertion fails; the existence
         # guard keeps a missing file from masking the assertion error.
         if os.path.isfile(model_path):
             os.remove(model_path)
コード例 #5
0
 def test_fasttext_class_probabilities(self):
     """Fit, predict, and check the loaded class probabilities.

     After prediction the probabilities are read back with
     ``fth.load_fasttext_class_probabilities``. The test expects exactly
     40 values, the first 20 above 0.75 and the remaining ones below 0.25.
     NOTE(review): presumably the test fixture holds 20 positive examples
     followed by 20 negative ones -- confirm against the fixture data.
     """
     model_path = fth.fasttext_fit(self.train_path, {'-bucket': 1000},
                                   self.ft_path,
                                   model_path=self.model_path,
                                   thread=1,
                                   compress_model=False)
     try:
         fth.fasttext_predict(model_path, self.test_path, self.ft_path,
                              self.probability_path)
         probabilities = fth.load_fasttext_class_probabilities(
             self.probability_path)
         assert len(probabilities) == 40
         # Generator expressions let all() stop at the first violation
         # without building a throwaway list.
         assert all(x > 0.75 for x in probabilities[:20])
         assert all(x < 0.25 for x in probabilities[20:])
     finally:
         # Always clean up both artifacts, including when a step raises or
         # an assertion fails; skip files that were never created so the
         # cleanup itself cannot hide the real failure.
         for artifact in (model_path, self.probability_path):
             if os.path.isfile(artifact):
                 os.remove(artifact)
コード例 #6
0
                      compression='infer',
                      header=None,
                      index_col=None)
# Assign names to the ten columns of test_df.
# NOTE(review): presumably test_df was just loaded without a header row
# (header=None appears in the preceding call) -- confirm.
test_df.columns = [
    'pmid', 'paragraph', 'sentence', 'entity1', 'entity2', 'text', 'class',
    'distance', 'pairs', 'key'
]
# Drop the 'key' and 'pairs' columns in place.
del test_df['key']
del test_df['pairs']

# Empty list filled later in the script.
# NOTE(review): the name looks like a misspelling of "performance"; it is
# left unchanged because later code outside this view may reference it.
perfomance = list()
for i in range(repeats):
    model_file = fasttext_fit(ft_train_path,
                              ft_params,
                              fasttext_path,
                              thread=ft_threads,
                              compress_model=True,
                              model_path=tmp_model_path + str(i),
                              pretrained_vectors_path=pretrained_embeddings)

    fth.fasttext_predict(tmp_model_path + str(i) + '.ftz', ft_test_path,
                         fasttext_path, prob_path)
    probabilities = fth.load_fasttext_class_probabilities(prob_path)
    test_df = test_df.assign(predicted=probabilities)

    _, tmp_file_path = tempfile.mkstemp(text=True, suffix='.gz')
    with gzip.open(tmp_file_path, 'wt') as test_out:
        test_df.to_csv(test_out,
                       sep='\t',
                       header=False,
                       index=False,