Python YalignModel.optimize_gap_penalty_and_threshold Exemples

Langage de programmation: Python

Espace de noms/Paquet : yalign.yalignmodel

Class/Type: YalignModel

Méthode/Fonction: optimize_gap_penalty_and_threshold

Exemples sur hotexamples.com : 2

Python YalignModel.optimize_gap_penalty_and_threshold - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de yalign.yalignmodel.YalignModel.optimize_gap_penalty_and_threshold extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

load(3)

YalignModel(2)

align(1)

align_indexes(1)

optimize_gap_penalty_and_threshold(1)

save(1)

Méthodes fréquemment utilisées

load (3)

YalignModel (2)

align (1)

align_indexes (1)

optimize_gap_penalty_and_threshold (1)

save (1)

Exemple #1

0

Afficher le fichier

Fichier : test_yalignmodel.py Projet : ahurriyetoglu/yalign

class TestYalignModel(unittest.TestCase):
    """Exercise YalignModel: saving, loading, aligning and optimizing."""

    def setUp(self):
        """Train a sentence-pair score and build the model under test.

        Only the first 25 entries of each side of the parallel corpus are
        used. The resulting score bounds are kept on ``self.min_`` and
        ``self.max_`` so the tests can sample parameters from them.
        """
        # Seed with the string itself rather than hash(string): string hashes
        # are randomized per process on Python 3, which made the seed (and
        # therefore the scrambled documents) differ between runs.
        random.seed("Y U NO?")
        base_path = os.path.dirname(os.path.abspath(__file__))
        word_scores = os.path.join(base_path, "data",
                                   "test_word_scores_big.csv")
        parallel_corpus = os.path.join(base_path, "data",
                                       "parallel-en-es.txt")
        A, B = parallel_corpus_to_documents(parallel_corpus)
        A = A[:25]
        B = B[:25]
        self.alignments = list(training_alignments_from_documents(A, B))
        self.A, self.B, self.correct_alignments = \
            list(training_scrambling_from_documents(A, B))
        # Word score
        word_pair_score = WordPairScore(word_scores)
        # Sentence Score
        sentence_pair_score = SentencePairScore()
        sentence_pair_score.train(self.alignments, word_pair_score)
        # Yalign model
        self.min_ = sentence_pair_score.min_bound
        self.max_ = sentence_pair_score.max_bound
        # Start with a gap penalty halfway between the score bounds.
        gap_penalty = (self.min_ + self.max_) / 2.0
        document_aligner = SequenceAligner(sentence_pair_score, gap_penalty)
        self.model = YalignModel(document_aligner, 1)

    def _make_tmp_folder(self):
        """Return a fresh temporary folder that is deleted after the test."""
        # Imported locally so this helper is self-contained.
        import shutil
        tmp_folder = tempfile.mkdtemp()
        # mkdtemp() never removes what it creates; register the removal so
        # the folder goes away even when the test fails.
        self.addCleanup(shutil.rmtree, tmp_folder, ignore_errors=True)
        return tmp_folder

    def test_save_file_created(self):
        """Saving writes both ``aligner.pickle`` and ``metadata.json``."""
        tmp_folder = self._make_tmp_folder()
        self.model.save(tmp_folder)
        model_path = os.path.join(tmp_folder, "aligner.pickle")
        metadata_path = os.path.join(tmp_folder, "metadata.json")
        self.assertTrue(os.path.exists(model_path))
        self.assertTrue(os.path.exists(metadata_path))

    def test_save_load_and_align(self):
        """A reloaded model aligns identically and keeps its parameters."""
        doc1 = [Sentence([u"House"]), Sentence([u"asoidfhuioasgh"])]
        doc2 = [Sentence([u"Casa"])]
        result_before_save = self.model.align(doc1, doc2)
        # Save
        tmp_folder = self._make_tmp_folder()
        self.model.save(tmp_folder)
        # Load
        new_model = YalignModel.load(tmp_folder)
        result_after_load = new_model.align(doc1, doc2)
        self.assertEqual(result_before_save, result_after_load)
        self.assertEqual(self.model.threshold, new_model.threshold)
        self.assertEqual(self.model.document_pair_aligner.penalty,
                         new_model.document_pair_aligner.penalty)

    def test_reasonable_alignment(self):
        """"House" is aligned with "Casa"."""
        doc1 = [Sentence([u"House"]), Sentence([u"asoidfhuioasgh"])]
        doc2 = [Sentence([u"Casa"])]
        result = self.model.align(doc1, doc2)
        # Compare as plain lists of words.
        result = [(list(x), list(y)) for x, y in result]
        self.assertIn((list(doc1[0]), list(doc2[0])), result)

    def test_optimize_gap_penalty_and_threshold_finishes(self):
        """The optimization runs to completion without raising."""
        self.model.optimize_gap_penalty_and_threshold(self.A, self.B,
                                                      self.correct_alignments)

    def test_optimize_gap_penalty_and_threshold_is_best(self):
        """No randomly sampled (penalty, threshold) beats the optimized pair."""

        def evaluate(penalty, threshold):
            # Overwrites the model's parameters, then scores its predictions
            # against the known-correct alignments.
            self.model.document_pair_aligner.penalty = penalty
            self.model.threshold = threshold
            predicted = self.model.align_indexes(self.A, self.B)
            return F_score(predicted, self.correct_alignments)[0]

        # String seed for the same reproducibility reason as in setUp.
        random.seed("12345")
        self.model.optimize_gap_penalty_and_threshold(self.A, self.B,
                                                      self.correct_alignments)
        best_score = evaluate(self.model.document_pair_aligner.penalty,
                              self.model.threshold)
        # range() instead of the Python-2-only xrange(); 50 items is small
        # enough that the list built on Python 2 does not matter.
        for _ in range(50):
            penalty = random.uniform(self.min_, self.max_ / 2.0)
            threshold = random.uniform(self.min_, self.max_)
            score = evaluate(penalty, threshold)
            self.assertGreaterEqual(best_score, score)

Exemple #2

0

Afficher le fichier

class TestYalignModel(unittest.TestCase):
    """Exercise YalignModel: saving, loading, aligning and optimizing."""

    def setUp(self):
        """Train a sentence-pair score and build the model under test.

        Only the first 25 entries of each side of the parallel corpus are
        used. The resulting score bounds are kept on ``self.min_`` and
        ``self.max_`` so the tests can sample parameters from them.
        """
        # Seed with the string itself rather than hash(string): string hashes
        # are randomized per process on Python 3, which made the seed (and
        # therefore the scrambled documents) differ between runs.
        random.seed("Y U NO?")
        base_path = os.path.dirname(os.path.abspath(__file__))
        word_scores = os.path.join(base_path, "data",
                                   "test_word_scores_big.csv")
        parallel_corpus = os.path.join(base_path, "data",
                                       "parallel-en-es.txt")
        A, B = parallel_corpus_to_documents(parallel_corpus)
        A = A[:25]
        B = B[:25]
        self.alignments = list(training_alignments_from_documents(A, B))
        self.A, self.B, self.correct_alignments = \
            list(training_scrambling_from_documents(A, B))
        # Word score
        word_pair_score = WordPairScore(word_scores)
        # Sentence Score
        sentence_pair_score = SentencePairScore()
        sentence_pair_score.train(self.alignments, word_pair_score)
        # Yalign model
        self.min_ = sentence_pair_score.min_bound
        self.max_ = sentence_pair_score.max_bound
        # Start with a gap penalty halfway between the score bounds.
        gap_penalty = (self.min_ + self.max_) / 2.0
        document_aligner = SequenceAligner(sentence_pair_score, gap_penalty)
        self.model = YalignModel(document_aligner, 1)

    def _make_tmp_folder(self):
        """Return a fresh temporary folder that is deleted after the test."""
        # Imported locally so this helper is self-contained.
        import shutil
        tmp_folder = tempfile.mkdtemp()
        # mkdtemp() never removes what it creates; register the removal so
        # the folder goes away even when the test fails.
        self.addCleanup(shutil.rmtree, tmp_folder, ignore_errors=True)
        return tmp_folder

    def test_save_file_created(self):
        """Saving writes both ``aligner.pickle`` and ``metadata.json``."""
        tmp_folder = self._make_tmp_folder()
        self.model.save(tmp_folder)
        model_path = os.path.join(tmp_folder, "aligner.pickle")
        metadata_path = os.path.join(tmp_folder, "metadata.json")
        self.assertTrue(os.path.exists(model_path))
        self.assertTrue(os.path.exists(metadata_path))

    def test_save_load_and_align(self):
        """A reloaded model aligns identically and keeps its parameters."""
        doc1 = [Sentence([u"House"]), Sentence([u"asoidfhuioasgh"])]
        doc2 = [Sentence([u"Casa"])]
        result_before_save = self.model.align(doc1, doc2)
        # Save
        tmp_folder = self._make_tmp_folder()
        self.model.save(tmp_folder)
        # Load
        new_model = YalignModel.load(tmp_folder)
        result_after_load = new_model.align(doc1, doc2)
        self.assertEqual(result_before_save, result_after_load)
        self.assertEqual(self.model.threshold, new_model.threshold)
        self.assertEqual(self.model.document_pair_aligner.penalty,
                         new_model.document_pair_aligner.penalty)

    def test_reasonable_alignment(self):
        """"House" is aligned with "Casa"."""
        doc1 = [Sentence([u"House"]), Sentence([u"asoidfhuioasgh"])]
        doc2 = [Sentence([u"Casa"])]
        result = self.model.align(doc1, doc2)
        # Compare as plain lists of words.
        result = [(list(x), list(y)) for x, y in result]
        self.assertIn((list(doc1[0]), list(doc2[0])), result)

    def test_optimize_gap_penalty_and_threshold_finishes(self):
        """The optimization runs to completion without raising."""
        self.model.optimize_gap_penalty_and_threshold(self.A, self.B,
                                                      self.correct_alignments)

    def test_optimize_gap_penalty_and_threshold_is_best(self):
        """No randomly sampled (penalty, threshold) beats the optimized pair."""

        def evaluate(penalty, threshold):
            # Overwrites the model's parameters, then scores its predictions
            # against the known-correct alignments.
            self.model.document_pair_aligner.penalty = penalty
            self.model.threshold = threshold
            predicted = self.model.align_indexes(self.A, self.B)
            return F_score(predicted, self.correct_alignments)[0]

        # String seed for the same reproducibility reason as in setUp.
        random.seed("12345")
        self.model.optimize_gap_penalty_and_threshold(self.A, self.B,
                                                      self.correct_alignments)
        best_score = evaluate(self.model.document_pair_aligner.penalty,
                              self.model.threshold)
        # range() instead of the Python-2-only xrange(); 50 items is small
        # enough that the list built on Python 2 does not matter.
        for _ in range(50):
            penalty = random.uniform(self.min_, self.max_ / 2.0)
            threshold = random.uniform(self.min_, self.max_)
            score = evaluate(penalty, threshold)
            self.assertGreaterEqual(best_score, score)