コード例 #1
0
ファイル: yalign_wrapper.py プロジェクト: thientu/hundict
def main():
    """Align every paired article and write the aligned sentences to stdout.

    Positional command-line arguments (read from ``sys.argv``):

    1. identifier of the first language (passed to ``read_pairing`` and
       ``text_to_document``)
    2. identifier of the second language
    3. path of the model; made absolute, appended to ``nltk.data.path`` and
       loaded with ``YalignModel.load``
    4. path of the pairing file
    5. path of the source-side articles file
    6. path of the target-side articles file

    For each pair, a progress line is written to stderr and the alignment is
    written to stdout with ``write_plaintext``.  A ``KeyError`` raised while
    handling a pair is reported on stderr and that pair is skipped.
    """
    lang_a = sys.argv[1]
    lang_b = sys.argv[2]
    model_path = os.path.abspath(sys.argv[3])
    nltk.data.path += [model_path]
    model = YalignModel.load(model_path)

    # Input files are closed as soon as they have been read.
    # NOTE(review): this assumes read_pairing / read_articles consume the
    # stream before returning -- the pairing is iterated several times below
    # and the articles are indexed by key, which suggests they do; confirm.
    with open(sys.argv[4]) as pairing_file:
        pairing = read_pairing(pairing_file, lang_a, lang_b)
    src_needed = set(a for a, _ in pairing)
    tgt_needed = set(b for _, b in pairing)
    with open(sys.argv[5]) as src_file:
        src_articles = read_articles(src_file, src_needed)
    with open(sys.argv[6]) as tgt_file:
        tgt_articles = read_articles(tgt_file, tgt_needed)

    for src, tgt in pairing:
        try:
            text_a = "\n".join(src_articles[src])
            text_b = "\n".join(tgt_articles[tgt])
            document_a = text_to_document(text_a, lang_a)
            document_b = text_to_document(text_b, lang_b)
            pairs = model.align(document_a, document_b)
            sys.stderr.write(u"{0} pairs in {1}-{2}\n".format(len(pairs), src, tgt).encode("utf-8"))

            write_plaintext(sys.stdout, pairs)
        except KeyError:
            # Best effort: report the failing pair and move on to the next.
            sys.stderr.write(u"KeyError with {0}-{1}\n".format(src, tgt).encode("utf-8"))
            continue
コード例 #2
0
ファイル: test_yalignmodel.py プロジェクト: krzwolk/yalign
    def test_save_load_and_align(self):
        """A model saved to disk and loaded back must behave like the original.

        Checks that the reloaded model produces the same alignment for a
        small document pair and keeps the same ``threshold`` and
        ``document_pair_aligner.penalty`` values.
        """
        import shutil

        doc1 = [Sentence([u"House"]), Sentence([u"asoidfhuioasgh"])]
        doc2 = [Sentence([u"Casa"])]
        result_before_save = self.model.align(doc1, doc2)

        tmp_folder = tempfile.mkdtemp()
        try:
            # Save
            self.model.save(tmp_folder)

            # Load
            new_model = YalignModel.load(tmp_folder)
            result_after_load = new_model.align(doc1, doc2)
            self.assertEqual(result_before_save, result_after_load)
            self.assertEqual(self.model.threshold, new_model.threshold)
            self.assertEqual(self.model.document_pair_aligner.penalty,
                             new_model.document_pair_aligner.penalty)
        finally:
            # mkdtemp never cleans up after itself; remove the saved model so
            # repeated test runs do not accumulate temporary directories.
            shutil.rmtree(tmp_folder, ignore_errors=True)
コード例 #3
0
    def test_save_load_and_align(self):
        """Round-trip the model through save/load and compare its behaviour.

        The alignment of a small document pair, the ``threshold`` and the
        ``document_pair_aligner.penalty`` must be equal before saving and
        after loading.
        """
        import shutil

        doc1 = [Sentence([u"House"]), Sentence([u"asoidfhuioasgh"])]
        doc2 = [Sentence([u"Casa"])]
        result_before_save = self.model.align(doc1, doc2)

        tmp_folder = tempfile.mkdtemp()
        try:
            # Save
            self.model.save(tmp_folder)

            # Load
            new_model = YalignModel.load(tmp_folder)
            result_after_load = new_model.align(doc1, doc2)
            self.assertEqual(result_before_save, result_after_load)
            self.assertEqual(self.model.threshold, new_model.threshold)
            self.assertEqual(self.model.document_pair_aligner.penalty,
                             new_model.document_pair_aligner.penalty)
        finally:
            # The directory created by mkdtemp is not removed automatically;
            # delete it whether or not the assertions passed.
            shutil.rmtree(tmp_folder, ignore_errors=True)
コード例 #4
0
    def test_command_tool(self):
        """The command-line tool must report the same value as the library call.

        Saves ``self.model`` to a temporary directory, runs ``self.cmdline``
        (formatted with the corpus and model paths) with stdout captured in a
        temporary file, and asserts that ``"<value>%"`` -- where ``value`` is
        the result of ``self.alignment_function`` -- occurs in that output.
        Returns without checking anything when ``self.cmdline`` is ``None``.
        """
        import os
        import shutil

        if self.cmdline is None:
            return

        tmpdir = tempfile.mkdtemp()
        fd, tmpfile = tempfile.mkstemp()
        # mkstemp hands back an open OS-level descriptor; the file is reopened
        # by name below, so close the descriptor right away to avoid a leak.
        os.close(fd)
        try:
            self.model.save(tmpdir)

            cmd = self.cmdline.format(corpus=self.parallel_corpus, model=tmpdir)
            with open(tmpfile, "w") as outputfh:
                subprocess.call(cmd, shell=True, stdout=outputfh)
            with open(tmpfile) as outputfh:
                output = outputfh.read()

            A, B = parallel_corpus_to_documents(self.parallel_corpus)
            model = YalignModel.load(tmpdir)
            value = self.alignment_function(A, B, model)

            self.assertIn("{}%".format(value), output)
        finally:
            # Neither mkdtemp nor mkstemp removes what it created.
            shutil.rmtree(tmpdir, ignore_errors=True)
            if os.path.exists(tmpfile):
                os.remove(tmpfile)
コード例 #5
0
ファイル: test_evaluation.py プロジェクト: wannaphong/yalign
    def test_command_tool(self):
        """Compare the command-line tool's output with the in-process result.

        The model is saved to a temporary directory, ``self.cmdline`` is
        formatted with the corpus and model paths and executed with its
        stdout redirected to a temporary file.  The test passes when the
        value returned by ``self.alignment_function``, rendered as
        ``"<value>%"``, is found in the captured output.  When
        ``self.cmdline`` is ``None`` the method returns immediately.
        """
        import os
        import shutil

        if self.cmdline is None:
            return

        tmpdir = tempfile.mkdtemp()
        fd, tmpfile = tempfile.mkstemp()
        # Only the path is needed: close the descriptor mkstemp opened so it
        # is not leaked for the lifetime of the test process.
        os.close(fd)
        try:
            self.model.save(tmpdir)

            cmd = self.cmdline.format(corpus=self.parallel_corpus, model=tmpdir)
            with open(tmpfile, "w") as outputfh:
                subprocess.call(cmd, shell=True, stdout=outputfh)
            with open(tmpfile) as outputfh:
                output = outputfh.read()

            A, B = parallel_corpus_to_documents(self.parallel_corpus)
            model = YalignModel.load(tmpdir)
            value = self.alignment_function(A, B, model)

            self.assertIn("{}%".format(value), output)
        finally:
            # Remove the temporary directory and file even if the test fails.
            shutil.rmtree(tmpdir, ignore_errors=True)
            if os.path.exists(tmpfile):
                os.remove(tmpfile)