Python text_to_document Examples

Programming Language: Python

Namespace/Package Name: yalign.input_conversion

Method/Function: text_to_document

Examples at hotexamples.com: 3

Python text_to_document - 3 examples found. These are the top-rated real-world Python examples of yalign.input_conversion.text_to_document extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: yalign_wrapper.py Project: thientu/hundict

def _align_article_pairs(model, pairing, src_articles, tgt_articles,
                         lang_a, lang_b):
    """Align every paired article and write the sentence pairs to stdout.

    For each ``(src, tgt)`` key pair in ``pairing`` the article lines are
    joined with newlines, converted with ``text_to_document`` and aligned
    with ``model.align``. The number of aligned pairs is reported on stderr
    and the pairs themselves are written with ``write_plaintext``.
    """
    for src, tgt in pairing:
        try:
            text_a = "\n".join(src_articles[src])
            text_b = "\n".join(tgt_articles[tgt])
            document_a = text_to_document(text_a, lang_a)
            document_b = text_to_document(text_b, lang_b)
            pairs = model.align(document_a, document_b)
            # Messages are encoded to UTF-8 before being written to stderr.
            sys.stderr.write(
                u"{0} pairs in {1}-{2}\n".format(
                    len(pairs), src, tgt).encode("utf-8"))

            write_plaintext(sys.stdout, pairs)
        except KeyError:
            # Raised when an article key is missing from either mapping.
            # The try also spans conversion and alignment, so a KeyError
            # raised there is reported the same way; the pair is skipped.
            sys.stderr.write(
                u"KeyError with {0}-{1}\n".format(src, tgt).encode("utf-8"))
            continue


def main():
    """Align the sentences of paired articles given on the command line.

    Arguments are read from ``sys.argv``:
        1: language passed to ``text_to_document`` for the source articles
        2: language passed to ``text_to_document`` for the target articles
        3: model path; made absolute, appended to ``nltk.data.path`` and
           loaded with ``YalignModel.load``
        4: pairing file, parsed by ``read_pairing``
        5: source articles file, parsed by ``read_articles``
        6: target articles file, parsed by ``read_articles``

    Aligned pairs go to stdout; progress and ``KeyError`` reports go to
    stderr.
    """
    lang_a = sys.argv[1]
    lang_b = sys.argv[2]
    model_path = os.path.abspath(sys.argv[3])
    nltk.data.path += [model_path]
    model = YalignModel.load(model_path)

    # Keep all three input files open for the whole run and close them
    # deterministically afterwards, instead of leaking the handles. Holding
    # them open until the end stays correct even if the reader helpers
    # consume their file lazily.
    with open(sys.argv[4]) as pairing_file, \
            open(sys.argv[5]) as src_file, \
            open(sys.argv[6]) as tgt_file:
        pairing = read_pairing(pairing_file, lang_a, lang_b)
        # Only the articles that appear in some pairing need to be read.
        src_needed = set(a for a, _ in pairing)
        tgt_needed = set(b for _, b in pairing)
        src_articles = read_articles(src_file, src_needed)
        tgt_articles = read_articles(tgt_file, tgt_needed)
        _align_article_pairs(
            model, pairing, src_articles, tgt_articles, lang_a, lang_b)

Example #2

Show file

File: test_input_conversion.py Project: ahurriyetoglu/yalign

 def test_contains_more_than_one_sentence(self):
     """Check that the converted text yields several sentences of words.

     The document returned by ``text_to_document`` must contain more than
     one item, every item must be a ``Sentence`` and every word inside a
     sentence must be a text (unicode) string.
     """
     # The bare name ``unicode`` only exists on Python 2. type(u"") is
     # ``unicode`` there and ``str`` on Python 3, so the same assertion
     # runs on both interpreters.
     text_type = type(u"")
     document = text_to_document(self.text, self.language)
     self.assertGreater(len(document), 1)
     for sentence in document:
         self.assertIsInstance(sentence, Sentence)
         for word in sentence:
             self.assertIsInstance(word, text_type)

Example #3

Show file

File: test_input_conversion.py Project: wannaphong/yalign

 def test_contains_more_than_one_sentence(self):
     """Check that the converted text yields several sentences of words.

     The document returned by ``text_to_document`` must contain more than
     one item, every item must be a ``Sentence`` and every word inside a
     sentence must be a text (unicode) string.
     """
     # The bare name ``unicode`` only exists on Python 2. type(u"") is
     # ``unicode`` there and ``str`` on Python 3, so the same assertion
     # runs on both interpreters.
     text_type = type(u"")
     document = text_to_document(self.text, self.language)
     self.assertGreater(len(document), 1)
     for sentence in document:
         self.assertIsInstance(sentence, Sentence)
         for word in sentence:
             self.assertIsInstance(word, text_type)