def read_corpus(corpus):
    """Return ``(doc, from_tree, to_tree)`` for the given corpus.

    ``corpus`` may already be a ``HitaextDoc``; anything else is handed to
    ``HitaextDoc(file=...)`` to build one.  The "from" tree and then the
    "to" tree are fetched with ``get_doc_tree`` and each has ``update()``
    called on it before the document's ``inject_alignments`` is invoked
    with both trees.
    """
    doc = corpus if isinstance(corpus, HitaextDoc) else HitaextDoc(file=corpus)

    # Fetch and update one side at a time, "from" first, so the sequence of
    # calls made on the document stays get/update, get/update.
    trees = []
    for side in ("from", "to"):
        tree = doc.get_doc_tree(side)
        tree.update()
        trees.append(tree)
    source_tree, target_tree = trees

    doc.inject_alignments(source_tree, target_tree)
    return doc, source_tree, target_tree
def read_corpus(corpus):
    """Return ``(doc, from_tree, to_tree)`` for the given corpus.

    ``corpus`` may already be a ``HitaextDoc``; anything else is handed to
    ``HitaextDoc(file=...)`` to build one.  The "from" tree and then the
    "to" tree are fetched with ``get_doc_tree`` and each has ``update()``
    called on it before the document's ``inject_alignments`` is invoked
    with both trees.
    """
    doc = corpus if isinstance(corpus, HitaextDoc) else HitaextDoc(file=corpus)

    # Fetch and update one side at a time, "from" first, so the sequence of
    # calls made on the document stays get/update, get/update.
    trees = []
    for side in ("from", "to"):
        tree = doc.get_doc_tree(side)
        tree.update()
        trees.append(tree)
    source_tree, target_tree = trees

    doc.inject_alignments(source_tree, target_tree)
    return doc, source_tree, target_tree
parser.add_argument(
    "-V", "--verbose", action="store_true", help="verbose output"
)
args = parser.parse_args()

# Progress messages are written with stderr.write() instead of the
# ``print >>stderr, ...`` statement: the chevron form only works on
# Python 2 (on Python 3 it raises TypeError at runtime), while write()
# emits the same text on both.  The one-element tuple keeps the "%"
# formatting safe whatever type args.corpus happens to be.
if args.verbose:
    stderr.write("Reading corpus from %s\n" % (args.corpus,))

corpus = HitaextDoc(file=args.corpus)
from_tree = corpus.get_doc_tree("from")
to_tree = corpus.get_doc_tree("to")
from_tree.update()
to_tree.update()

# Inject the alignments into both trees, set the alignment's "method" to
# "id", then extract the alignments from the trees again.
corpus.inject_alignments(from_tree, to_tree)
corpus.alignment.set("method", "id")
corpus.extract_alignments(from_tree, to_tree)

# NOTE: the result is written to the same path the corpus was read from.
if args.verbose:
    stderr.write("Writing corpus to %s\n" % (args.corpus,))
corpus.write(args.corpus)
default="parallel text corpus", help="") parser.add_argument("-V", "--verbose", action="store_true", help="verbose output") args = parser.parse_args() if args.verbose: print >> stderr, "Reading corpus from", args.corpus corpus = HitaextDoc(file=args.corpus) from_tree = corpus.get_doc_tree("from") to_tree = corpus.get_doc_tree("to") from_tree.update() to_tree.update() corpus.inject_alignments(from_tree, to_tree) corpus.alignment.set("method", "id") corpus.extract_alignments(from_tree, to_tree) if args.verbose: print >> stderr, "Writing corpus to", args.corpus corpus.write(args.corpus)