def process_doc(book_list, outfile_name):
    """Build a document from the selected books, convert it, optionally save it.

    A fresh ``Document`` is created and every tree stored under
    ``tree_dic[book]`` is added to it in its own bundle, following the order
    of ``book_list``. Each entry of the module-level ``blocks`` list is then
    applied to the assembled document.

    Args:
        book_list: Iterable of keys into the module-level ``tree_dic``.
        outfile_name: Path handed to ``store_conllu``; when falsy (``None``
            or an empty string) nothing is written.

    Returns:
        None. The assembled document is not handed back to the caller.
    """
    # NOTE(review): the original was collapsed onto a single line; the block
    # application is taken to run once, after all bundles have been created.
    assembled = Document()

    # The first element of each (key, tree) pair is not needed here.
    trees_in_order = (
        tree
        for book_id in book_list
        for _, tree in tree_dic[book_id]
    )
    for tree in trees_in_order:
        assembled.create_bundle().add_tree(tree)

    for converter in blocks:
        converter.apply_on_document(assembled)

    if not outfile_name:
        return
    assembled.store_conllu(outfile_name)
parser.add_argument('-e', '--end', type=int, default=24, help='Ending book') parser.add_argument('-o', '--out', help='Output file') args = parser.parse_args() doc = Document() reader = AgldtReader(args.infile, fix_cycles=True) reader.apply_on_document(doc) trees = [b.get_tree() for b in doc.bundles] tree_dic = get_ordered_trees(trees, args.start, args.end) book_list = sorted(tree_dic.keys()) ordered_doc = Document() for book in book_list: for _, sent in tree_dic[book]: bund = ordered_doc.create_bundle() bund.add_tree(sent) outname = args.out blocks = [ SetSpaceAfter(), CreateUpos(), CreateFeats(), SetMember(), ShallowConverter(), ShiftArtificials(), SubTreeConverter(with_enhanced=True), FixObj(), SetArtificials(), MakeEnhanced(