Esempio n. 1
0
def process_doc(book_list, outfile_name):
    """Collect the trees of the given books into a new document and process it.

    For each key in ``book_list`` (in the order given), every entry stored
    under that key in the module-level ``tree_dic`` is unpacked as a pair and
    its second item is added to a freshly created bundle of a new
    ``Document``. Each element of the module-level ``blocks`` is then applied
    to that document, in order.

    Args:
        book_list: Iterable of keys into the module-level ``tree_dic``.
        outfile_name: Destination handed to ``store_conllu``. When falsy,
            the document is built and processed but nothing is stored.
    """
    result = Document()

    # Lazily flatten the per-book entries; the first item of each pair is unused.
    pending = (tree for key in book_list for _, tree in tree_dic[key])
    for tree in pending:
        # One new bundle per tree.
        result.create_bundle().add_tree(tree)

    for step in blocks:
        step.apply_on_document(result)

    # Writing the result is optional.
    if not outfile_name:
        return
    result.store_conllu(outfile_name)
Esempio n. 2
0
# Remaining command-line options: the ending book (integer, default 24) and
# the output file. `parser` itself is created earlier in the script.
parser.add_argument('-e', '--end', type=int, default=24, help='Ending book')
parser.add_argument('-o', '--out', help='Output file')
args = parser.parse_args()

# Load the input file into a fresh document via the reader, then take one
# tree from every bundle of that document.
doc = Document()
reader = AgldtReader(args.infile, fix_cycles=True)
reader.apply_on_document(doc)
trees = [b.get_tree() for b in doc.bundles]
# NOTE(review): get_ordered_trees is not visible here; presumably it groups
# the trees per book within the start/end range -- confirm against its
# definition. Its result is used as a mapping whose keys are sorted below.
tree_dic = get_ordered_trees(trees, args.start, args.end)
book_list = sorted(tree_dic.keys())

# Build a second document that receives the trees in sorted-key order.
ordered_doc = Document()

for book in book_list:
    # Each entry unpacks as a pair; only its second item is used.
    for _, sent in tree_dic[book]:
        # One new bundle per tree.
        bund = ordered_doc.create_bundle()
        bund.add_tree(sent)

# Output path as given on the command line (None when -o/--out is omitted,
# since the option has no default).
outname = args.out

blocks = [
    SetSpaceAfter(),
    CreateUpos(),
    CreateFeats(),
    SetMember(),
    ShallowConverter(),
    ShiftArtificials(),
    SubTreeConverter(with_enhanced=True),
    FixObj(),
    SetArtificials(),
    MakeEnhanced(