Beispiel #1
0
def run(args):
    """Annotate a corpus with a saved parser and optionally evaluate it.

    Loads a flow from ``args.parser``, builds a ``Spec`` and a ``Caspar``
    model from that flow, runs the model over every document read from
    ``args.input`` and writes each result to ``args.output`` as a record
    keyed by the document's running index (``"0"``, ``"1"``, ...).

    Args:
        args: Object exposing ``input``, ``parser`` and ``output``
            (verified via ``check_present``), plus ``trace`` (forwarded as
            the ``debug`` flag of ``caspar.forward``) and ``evaluate``.

    Returns:
        The value returned by ``frame_evaluation`` when ``args.evaluate``
        is truthy, otherwise ``None``.

    Raises:
        AssertionError: If ``args.input`` or ``args.parser`` does not exist.
    """
    check_present(args, ["input", "parser", "output"])
    assert os.path.exists(args.input), args.input
    assert os.path.exists(args.parser), args.parser

    # Read parser flow.
    flow = Flow()
    flow.load(args.parser)

    # Initialize the spec from the flow.
    spec = Spec()
    spec.from_flow(flow)

    # Initialize the model from the flow.
    caspar = Caspar(spec)
    caspar.from_flow(flow)

    corpus = Corpora(args.input, caspar.spec.commons)
    writer = sling.RecordWriter(args.output)
    count = 0
    try:
        for document in corpus:
            state, _, _, trace = caspar.forward(document,
                                                train=False,
                                                debug=args.trace)
            # NOTE(review): presumably commits the predicted state (and the
            # trace, when one was produced) before encoding -- confirm
            # against the definitions of state.write() / trace.write().
            state.write()
            if trace:
                trace.write()
            writer.write(str(count), state.encoded())
            count += 1
            if count % 100 == 0:
                # Single-argument print() with %-formatting emits the same
                # text as the Python 2 print statement and also parses on
                # Python 3.
                print("Annotated %s documents %s %s" % (count, now(), mem()))
    finally:
        # Close the writer even when annotation fails part-way through so
        # the output handle is not leaked.
        writer.close()
    print("Annotated %s documents %s %s" % (count, now(), mem()))
    print("Wrote annotated documents to %s" % (args.output,))

    if not args.evaluate:
        return None

    # delete=False: the file must outlive the handle so it can be written
    # by path; it is removed explicitly in the finally clause below.
    f = tempfile.NamedTemporaryFile(delete=False)
    fname = f.name
    try:
        # NOTE(review): the saved commons file is not passed on to
        # frame_evaluation (which receives the in-memory commons); the save
        # is kept to preserve existing behavior -- confirm it is needed.
        caspar.spec.commons.save(fname, binary=True)
        f.close()
        return frame_evaluation(gold_corpus_path=args.input,
                                test_corpus_path=args.output,
                                commons=caspar.spec.commons)
    finally:
        # Always release the handle and remove the temp file, including
        # when saving or evaluation raises.
        f.close()
        os.unlink(fname)