Ejemplo n.º 1
0
def test_get_results():
    """Check that a stored result can be fetched, listed as cached and counted.

    Two inputs are created but a result is stored for the first one only, so
    the second one must not appear in the cache lookup afterwards.
    """
    with clean_es():
        first_id = create_input(text="Een test")
        second_id = create_input(text="Nog een test")
        doc_type = "TEST"

        # Before anything is stored: the direct lookup raises and the cache
        # lookup yields no entries.
        assert_raises(NotFoundError, backend.get_document, first_id, doc_type)
        cached_before = dict(backend.get_cached_documents([first_id, second_id], doc_type))
        assert_equal([], list(cached_before))

        # Store a result for the first input only, then flush the index
        # (presumably so the write is visible to the reads below).
        backend.store_result(doc_type, first_id, [{"module": "TEST"}], "test result")
        backend._es.indices.flush()

        stored = backend.get_document(first_id, doc_type)
        assert_equal(stored.pipeline, [{"module": "TEST"}])
        assert_equal(stored.text, "test result")

        # Only the first id is reported as cached, and it is counted once
        # under its document type.
        cached_after = dict(backend.get_cached_documents([first_id, second_id], doc_type))
        assert_equal([first_id], list(cached_after))
        counts = dict(backend.count_cached([first_id, second_id]))
        assert_equal(1, counts[doc_type])
Ejemplo n.º 2
0
# Command-line entry point: run one nlpipe module either on ad hoc text or on
# a list of article ids.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    'module',
    metavar="module",
    choices=modules,
    help='nlpipe module (task) name ({})'.format(", ".join(sorted(modules))))
parser.add_argument(
    '--adhoc',
    action='store_true',
    help='Ad hoc: parse sentence directly (provide sentence instead of ids)')
parser.add_argument(
    '-p', '--print',
    action='store_true',
    help='Print results to stdout')
parser.add_argument(
    '-f', '--force',
    action='store_true',
    help='Force re-parse even if result is cached')
parser.add_argument(
    'target',
    nargs='+',
    help='Article id(s) (or text in adhoc mode)')

args = parser.parse_args()
task = modules[args.module]

if not args.adhoc:
    # Convert every id before processing starts, so a non-numeric id aborts
    # the run before any article has been touched.
    article_ids = list(map(int, args.target))
    for article_id in article_ids:
        # --force: remove an existing result before running the task again.
        if args.force and backend.exists(task.doc_type, article_id):
            backend.delete_result(task.doc_type, article_id)

        task.run(article_id)
        if not args.print:
            continue
        result = backend.get_document(article_id, task.doc_type)
        # NOTE(review): on Python 3 this prints the bytes repr (b'...');
        # confirm which interpreter this script targets.
        print(result.text.encode('utf-8'))
else:
    # A single "-" target means: read the text to parse from stdin.
    text = sys.stdin.read() if args.target == ["-"] else " ".join(args.target)
    print("Parsing {text!r} using {task}".format(text=text, task=task), file=sys.stderr)
    result = task._process(text)
    print(result)