Example #1
0
def main(args):
    """Classify each input annotation and print ranked results to stdout.

    Reads lines from ``argp.input``, builds either one combined document
    (when caching is enabled) or a lazy per-line stream of documents,
    loads a pickled classifier from ``argp.model_path`` and prints one
    ``annotation-text<TAB>ranked-classification`` line per annotation.

    Args:
        args: argv-style argument list; ``args[0]`` is the program name
            and is skipped before parsing.
    """
    argp = ARGPARSER.parse_args(args[1:])

    if not argp.no_cache:
        # Caching needs every string up-front, so we can't stream stdin
        # lazily; read it all into a single document.
        doc = Document('<classify>', [], [], '<classify>')
        for _string in (l.rstrip('\n') for l in argp.input):
            doc.abstract.append(_string_to_ann_sent(_string))
        docs = (doc, )
    else:
        # No caching: process input lazily, one document per line.
        docs = (Document('Line: %s' % i, [], [_string_to_ann_sent(_string)],
            '<stdin>') for i, _string in enumerate(
                (l.rstrip('\n') for l in argp.input), start=1))

    # Cache the strings for speed
    if not argp.no_cache:
        cache_simstring((docs, ), verbose=argp.verbose)

    # Pickle data is a byte stream: open in binary mode so platform
    # newline translation cannot corrupt it.
    with open(argp.model_path, 'rb') as model_file:
        classifier = pickle_load(model_file)

    # TODO: Faster to do it in a batch instead
    for doc in docs:
        for sent in doc:
            for ann in sent:
                # Parenthesized single-argument print works identically
                # under both Python 2 and Python 3.
                print('%s\t%s' % (sent.annotation_text(ann),
                        str(classifier.classify(doc, sent, ann, ranked=True))))
Example #2
0
def main(args):
    """Train a SimString internal classifier and pickle it to disk.

    Builds a single document from the tab-separated input, pre-caches
    the SimString lookups for speed, trains the classifier on that
    document and serialises the trained model to ``argp.model_path``.

    Args:
        args: argv-style argument list; ``args[0]`` is the program name
            and is skipped before parsing.
    """
    argp = ARGPARSER.parse_args(args[1:])

    # Create a dataset out of the input
    doc = _tab_separated_input_to_doc(argp.input)

    # Cache the strings for speed
    cache_simstring(((doc, ), ), verbose=argp.verbose)

    classifier = SimStringInternalClassifier()
    classifier.train((doc, ))

    # Pickle output is a byte stream: it must be written in binary mode
    # ('wb'), or newline translation can corrupt it on some platforms.
    with open(argp.model_path, 'wb') as model_file:
        pickle_dump(classifier, model_file)
Example #3
0
def main(args):
    """Train a SimString internal classifier and pickle the model.

    Turns the tab-separated input into one document, caches the
    SimString lookups, trains ``SimStringInternalClassifier`` on the
    document and writes the pickled classifier to ``argp.model_path``.

    Args:
        args: argv-style argument list; ``args[0]`` is the program name
            and is skipped before parsing.
    """
    argp = ARGPARSER.parse_args(args[1:])

    # Create a dataset out of the input
    doc = _tab_separated_input_to_doc(argp.input)

    # Cache the strings for speed
    cache_simstring(((doc, ), ), verbose=argp.verbose)

    classifier = SimStringInternalClassifier()
    classifier.train((doc, ))

    # Open in binary mode: pickle emits raw bytes, and text-mode
    # newline translation would corrupt the serialised model.
    with open(argp.model_path, 'wb') as model_file:
        pickle_dump(classifier, model_file)