Example #1
    def POST(self):
        web.header('Content-Type', 'application/json')
        post_data = web.input(_method='post')

        # Save the submitted text into a temporary input file.
        file_id = np.random.randint(1000000, 2000000)
        input_path = os.path.join("tmp", "user.%i.input" % file_id)
        with codecs.open(input_path, "w", "utf-8") as f:
            f.write(post_data["text"])

        # Load and prepare the sentences for the tagger.
        test_sentences = loader.load_sentences(input_path, lower, zeros)
        update_tag_scheme(test_sentences, tag_scheme)
        test_data = prepare_dataset3(
            test_sentences, word_to_id, char_to_id, model.tag_maps, model.feature_maps, lower
        )

        # Run the multilayer tagger and rebuild the plain text from the token column.
        out_sentences = predict_multilayer(parameters, f_eval, test_sentences, test_data, model.tag_maps, None)
        text = " ".join([line[0] for s in out_sentences for line in s])

        data = {"sentences": out_sentences, "text": text}
        return json.dumps(data, indent=4, sort_keys=True, encoding="utf-8")
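A minimal client sketch for exercising a handler like the one above. The server URL and route are assumptions (the web.py routing table is not part of this example); only the "text" field is taken from the handler itself.

# Hypothetical client for the POST handler above; the URL/route is assumed.
import requests

resp = requests.post(
    "http://localhost:8080/tag",   # assumed route, not shown in the example
    data={"text": "Barack Obama visited Hanoi ."},
)
resp.raise_for_status()

result = resp.json()               # {"sentences": [...], "text": "..."}
print(result["text"])
for sentence in result["sentences"]:
    for token_row in sentence:
        print(token_row)           # one CoNLL-style row per token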
Example #2
    def POST(self):
        post_data = web.input(_method='post')

        # Pick the pre-loaded model that matches the requested feature type.
        feature_type = post_data["feature"]
        f_eval, model, parameters, lower, zeros, tag_scheme, word_to_id, char_to_id = web.pre_load_data[feature_type]

        # Output format defaults to JSON unless the client asks for raw CoNLL text.
        output_format = post_data.get("format", "json")

        # Save the submitted text into a temporary input file.
        file_id = np.random.randint(1000000, 2000000)
        input_path = os.path.join("temp", "user.%i.input" % file_id)
        with codecs.open(input_path, "w", "utf-8") as f:
            f.write(post_data["text"])

        # Load and prepare the sentences for the multilayer tagger.
        gold_columns = [int(x['column']) for x in model.tag_maps]
        test_sentences = loader.load_sentences(input_path, lower, zeros)
        update_tag_scheme_multilayer(test_sentences, gold_columns, tag_scheme)
        test_data = prepare_dataset3(
            test_sentences, word_to_id, char_to_id, model.tag_maps, model.feature_maps, lower
        )

        # Run the tagger and post-process the CoNLL-style output into the text results.
        out_sentences = predict_multilayer(parameters, f_eval, test_sentences, test_data, model.tag_maps, None)
        results = extract_re_from_conll_data(out_sentences)

        if output_format == "json":
            data = {"conll_data": out_sentences, "text_data": results}
            web.header('Content-Type', 'application/json')
            return json.dumps(data, indent=4, sort_keys=True, encoding="utf-8")
        else:
            # One line per token (tab-separated columns), blank line between sentences.
            conll_text = "\n\n".join(["\n".join(["\t".join(l) for l in item]) for item in out_sentences])
            return conll_text
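A small standalone sketch of the CoNLL serialization used in the non-JSON branch above, with a toy out_sentences value; the token rows here are made up for illustration, the real rows come from predict_multilayer.

# Toy stand-in for out_sentences: two sentences, each a list of token rows.
out_sentences = [
    [["John", "B-PER"], ["works", "O"], [".", "O"]],
    [["Hanoi", "B-LOC"], [".", "O"]],
]

# Same expression as in the handler: tab-separated columns per token,
# one token per line, blank line between sentences.
conll_text = "\n\n".join(["\n".join(["\t".join(l) for l in item]) for item in out_sentences])
print(conll_text)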
Example #3
# Rebuild the model graph without training ops and reload the saved weights.
print 'Reloading previous model...'
_, f_eval = model.build(training=False, **parameters)
model.reload()

assert os.path.isfile(opts.test_file)
test_file = opts.test_file
out_txt = opts.out_txt
out_json = opts.out_json

# Load and prepare the test sentences.
test_sentences = loader.load_sentences(test_file, lower, zeros)
update_tag_scheme(test_sentences, tag_scheme)
test_data = prepare_dataset3(
    test_sentences, word_to_id, char_to_id, model.tag_maps, model.feature_maps, lower
)

print "input: ", test_file

from pprint import pprint
pprint(model.tag_maps)

# Evaluate the multilayer tagger on the prepared test data.
test_score, iob_test_score, result_test, _ = evaluate_multilayer(parameters, f_eval, test_sentences, test_data, model.tag_maps)

print_evaluation_result(result_test)
print "OVERALL: %f" % test_score
Example #4
    print "--------------------"
    dico_ftag, ftag_to_id, id_to_ftag = feature_mapping(train_sentences, f)
    print 'feature_name:', f['name']
    print 'dico_ftag   :', dico_ftag
    print 'ftag_to_id  :', ftag_to_id
    print 'id_to_ftag  :', id_to_ftag
    feature_maps.append({
        'name': f['name'],
        'column': f['column'],
        'dim': f['dim'],
        'dico_ftag': dico_ftag,
        'ftag_to_id': ftag_to_id,
        'id_to_ftag': id_to_ftag
    })

train_data = prepare_dataset3(train_sentences, word_to_id, char_to_id,
                              tag_maps, feature_maps, lower)

dev_data = prepare_dataset3(dev_sentences, word_to_id, char_to_id, tag_maps,
                            feature_maps, lower)
test_data = prepare_dataset3(test_sentences, word_to_id, char_to_id, tag_maps,
                             feature_maps, lower)

print "%i / %i / %i sentences in train / dev / test." % (
    len(train_data), len(dev_data), len(test_data))

print "----------------------A training instance -------------------------"
print "{"
for key in train_data[0].keys():
    print "\t" + key, ":", train_data[0][key]
print "}"
print "-------------------------------------------------------------------"