Example #1
def evaluate():
    # config is assumed to be defined at module level
    augment_pred = []
    with NERModel(config) as model:

        # create datasets
        augment = CoNLLDataset(config.filename_augment, config.processing_word,
                               config.processing_tag, config.max_iter)

        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)

        # build model (the instance created by the context manager above)
        model.build()
        model.restore_session(config.dir_model)

        # evaluate
        model.logger.info("\nEvaluation on Test")
        model.evaluate(test)

        model.logger.info("\nEvaluation on Augment")
        model.evaluate(augment, augment_pred)

        # model.logger.debug(augment_pred)

    # clear memory
    del model

    return augment_pred
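
All of the snippets on this page assume module-level imports along the following lines (a sketch: the module paths follow the widely used guillaumegenthial/sequence_tagging layout and may differ in individual forks). Individual examples additionally rely on project-specific globals (e.g. arg, export_loc, train, dev) defined elsewhere in their scripts.

import os
import sys
import json

import tensorflow as tf

from model.config import Config
from model.data_utils import CoNLLDataset, get_processing_word
from model.ner_model import NERModel
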
Example #2
def main():
    # create instance of config
    config = Config()
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    # note: no processing_tag is passed for the untagged prediction file
    # (cf. Example #27)
    predict = CoNLLDataset("data/source_data.txt", config.processing_word,
                           config.max_iter)
    max_sequence_length = max(max([len(seq[0]) for seq in train]),
                              max([len(seq[0]) for seq in dev]),
                              max([len(seq[0]) for seq in test]),
                              max([len(seq[0]) for seq in predict]))

    max_word_length = max(
        max([len(word[0]) for seq in train for word in seq[0]]),
        max([len(word[0]) for seq in test for word in seq[0]]),
        max([len(word[0]) for seq in dev for word in seq[0]]))
    print(max_word_length, max_sequence_length)
    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.restore_session(config.dir_model)
    model.run_predict(predict)
Example #3
def main():
    # create instance of config
    config = Config()
    config.layer = int(sys.argv[1])
    config.step = int(sys.argv[2])
    if config.task == 'pos':
        print("USING POS")
        config.filename_train = "data/train.pos"  # test
        config.filename_dev = "data/dev.pos"
        config.filename_test = "data/test.pos"
    else:
        print("USING NER")
    print("iteration: " + str(config.layer))
    print("step: " + str(config.step))

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
Example #4
def train_active(train, dev, test, select, config, modename):
    """
    Input: train set, dev set, test set, selection set, config, mode name
    Output: accuracy on dev set and test set; predictions on the selection set
    Selects the most and least certain examples from the selection set
    """
    # build model
    #tf.reset_default_graph()
    #gc.collect()
    #tf.get_variable_scope().reuse_variables()
    model = NERModel(config)
    model.build()
    print("Start training model...")
    print("Training size ", len(train))
    model.train(train, dev)

    # restore session
    model.restore_session(config.dir_model)

    # evaluate
    print("===Evaluating on test set:===")
    mode = "test" + modename
    model.evaluate(test, mode)

    # run on selection set

    print("Selecting samples for active learning...")
    if len(select) == 0:
        return []
    l = []
    for sent in select:
        output = model.predict(sent[0])
        l.append(output[1][0])
    # sort l
    return l  # most uncertain and least uncertain
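
A minimal sketch of how the returned scores might be consumed, assuming smaller values of output[1][0] mean higher model uncertainty (the snippet leaves the ordering convention of NERModel.predict open, and "_round1" is a hypothetical mode name):

scores = train_active(train, dev, test, select, config, "_round1")
ranked = sorted(range(len(scores)), key=scores.__getitem__)  # indices into select
most_uncertain = ranked[:10]    # hypothetical selection size
least_uncertain = ranked[-10:]
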
Example #5
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    a_tensor = model.sess.graph.get_tensor_by_name(model.input_name + ':0')
    sum_tensor = model.sess.graph.get_tensor_by_name(model.output_name + ':0')

    model_input = build_tensor_info(a_tensor)
    model_output = build_tensor_info(sum_tensor)

    # Create a signature definition for tfserving
    signature_definition = signature_def_utils.build_signature_def(
        inputs={model.input_name: model_input},
        outputs={model.output_name: model_output},
        method_name=signature_constants.PREDICT_METHOD_NAME)

    builder = saved_model_builder.SavedModelBuilder(export_loc)

    builder.add_meta_graph_and_variables(
        model.sess, [tag_constants.SERVING],
        signature_def_map={
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            signature_definition
        },
        legacy_init_op=tf.tables_initializer())

    # Save the model so we can serve it with a model server :)
    builder.save()
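
A quick sanity check that the exported directory is loadable (a sketch assuming TensorFlow 1.x; export_loc is the same directory handed to SavedModelBuilder above):

import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING],
                               export_loc)
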
Example #6
def main():
    # create instance of config; Config also takes care of loading the data:
    # it holds the vocabulary, the pretrained GloVe embedding matrix,
    # and the str->id mapping functions
    config = Config()

    # build model
    model = NERModel(config)
    model.build("train")
    model.restore_session(config.dir_model)

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    processing_word = get_processing_word(lowercase=True)
    dev = CoNLLDataset(config.filename_dev, processing_word)
    train = CoNLLDataset(config.filename_train, processing_word)
    test = CoNLLDataset(config.filename_test, processing_word)

    train4cl = CoNLLdata4classifier(train,
                                    processing_word=config.processing_word,
                                    processing_tag=config.processing_tag)
    dev4cl = CoNLLdata4classifier(dev,
                                  processing_word=config.processing_word,
                                  processing_tag=config.processing_tag)
    test4cl = CoNLLdata4classifier(test,
                                   processing_word=config.processing_word,
                                   processing_tag=config.processing_tag)

    # train model
    model.train(train4cl, dev4cl, test4cl)
Example #7
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build("train")
    model.restore_session(config.dir_model)

    # create dataset
    if len(sys.argv) == 2:
        if sys.argv[1] == 'test':
            test = CoNLLDataset(config.filename_test,
                                config.processing_word,
                                config.processing_tag,
                                max_length=None)
        elif sys.argv[1] == 'dev':
            test = CoNLLDataset(config.filename_dev,
                                config.processing_word,
                                config.processing_tag,
                                max_length=None)
        else:
            raise ValueError("expected 'test' or 'dev', got " + sys.argv[1])
    else:
        assert len(sys.argv) == 1
        test = CoNLLDataset(config.filename_test,
                            config.processing_word,
                            config.processing_tag,
                            max_length=None)
    # evaluate and interact
    model.evaluate(test)
Example #8
    def train_func(_config, reporter):
        # nested trainable (apparently for ray.tune); train and dev used
        # below come from the enclosing scope
        # tf.reset_default_graph()
        config = Config()
        # for (key, val) in _config.items():
        #     # config[key] = val
        #     setattr(config, key[3:], val)
        # config["dir_output"] = ""
        setattr(config, "dir_output", "pretrain")
        setattr(config, "nepochs", 50)
        setattr(config, "batch_size", 80)

        pretrain_path = _config["30-pretrain_path"]
        PRETRAIN_MODE = _config["31-pretrain_mode"]

        if PRETRAIN_MODE:
            config_path = os.path.join(pretrain_path, "params.json")
            with open(config_path) as fin:
                content = fin.read().replace('\n', '')
                import json
                j = json.loads(content)
                for (key, val) in j.items():
                    setattr(config, key, val)

        model = NERModel(config)
        model.build()
        if PRETRAIN_MODE:
            model.restore_session(os.path.join(pretrain_path, "results/tmptmptest/bz=10-training-"
                                                          "bieo-nocnn/model.weights/"))
        model.train(train, dev, reporter)
Example #9
def main():
    # create instance of config
    config = Config()

    pretrain_path = "/home/yinghong/project/tmp/s_t/ray_results/final/exp-final-epoch30" \
                    "/train_func_0_2018-06-16_01-24-13vmtghosb"

    config_path = os.path.join(pretrain_path, "params.json")
    with open(config_path) as fin:
        content = fin.read().replace('\n', '')
        import json
        j = json.loads(content)
        for (key, val) in j.items():
            setattr(config, key, val)

    # build model
    model = NERModel(config)
    model.build()

    model.restore_session(
        os.path.join(
            pretrain_path, "results/tmptmptest/bz=10-training-"
            "bieo-nocnn/model.weights/"))

    # create dataset
    # test  = CoNLLDataset(config.filename_test, config.processing_word,
    #                      config.processing_tag, config.max_iter)
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)

    # evaluate and interact
    model.tmp(dev, outfile="result-dev.txt")
    interactive_shell(model)
Example #10
def main():
    # Predictions from the first-stage model #
    config_first = Config(dir_output='./results/train_first/')
    model = NERModel(config_first)
    model.build()
    model.restore_session(config_first.dir_model)
    test = CoNLLDataset(config_first.filename_test,
                        config_first.processing_word,
                        config_first.processing_tag, config_first.max_iter)

    print()
    print('Predicting first stage!')
    model.evaluate(test)
    print()

    test_predictions = model.predict_test(test)
    formatted_predictions = format_predictions(test_predictions, 'test',
                                               config_first)

    # Predictions from the second-stage model #
    tf.reset_default_graph()
    config_second = Config(dir_output='./results/train_second/')
    model = NERModel2(config_second)
    model.build()
    model.restore_session(config_second.dir_model)

    print()
    print('Predicting second stage!')
    model.evaluate(formatted_predictions)
    print()
Example #11
def main():
    # create instance of config
    config = Config()

    config.dim_char = arg.dim_char
    config.hidden_size_char = arg.hidden_size_char
    config.hidden_size_lstm_1 = arg.hidden_size_lstm_1
    config.hidden_size_lstm_2 = arg.hidden_size_lstm_2
    config.cls_hidden_size = arg.cls_hidden_size
    config.batch_sample = arg.batch_sample
    config.elmo_scale = arg.elmo_scale
    config.lr_method = arg.lr_method
    config.batch_size = arg.batch_size
    config.learning_rate = arg.learning_rate
    config.decay_logic = arg.decay_logic
    config.run_name = arg.run_name
    config.input_feature_dim = 600 #config.hidden_size_lstm * 2 #+ 1024
    config.dir_saved_roi = arg.dir_saved_roi

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model + config.run_name + '/')

    # create dataset
    config.filename_test = config.dir_saved_roi + "test_word_ids/"
    test = CoNLLDataset(config.filename_test)

    # evaluate and interact
    model.evaluate(test, config.test_total_entity)
Example #12
def main():
    # create instance of config
    config = Config()
    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.processing_pos,
                       config.processing_chunk, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.processing_pos,
                         config.processing_chunk, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.processing_pos,
                        config.processing_chunk, config.max_iter)
    max_sequence_length = max(max([len(seq[0]) for seq in train]),
                              max([len(seq[0]) for seq in dev]),
                              max([len(seq[0]) for seq in test]))

    max_word_length = max(
        max([len(word[0]) for seq in train for word in seq[0]]),
        max([len(word[0]) for seq in test for word in seq[0]]),
        max([len(word[0]) for seq in dev for word in seq[0]]))
    print(max_word_length, max_sequence_length)
    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.train(train, dev)
    model.restore_session(config.dir_model)
    model.evaluate(test)
Example #13
def main():
    # create instance of config
    config_file = sys.argv[1]

    config = Config(config_file)

    print("dir model : ", config.dir_model)

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)
    # model.reinitialize_weights("words")
    # model.reinitialize_weights("chars")
    # model.reinitialize_weights("train_step")

    # evaluate on another data set if one is given on the command line
    if len(sys.argv) > 2:
        test_file_name = sys.argv[2]
        test = CoNLLDataset(test_file_name, config.processing_word,
                            config.processing_tag, config.max_iter)
        print("Testing on ", test_file_name, "..")
    # otherwise create the dataset from the configured test file
    else:
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
        print("Testing on ", config.filename_test, "..")

    # evaluate
    # model.predict_test(test, output=sys.stdout)
    model.evaluate(test)
Example #14
def main():
    config = Config()

    # ------------------------------------------------------------------
    # build model
    # ------------------------------------------------------------------
    model = NERModel(config)
    model.build()

    # ------------------------------------------------------------------
    # train mode
    # ------------------------------------------------------------------
    if config.mode == 'train':
        print('\n ... training model ... \n')
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
        if config.periodic:
            split = CoNLLDataset(config.dummy_train, config.processing_word,
                                 config.processing_tag, config.max_iter)
        else:
            split = CoNLLDataset(config.train_split[config.split],
                                 config.processing_word,
                                 config.processing_tag, config.max_iter)
        model.train(split, test)

    # ------------------------------------------------------------------
    # retrain mode
    # ------------------------------------------------------------------
    if config.mode == 'retrain':
        print('\n ... retraining model ... \n')
        model.restore_session(config.dir_model)
        retrain = CoNLLDataset(config.filename_retrain, config.processing_word,
                               config.processing_tag, config.max_iter)
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
        model.train(retrain, test)
Example #15
def main(args):
    # create instance of config
    config = Config()
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    max_sequence_length = max(max([len(seq[0]) for seq in train]),
                              max([len(seq[0]) for seq in dev]),
                              max([len(seq[0]) for seq in test]))

    max_word_length = max(
        max([len(word[0]) for seq in train for word in seq[0]]),
        max([len(word[0]) for seq in test for word in seq[0]]),
        max([len(word[0]) for seq in dev for word in seq[0]]))
    print(max_word_length, max_sequence_length)
    if args == "bilstm":
        model = NERModel(config, max_word_length, max_sequence_length)
    elif args == "gram_cnn":
        model = GRAM_CNNModel(config, max_word_length, max_sequence_length)
    elif args == "gate_cnn":
        model = CNNModel(config, max_word_length, max_sequence_length)
    elif args == "id_cnn":
        model = Dilated_CNNModel(config, max_word_length, max_sequence_length)
    model.build()
    model.restore_session(config.dir_model)
    model.evaluate(test)
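
Given the dispatch above, the entry point is presumably invoked with the model type as a command-line argument (a sketch; the "bilstm" fallback is an assumption, not part of the original):

if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else "bilstm")
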
Example #16
def main():
    # create instance of config
    config = Config()

    config.dim_char = arg.dim_char
    config.hidden_size_char = arg.hidden_size_char
    config.hidden_size_lstm_1 = arg.hidden_size_lstm_1
    config.hidden_size_lstm_2 = arg.hidden_size_lstm_2
    config.batch_sample = arg.batch_sample
    config.elmo_scale = arg.elmo_scale
    config.lr_method = arg.lr_method
    config.batch_size = arg.batch_size
    config.learning_rate = arg.learning_rate
    config.decay_logic = arg.decay_logic
    config.run_name = arg.run_name

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model + config.run_name + '/')

    # create dataset
    test = CoNLLDataset(config.filename_test, config.elmofile_test,
                        config.processing_word, config.processing_postags,
                        config.generate_anchor, config.max_iter)
    model.evaluate(test)
Example #17
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build("train")
    model.restore_session(config.dir_model)

    # create dataset
    processing_word = get_processing_word(lowercase=True)

    if len(sys.argv) == 2:
        if sys.argv[1] == 'test':
            test = CoNLLDataset(config.filename_test, processing_word)

        elif sys.argv[1] == 'dev':
            test = CoNLLDataset(config.filename_dev, processing_word)
        else:
            raise ValueError("expected 'test' or 'dev', got " + sys.argv[1])

    else:
        assert len(sys.argv) == 1
        test = CoNLLDataset(config.filename_test, processing_word)

    test4cl = CoNLLdata4classifier(test, processing_word=config.processing_word,
                                   processing_tag=config.processing_tag)

    # evaluate and interact
    model.evaluate(test4cl)
Example #18
def pretrain():
    config = Config()
    pretrain_path = "/home/yinghong/project/tmp/s_t_rollback/ray_results/06" \
                    "-19/01-HasCNN/try5"
    # pretrain_path = "/home/yinghong/project/tmp/s_t_rollback/ray_results/06-19/best-HasCNN/try4"
    # reverse = True
    # cv = False

    config_path = os.path.join(pretrain_path, "params.json")
    with open(config_path) as fin:
        content = fin.read().replace('\n', '')
        import json
        j = json.loads(content)
        for (key, val) in j.items():
            setattr(config, key, val)
    model = NERModel(config)
    model.build()

    model.restore_session(
        os.path.join(
            pretrain_path, "results/tmptmptest/bz=10-training-"
            "bieo-nocnn/model.weights/"))

    # create dataset
    test = CoNLLDataset(config.filename_test,
                        config.processing_word,
                        config.processing_tag,
                        config.max_iter,
                        test=True)
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)

    # evaluate and interact
    model.tmp(dev, outfile="result-test-google85.63.txt")
Example #19
def main():
    # create instance of config; Config also takes care of loading the data:
    # it holds the vocabulary, the pretrained GloVe embedding matrix,
    # and the str->id mapping functions
    config = Config()
    config.nepochs = 200
    config.dropout = 0.5
    config.batch_size = 40
    config.lr_method = "adam"
    config.lr = 0.0007
    config.lr_decay = 0.97
    config.clip = -5.0  # if negative, no clipping
    config.nepoch_no_imprv = 20

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #20
def main2():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    pretrain_path = "/home/yinghong/project/tmp/s_t/ray_results/final/" \
                    "exp-final-epoch30-sgd/train_func_0_2018-06-15_14-18-14bqpn6jv1"

    model.restore_session(os.path.join(pretrain_path, "results/tmptmptest/bz=10-training-"
                                                      "bieo-nocnn/model.weights/"))
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #21
def main():
    # create instance of config
    config = Config()
    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # create dataset
    if len(sys.argv) == 2:
        if sys.argv[1] == 'test':
            test = CoNLLDataset(config.filename_test, config.processing_word,
                                config.processing_tag, config.max_iter)
        elif sys.argv[1] == 'dev':
            test = CoNLLDataset(config.filename_dev, config.processing_word,
                                config.processing_tag, config.max_iter)
        else:
            raise ValueError("expected 'test' or 'dev', got " + sys.argv[1])
    else:
        assert len(sys.argv) == 1
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
Example #22
def main():
    # create instance of config
    dir_output = "./results/" + sys.argv[2] + "/"
    config = Config(dir_output, load=False)

    config.filename_words = "./data/words_" + sys.argv[2] + ".txt"
    config.filename_chars = "./data/chars_" + sys.argv[2] + ".txt"
    config.filename_tags = "./data/tags_" + sys.argv[2] + ".txt"

    #config.dir_output = "./results/" + sys.argv[2] + "/"
    config.dir_model = config.dir_output + "model.weights/"
    config.path_log = config.dir_output + "log.txt"

    #config.filename_dev = sys.argv[1]
    config.filename_test = sys.argv[1]
    #config.filename_train = sys.argv[3]
    config.filename_pred = sys.argv[1].replace(".txt", ".pred")

    config.load()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    #test  = CoNLLDataset(config.filename_test, config.processing_word,
    #                     config.processing_tag, config.max_iter)

    test = CoNLLDataset(sys.argv[1], config.processing_word,
                        config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
Example #23
def get_model_api():
    """Returns lambda function for api"""

    # 1. initialize model once and for all
    config = Config()
    model = NERModel(config)
    model.build()
    model.restore_session("results/crf/model.weights/")

    def model_api(input_data):
        """
        Args:
            input_data: submitted to the API, raw string

        Returns:
            output_data: after some transformation, to be
                returned to the API

        """
        # 2. process input
        punc = [",", "?", ".", ":", ";", "!", "(", ")", "[", "]"]
        s = "".join(c for c in input_data if c not in punc)
        words_raw = s.strip().split(" ")

        # 3. call model predict function
        preds = model.predict(words_raw)

        # 4. process the output
        output_data = align_data({"input": words_raw, "output": preds})

        # 5. return the output for the api
        return output_data

    return model_api
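
Using the returned callable might look like this (the sentence is a hypothetical input; align_data comes from the surrounding project):

model_api = get_model_api()
print(model_api("John lives in New York"))
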
Example #24
def load_model():
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    return model
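
A sketch of using the helper above; predict is assumed to take a list of tokens, as in Example #23 (the sentence is hypothetical):

model = load_model()
print(model.predict(["Obama", "visited", "Paris"]))
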
Example #25
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)
    interactive_shell(model)
Example #26
def setup():
    # create instance of config
    config = Config()
    # build model
    global graph, model
    graph = tf.get_default_graph()
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)
    setup_classifier()
Example #27
def main(predict_file, save_file):
    # create instance of config
    config = Config()
    predict = CoNLLDataset(predict_file, config.processing_word, config.max_iter)
    max_sequence_length = max([len(seq[0]) for seq in predict])
    max_word_length = max([len(word[0]) for seq in predict for word in seq[0]])
    print(max_word_length, max_sequence_length)
    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.restore_session(config.dir_model)
    model.run_predict(predict, save_file)
Example #28
def main(args):
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    print(model.predict(args.sentence))
Example #29
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    # note: unlike the other examples, model.build() is not called before
    # restore_session() here; presumably this NERModel variant constructs
    # its graph in __init__
    model.restore_session(config.dir_model)

    # evaluate and interact
    model.evaluate(config.dataset_test)
    interactive_shell(model)
Example #30
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # predict
    path = "data-sequence-tagging/QA4IE-benchmark/"
    file_name_list = [
        "ie_test/0-400/ie_test.span",
        "seq/0-400/dev.seq",
        "seq/0-400/test.seq",
        "seq/0-400/train.seq",
        "seq/400-700/dev.seq",
        "seq/400-700/test.seq",
        "seq/400-700/train.seq",
        "seq/700-/dev.seq",
        "seq/700-/test.seq",
        "seq/700-/train.seq",
        "span/0-400/dev.span",
        "span/0-400/test.span",
        "span/0-400/train.span",
        "span/400-700/dev.span",
        "span/400-700/test.span",
        "span/400-700/train.span",
        "span/700-/dev.span",
        "span/700-/test.span",
        "span/700-/train.span"
    ]

    for file_name in file_name_list:
        with open(path + file_name + ".json", 'r') as ifs, \
             open(path + file_name + ".ner", 'w') as ofs:
            dataset = json.loads(ifs.read())
            index = 0
            for passage in dataset['data']:
                # start of one passage
                ofs.write('#' + str(index) + "\n\n")
                index += 1
                for paragraph in passage['paragraphs']:
                    context = paragraph['context']
                    word_list = context.split(' ')
                    preds = model.predict(word_list)
                    ofs.write('\n'.join(preds) + '\n\n')
                ofs.write('\n')
        print("successfully predicted " + file_name + '\n')
Example #31
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
    interactive_shell(model)