コード例 #1
0
def main():
    """Build an NER model and run either initial training or retraining."""
    config = Config()

    # ------------------------------------------------------------------
    # build model
    # ------------------------------------------------------------------
    model = NERModel(config)
    model.build()

    # ------------------------------------------------------------------
    # train mode: fit on the selected split, evaluating against test
    # ------------------------------------------------------------------
    if config.mode == 'train':
        print('\n ... training model ... \n')
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
        # Both branches build the same kind of dataset; only the file differs.
        train_file = (config.dummy_train if config.periodic
                      else config.train_split[config.split])
        split = CoNLLDataset(train_file, config.processing_word,
                             config.processing_tag, config.max_iter)
        model.train(split, test)

    # ------------------------------------------------------------------
    # retrain mode: restore saved weights, then continue training
    # ------------------------------------------------------------------
    if config.mode == 'retrain':
        print('\n ... retraining model ... \n')
        model.restore_session(config.dir_model)
        retrain = CoNLLDataset(config.filename_retrain, config.processing_word,
                               config.processing_tag, config.max_iter)
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
        model.train(retrain, test)
コード例 #2
0
 def __init__(self):
     """Build and restore a trained NER model with custom embedding sizes."""
     cfg = Config()
     # Override default embedding dimensions before the graph is built.
     cfg.dim_word = 250
     cfg.dim_char = 50
     net = NERModel(cfg)
     net.build()
     # NOTE(review): assumes MODEL_DIR is defined on the class -- confirm.
     net.restore_session(self.MODEL_DIR)
     self.config = cfg
     self.model = net
コード例 #3
0
ファイル: train.py プロジェクト: lixusheng1/bacterial_NER
def main():
    """Train an NER model sized to the corpus, then evaluate on test."""
    # create instance of config
    config = Config()

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.processing_pos,
                       config.processing_chunk, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.processing_pos,
                         config.processing_chunk, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.processing_pos,
                        config.processing_chunk, config.max_iter)

    # Longest sentence and longest word across all three splits; the model
    # dimensions depend on both.
    splits = (train, dev, test)
    max_sequence_length = max(len(seq[0]) for ds in splits for seq in ds)
    max_word_length = max(len(word[0])
                          for ds in splits for seq in ds for word in seq[0])
    print(max_word_length, max_sequence_length)

    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.train(train, dev)
    model.restore_session(config.dir_model)
    model.evaluate(test)
コード例 #4
0
ファイル: train.py プロジェクト: MissMuffin/sequence_tagging
def main():
    """Build an NER model and train it on the configured datasets."""
    # create instance of config
    config = Config()

    # build model
    # NOTE(review): model.build() is never called here, unlike sibling
    # scripts -- confirm NERModel.train builds the graph itself.
    model = NERModel(config)
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    model.train(train=config.dataset_train, dev=config.dataset_dev)
コード例 #5
0
def main():
    """Produce out-of-fold predictions for the train set via 5-fold CV.

    For each fold a fresh model is trained on the remaining folds and used
    to predict the held-out fold, so every training example receives a
    prediction from a model that never saw it. Results are saved to disk.
    """
    config = Config('./results/train_folds/')
    train_predictions_file = './data/predictions/formatted_train_predictions.npy'

    kf = KFold(n_splits=5)

    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # Materialize the dataset so folds can be selected by index arrays.
    train = np.array([el for el in train])
    predictions = [0] * len(train)

    for train_ids, evaluate_ids in kf.split(train):
        train_dataset = train[train_ids]
        evaluate_dataset = train[evaluate_ids]
        # Fresh graph + fresh config/model per fold.
        tf.reset_default_graph()
        config = Config('./results/train_folds/')
        model = NERModel(config)
        model.build()
        model.train(train_dataset, evaluate_dataset)
        # `sample_id` instead of `id` -- avoids shadowing the builtin.
        for sample_id, tags in zip(evaluate_ids,
                                   model.predict_test(evaluate_dataset)):
            predictions[sample_id] = tags
        model.close_session()

    predictions = np.array(predictions)
    formatted_predictions = format_predictions(predictions, 'train', config)
    np.save(train_predictions_file, formatted_predictions)
コード例 #6
0
def main():
    """Evaluate a fine-tuned NER model on the test (or dev) split.

    Usage: evaluate.py [test|dev]   (defaults to test)
    """
    config = Config()
    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model and restore the fine-tuned weights
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # choose the dataset file from argv[1]
    if len(sys.argv) == 2:
        if sys.argv[1] == 'test':
            filename = config.filename_test
        elif sys.argv[1] == 'dev':
            filename = config.filename_dev
        else:
            # An unrecognized argument used to leave `test` unbound and
            # crash later with NameError; fail fast with a clear message.
            raise ValueError("expected 'test' or 'dev', got %r" % sys.argv[1])
    else:
        assert len(sys.argv) == 1
        filename = config.filename_test
    test = CoNLLDataset(filename, config.processing_word,
                        config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
コード例 #7
0
def main():
    """Restore a trained NER model and evaluate it.

    Usage: evaluate.py <config_file> [<test_file>]
    The optional second argument overrides the configured test set.
    """
    config = Config(sys.argv[1])
    print("dir model : ", config.dir_model)

    # build model and restore weights
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)
    # model.reinitialize_weights("words")
    # model.reinitialize_weights("chars")
    # model.reinitialize_weights("train_step")

    # Evaluate either an explicitly given file or the configured test set.
    if len(sys.argv) > 2:
        test_file_name = sys.argv[2]
    else:
        test_file_name = config.filename_test
    test = CoNLLDataset(test_file_name, config.processing_word,
                        config.processing_tag, config.max_iter)
    print("Testing on ", test_file_name, "..")

    # model.predict_test(test, output=sys.stdout)
    model.evaluate(test)
def main():
    # Config loads the data: vocabulary, pretrained GloVe embeddings,
    # and the str->id processing functions.
    config = Config()

    # Hyper-parameter overrides for the fine-tuning run.
    overrides = {
        "nepochs": 200,
        "dropout": 0.5,
        "batch_size": 40,
        "lr_method": "adam",
        "lr": 0.0007,
        "lr_decay": 0.97,
        "clip": -5.0,  # if negative, no clipping
        "nepoch_no_imprv": 20,
    }
    for key, val in overrides.items():
        setattr(config, key, val)

    # build model and restore previously trained weights
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
コード例 #9
0
def pretrain():
    """Restore a pretrained model from a ray_results run and dump dev output."""
    import json  # hoisted: importing inside the `with` block was unidiomatic

    config = Config()
    pretrain_path = "/home/yinghong/project/tmp/s_t_rollback/ray_results/06" \
                    "-19/01-HasCNN/try5"
    # pretrain_path = "/home/yinghong/project/tmp/s_t_rollback/ray_results/06-19/best-HasCNN/try4"
    # reverse = True
    # cv = False

    # Overlay the saved hyper-parameters of the pretraining run.
    config_path = os.path.join(pretrain_path, "params.json")
    with open(config_path) as fin:
        # json.load parses the file directly; manually stripping newlines
        # before json.loads was unnecessary.
        j = json.load(fin)
    for key, val in j.items():
        setattr(config, key, val)

    model = NERModel(config)
    model.build()

    model.restore_session(
        os.path.join(
            pretrain_path, "results/tmptmptest/bz=10-training-"
            "bieo-nocnn/model.weights/"))

    # create dataset
    # NOTE(review): `test` is constructed but never used below -- kept in
    # case CoNLLDataset construction has side effects; confirm and remove.
    test = CoNLLDataset(config.filename_test,
                        config.processing_word,
                        config.processing_tag,
                        config.max_iter,
                        test=True)
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)

    # evaluate and interact
    model.tmp(dev, outfile="result-test-google85.63.txt")
コード例 #10
0
ファイル: evaluate.py プロジェクト: zxz53000/S-LSTM
def main():
    """Evaluate an S-LSTM tagger; layer and step come from the command line."""
    config = Config()
    config.layer = int(sys.argv[1])
    config.step = int(sys.argv[2])

    if config.task == 'pos':
        print("USING POS")
        # switch the data files over to the POS corpus
        config.filename_train = "data/train.pos"  # test
        config.filename_dev = "data/dev.pos"
        config.filename_test = "data/test.pos"
    else:
        print("USING NER")
    print("iteration: " + str(config.layer))
    print("step: " + str(config.step))

    # build model and restore weights
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset and evaluate
    eval_set = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
    model.evaluate(eval_set)
コード例 #11
0
def main():
    """Restore a trained model and run prediction over data/source_data.txt."""
    # create instance of config
    config = Config()

    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    # NOTE(review): unlike the calls above, max_iter is passed in the
    # processing_tag position here -- confirm CoNLLDataset's signature
    # tolerates this for tag-less prediction data.
    predict = CoNLLDataset("data/source_data.txt", config.processing_word,
                           config.max_iter)

    # longest sentence across all four datasets
    max_sequence_length = max(len(seq[0])
                              for ds in (train, dev, test, predict)
                              for seq in ds)
    # longest word; NOTE(review): `predict` is not included here -- confirm
    # this is intended.
    max_word_length = max(len(word[0])
                          for ds in (train, test, dev)
                          for seq in ds for word in seq[0])
    print(max_word_length, max_sequence_length)

    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.restore_session(config.dir_model)
    model.run_predict(predict)
コード例 #12
0
def main():
    """Restore a pretrained model, dump dev predictions, then go interactive."""
    import json  # hoisted: importing inside the `with` block was unidiomatic

    config = Config()

    pretrain_path = "/home/yinghong/project/tmp/s_t/ray_results/final/exp-final-epoch30" \
                    "/train_func_0_2018-06-16_01-24-13vmtghosb"

    # Overlay the saved hyper-parameters of the pretraining run.
    config_path = os.path.join(pretrain_path, "params.json")
    with open(config_path) as fin:
        # json.load parses the file directly; manually stripping newlines
        # before json.loads was unnecessary.
        j = json.load(fin)
    for key, val in j.items():
        setattr(config, key, val)

    # build model
    model = NERModel(config)
    model.build()

    model.restore_session(
        os.path.join(
            pretrain_path, "results/tmptmptest/bz=10-training-"
            "bieo-nocnn/model.weights/"))

    # create dataset
    # test  = CoNLLDataset(config.filename_test, config.processing_word,
    #                      config.processing_tag, config.max_iter)
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)

    # evaluate and interact
    model.tmp(dev, outfile="result-dev.txt")
    interactive_shell(model)
コード例 #13
0
def main():
    # Config loads the data: vocabulary, pretrained GloVe embeddings,
    # and the str->id processing functions.
    config = Config()

    # build model
    model = NERModel(config)
    model.build("train")
    model.restore_session(config.dir_model)

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    processing_word = get_processing_word(lowercase=True)
    dev = CoNLLDataset(config.filename_dev, processing_word)
    train = CoNLLDataset(config.filename_train, processing_word)
    test = CoNLLDataset(config.filename_test, processing_word)

    def wrap(ds):
        # Adapt a raw dataset for the classifier using the config processors.
        return CoNLLdata4classifier(ds,
                                    processing_word=config.processing_word,
                                    processing_tag=config.processing_tag)

    train4cl = wrap(train)
    dev4cl = wrap(dev)
    test4cl = wrap(test)

    # train model
    model.train(train4cl, dev4cl, test4cl)
コード例 #14
0
ファイル: pretrain-template.py プロジェクト: Fangyh09/s_t
def main2():
    """Restore a pretrained SGD run and continue training on train/dev."""
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()

    pretrain_path = "/home/yinghong/project/tmp/s_t/ray_results/final/" \
                    "exp-final-epoch30-sgd/train_func_0_2018-06-15_14-18-14bqpn6jv1"
    model.restore_session(os.path.join(
        pretrain_path,
        "results/tmptmptest/bz=10-training-bieo-nocnn/model.weights/"))
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
コード例 #15
0
def main():
    """Evaluate a trained NER model on the test (or dev) split.

    Usage: evaluate.py [test|dev]   (defaults to test)
    """
    config = Config()

    # build model and restore weights
    model = NERModel(config)
    model.build("train")
    model.restore_session(config.dir_model)

    # choose the dataset file from argv[1]
    if len(sys.argv) == 2:
        if sys.argv[1] == 'test':
            filename = config.filename_test
        elif sys.argv[1] == 'dev':
            filename = config.filename_dev
        else:
            # An unrecognized argument used to leave `test` unbound and
            # crash later with NameError; fail fast with a clear message.
            raise ValueError("expected 'test' or 'dev', got %r" % sys.argv[1])
    else:
        assert len(sys.argv) == 1
        filename = config.filename_test
    test = CoNLLDataset(filename,
                        config.processing_word,
                        config.processing_tag,
                        max_length=None)

    # evaluate and interact
    model.evaluate(test)
def main():
    """Restore a trained ROI model and evaluate it on saved test word ids."""
    config = Config()

    # Copy command-line hyper-parameters onto the config.
    for name in ('dim_char', 'hidden_size_char', 'hidden_size_lstm_1',
                 'hidden_size_lstm_2', 'cls_hidden_size', 'batch_sample',
                 'elmo_scale', 'lr_method', 'batch_size', 'learning_rate',
                 'decay_logic', 'run_name', 'dir_saved_roi'):
        setattr(config, name, getattr(arg, name))
    config.input_feature_dim = 600  # config.hidden_size_lstm * 2 #+ 1024

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model + config.run_name + '/')

    # create dataset from the saved ROI word ids
    config.filename_test = config.dir_saved_roi + "test_word_ids/"
    test = CoNLLDataset(config.filename_test)

    # evaluate and interact
    model.evaluate(test, config.test_total_entity)
コード例 #17
0
ファイル: evaluate_arg.py プロジェクト: ttklm20/CINEX
def main():
    """Evaluate a trained model for one CINEX relation.

    Usage: evaluate_arg.py <test_file> <model_name>
    """
    test_path = sys.argv[1]
    model_name = sys.argv[2]

    dir_output = "./results/" + model_name + "/"
    config = Config(dir_output, load=False)

    config.filename_words = "./data/words_" + model_name + ".txt"
    config.filename_chars = "./data/chars_" + model_name + ".txt"
    config.filename_tags = "./data/tags_" + model_name + ".txt"

    #config.dir_output = "./results/" + sys.argv[2] + "/"
    config.dir_model = config.dir_output + "model.weights/"
    config.path_log = config.dir_output + "log.txt"

    #config.filename_dev = sys.argv[1]
    config.filename_test = test_path
    #config.filename_train = sys.argv[3]
    config.filename_pred = test_path.replace(".txt", ".pred")

    # Paths are now set, so the vocabularies can be loaded.
    config.load()

    # build model and restore weights
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    #test  = CoNLLDataset(config.filename_test, config.processing_word,
    #                     config.processing_tag, config.max_iter)
    test = CoNLLDataset(test_path, config.processing_word,
                        config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
コード例 #18
0
ファイル: pretrain-template.py プロジェクト: Fangyh09/s_t
    def train_func(_config, reporter):
        """Ray trainable: optionally warm-start from a pretrained run."""
        # tf.reset_default_graph()
        config = Config()
        # for (key, val) in _config.items():
        #     # config[key] = val
        #     setattr(config, key[3:], val)
        # config["dir_output"] = ""
        config.dir_output = "pretrain"
        config.nepochs = 50
        config.batch_size = 80

        pretrain_path = _config["30-pretrain_path"]
        PRETRAIN_MODE = _config["31-pretrain_mode"]

        if PRETRAIN_MODE:
            import json
            # Overlay the pretrained run's saved hyper-parameters.
            with open(os.path.join(pretrain_path, "params.json")) as fin:
                j = json.loads(fin.read().replace('\n', ''))
            for key, val in j.items():
                setattr(config, key, val)

        model = NERModel(config)
        model.build()
        if PRETRAIN_MODE:
            model.restore_session(os.path.join(
                pretrain_path,
                "results/tmptmptest/bz=10-training-bieo-nocnn/model.weights/"))
        # `train` and `dev` come from the enclosing scope.
        model.train(train, dev, reporter)
コード例 #19
0
def main():
    """Evaluate a trained model via the classifier wrapper.

    Usage: evaluate.py [test|dev]   (defaults to test)
    """
    config = Config()

    # build model and restore weights
    model = NERModel(config)
    model.build("train")
    model.restore_session(config.dir_model)

    # create dataset
    processing_word = get_processing_word(lowercase=True)

    if len(sys.argv) == 2:
        if sys.argv[1] == 'test':
            filename = config.filename_test
        elif sys.argv[1] == 'dev':
            filename = config.filename_dev
        else:
            # An unrecognized argument used to leave `test` unbound and
            # crash later with NameError; fail fast with a clear message.
            raise ValueError("expected 'test' or 'dev', got %r" % sys.argv[1])
    else:
        assert len(sys.argv) == 1
        filename = config.filename_test
    test = CoNLLDataset(filename, processing_word)

    # Wrap for the classifier, re-using the config's processors.
    test4cl = CoNLLdata4classifier(test, processing_word=config.processing_word,
                                   processing_tag=config.processing_tag)

    # evaluate and interact
    model.evaluate(test4cl)
コード例 #20
0
def main():
    """Restore a trained model and evaluate it on the ELMo test set."""
    config = Config()

    # Copy command-line hyper-parameters onto the config.
    for name in ('dim_char', 'hidden_size_char', 'hidden_size_lstm_1',
                 'hidden_size_lstm_2', 'batch_sample', 'elmo_scale',
                 'lr_method', 'batch_size', 'learning_rate', 'decay_logic',
                 'run_name'):
        setattr(config, name, getattr(arg, name))

    # build model and restore the run's weights
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model + config.run_name + '/')

    # create dataset and evaluate
    test = CoNLLDataset(config.filename_test, config.elmofile_test,
                        config.processing_word, config.processing_postags,
                        config.generate_anchor, config.max_iter)
    model.evaluate(test)
コード例 #21
0
    def __init__(self, load_lstm):
        """Optionally load the sequence-tagging LSTM model.

        NOTE(review): when load_lstm is falsy, self.model is never set --
        presumably callers check load_lstm before using it; confirm.
        """
        import sys

        if not load_lstm:
            return

        sys.path.append('/home/rbshaffer/sequence_tagging')

        from model.ner_model import NERModel
        from model.config import Config

        # build model and restore trained weights
        config = Config()
        self.model = NERModel(config)
        self.model.build()
        self.model.restore_session(config.dir_model)
コード例 #22
0
def main():
    """Restore a (possibly ensembled) model and print test predictions."""
    # create instance of config
    config = Config()

    # build model: a single NERModel or an ensemble, per config
    model_cls = Ensemble if config.ensembles else NERModel
    model = model_cls(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word)
    #config.processing_tag, config.max_iter)

    # evaluate and interact
    #model.evaluate(test)

    # Print one predicted tag per line, blank line between sentences.
    for words, _ in test:
        for tag in model.predict(words):
            print(tag)
        print()
コード例 #23
0
def main():
    """Evaluate and/or predict with a trained NER model.

    With no arguments, both "eval" and "pred" are run.
    """
    config = Config()
    if config.use_elmo: config.processing_word = None

    #build model
    model = NERModel(config)

    learn = NERLearner(config, model)
    learn.load()

    if len(sys.argv) == 1:
        print("No arguments given. Running full test")
        sys.argv.append("eval")
        sys.argv.append("pred")

    if sys.argv[1] == "eval":
        # create datasets
        test = CoNLLDataset(config.filename_test, config.processing_word,
                             config.processing_tag, config.max_iter)
        learn.evaluate(test)

    # Guard the argv[2] lookup: with exactly one explicit argument (e.g.
    # "eval"), `sys.argv[2]` used to raise IndexError here.
    if sys.argv[1] == "pred" or (len(sys.argv) > 2 and sys.argv[2] == "pred"):
        try:
            sent = (sys.argv[2] if sys.argv[1] == "pred" else sys.argv[3])
        except IndexError:
            sent = ["Peter", "Johnson", "lives", "in", "Los", "Angeles"]

        print("Predicting sentence: ", sent)
        pred = learn.predict(sent)
        print(pred)
コード例 #24
0
class NerProcessor(PostProcessor):
    """Post-processor that fixes entity capitalization for ar2en output."""

    # Class attribute: one NERModel instance shared by all processors.
    ner_model = NERModel()

    def process(self, pred, source, model_id):
        """Return pred, with entity capitalization applied for 'ar2en'."""
        if model_id != 'ar2en':
            return pred
        return self.ner_model.entity_capitalization(pred)
コード例 #25
0
ファイル: train.py プロジェクト: Praggie/sequence_tagging
def main():
    """Build a fresh NER model and train it on the configured corpus."""
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev, train = (
        CoNLLDataset(name, config.processing_word,
                     config.processing_tag, config.max_iter)
        for name in (config.filename_dev, config.filename_train)
    )

    # train model
    model.train(train, dev)
コード例 #26
0
def main(predict_file, save_file):
    """Run prediction on predict_file and write the results to save_file."""
    # create instance of config
    config = Config()

    predict = CoNLLDataset(predict_file, config.processing_word, config.max_iter)
    # Size the model to the longest sentence / longest word in the input.
    max_sequence_length = max(len(seq[0]) for seq in predict)
    max_word_length = max(len(word[0]) for seq in predict for word in seq[0])
    print(max_word_length, max_sequence_length)

    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.restore_session(config.dir_model)
    model.run_predict(predict, save_file)
コード例 #27
0
def main():
    """Dump dev/test features from a restored model.

    The train dump is disabled; its completion message is commented out
    with it so the log no longer claims work that was not done.
    """
    config = Config()

    # Copy command-line hyper-parameters onto the config.
    config.dim_char = arg.dim_char
    config.hidden_size_char = arg.hidden_size_char
    config.hidden_size_lstm_1 = arg.hidden_size_lstm_1
    config.hidden_size_lstm_2 = arg.hidden_size_lstm_2
    config.batch_sample = arg.batch_sample
    config.elmo_scale = arg.elmo_scale
    config.lr_method = arg.lr_method
    config.batch_size = arg.batch_size
    config.learning_rate = arg.learning_rate
    config.decay_logic = arg.decay_logic
    config.run_name = arg.run_name
    config.dir_saved_roi = arg.dir_saved_roi

    # build model and restore the run's weights
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model + config.run_name + '/')

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.elmofile_dev,
                       config.bertfile_dev, config.processing_word,
                       config.processing_postags, config.generate_anchor,
                       config.max_iter)
    #train = CoNLLDataset(config.filename_train, config.elmofile_train, config.bertfile_train, config.processing_word,
    #                    config.processing_postags, config.generate_anchor,
    #                   config.max_iter)
    test = CoNLLDataset(config.filename_test, config.elmofile_test,
                        config.bertfile_test, config.processing_word,
                        config.processing_postags, config.generate_anchor,
                        config.max_iter)

    # dump features
    #model.dump(train, 'train')
    #print("Dump Train feature done!")  # disabled along with the train dump
    model.dump(dev, 'dev')
    print("Dump Dev feature done!")
    model.dump(test, 'test')
    print("Dump Test feature done!")
コード例 #28
0
ファイル: train_muse.py プロジェクト: mrvoh/LASERWordEmbedder
def fit(config, embedder, train, dev):
    """Train an NER model that uses *embedder* for word representations."""
    # Name the output after the embedder's class.
    config.set_model_name(embedder.__class__.__name__)
    config.use_laser = False

    pad_len = 0  # no BPE fragments used

    # Initiate model and learner, then fit.
    model = NERModel(config, embedder, pad_len)
    learn = NERLearner(config, model, pad_len, pad_len)
    learn.fit(train, dev)
コード例 #29
0
def buildModel():
    """(Re)build the module-level model and restore its latest weights."""
    global globalModel
    config = Config()
    # Select the model class based on config, then build and restore.
    cls = ProjectionNERModel if config.use_embedding_proj_pred else NERModel
    globalModel = cls(config)
    globalModel.build()
    globalModel.restore_latest_session(config.dir_model_evaluate)
    return 'Success'
コード例 #30
0
def main():
    """Restore an NER model, optionally encode SICK, then evaluate."""
    config = Config()

    # -----------------------------------------------------
    # restore model
    # -----------------------------------------------------
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # --------------------------------------------------------------
    # create dataset (test for evaluation & dev for active learning)
    # --------------------------------------------------------------
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    dev = CoNLLDataset(config.train_split[config.sample_split],
                       config.processing_word, config.processing_tag,
                       config.max_iter)
    sick = CoNLLDataset(config.filename_sick, config.processing_word,
                        config.processing_scores, config.max_iter)

    # ------------------------------------------------------
    # encode SICK dataset using pretrained NER model
    # ------------------------------------------------------
    if config.encode:
        model.get_encoded(sick)

    # -----------------------------------------------------
    # determine threshold for active learning (disabled)
    # threshold = 20
    # model.get_threshold(test, threshold)
    # -----------------------------------------------------

    # -----------------------------------------------------
    # evaluate and interact
    # -----------------------------------------------------
    model.evaluate(test, dev, "test")
コード例 #31
0
ファイル: train.py プロジェクト: zhouuuuuu/EBM-NLP
def main(data_prefix=None):
    """Train an NER model; data files may be switched to a prefixed variant.

    When data_prefix is given, each configured data file is replaced by
    data/<prefix>_<basename> before the datasets are created.
    """
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    if data_prefix:
        data_dir = os.path.join(os.getcwd(), 'data')

        def prefixed(path):
            # e.g. dev.txt -> <cwd>/data/<prefix>_dev.txt
            return os.path.join(data_dir,
                                data_prefix + '_' + os.path.basename(path))

        config.filename_dev = prefixed(config.filename_dev)
        config.filename_test = prefixed(config.filename_test)
        config.filename_train = prefixed(config.filename_train)

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    print('training')
    model.train(train, dev)
コード例 #32
0
ファイル: evaluate.py プロジェクト: Praggie/sequence_tagging
def main():
    """Restore a trained model, evaluate on test, then open a shell."""
    # create instance of config
    config = Config()

    # build model and restore weights
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    eval_set = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(eval_set)
    interactive_shell(model)