Example #1
def main():
    config = Config()

    # ------------------------------------------------------------------
    # build model
    # ------------------------------------------------------------------
    model = NERModel(config)
    model.build()

    # ------------------------------------------------------------------
    # train mode
    # ------------------------------------------------------------------
    if config.mode == 'train':
        print('\n ... training model ... \n')
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
        if config.periodic:
            split = CoNLLDataset(config.dummy_train, config.processing_word,
                                 config.processing_tag, config.max_iter)
        else:
            split = CoNLLDataset(config.train_split[config.split],
                                 config.processing_word,
                                 config.processing_tag, config.max_iter)
        model.train(split, test)

    # ------------------------------------------------------------------
    # retrain mode
    # ------------------------------------------------------------------
    if config.mode == 'retrain':
        print('\n ... retraining model ... \n')
        model.restore_session(config.dir_model)
        retrain = CoNLLDataset(config.filename_retrain, config.processing_word,
                               config.processing_tag, config.max_iter)
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
        model.train(retrain, test)
Example #2
def main():
    config = Config('./results/train_folds/')
    train_predictions_file = './data/predictions/formatted_train_predictions.npy'

    kf = KFold(n_splits=5)

    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    train = np.array([el for el in train])
    predictions = [0 for _ in train]

    for train_ids, evaluate_ids in kf.split(train):
        train_dataset = train[train_ids]
        evaluate_dataset = train[evaluate_ids]
        tf.reset_default_graph()
        config = Config('./results/train_folds/')
        model = NERModel(config)
        model.build()
        model.train(train_dataset, evaluate_dataset)
        for idx, tags in zip(evaluate_ids,
                             model.predict_test(evaluate_dataset)):
            predictions[idx] = tags
        model.close_session()

    predictions = np.array(predictions)
    formatted_predictions = format_predictions(predictions, 'train', config)
    np.save(train_predictions_file, formatted_predictions)
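
The fold loop above implements the standard out-of-fold prediction pattern: every training sentence is predicted exactly once, by the model of the fold that held it out. Below is a minimal self-contained sketch of the same pattern, with a scikit-learn classifier standing in for NERModel (the stand-in model and the dummy data are assumptions for illustration only):

import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression  # stand-in for NERModel

X = np.random.rand(100, 4)                # dummy features
y = (X[:, 0] > 0.5).astype(int)           # dummy labels
oof_preds = np.zeros(len(X))

for train_ids, eval_ids in KFold(n_splits=5).split(X):
    model = LogisticRegression().fit(X[train_ids], y[train_ids])
    # each sample gets exactly one prediction, from the fold that held it out
    oof_preds[eval_ids] = model.predict(X[eval_ids])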
Example #3
def main2():
    # create instance of config
    config = Config()


    # build model
    model = NERModel(config)
    model.build()
    pretrain_path = "/home/yinghong/project/tmp/s_t/ray_results/final/" \
                    "exp-final-epoch30-sgd/train_func_0_2018-06-15_14-18-14bqpn6jv1"


    model.restore_session(os.path.join(pretrain_path, "results/tmptmptest/bz=10-training-"
                                                      "bieo-nocnn/model.weights/"))
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev   = CoNLLDataset(config.filename_dev, config.processing_word,
                         config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #4
    def train_func(_config, reporter):
        # tf.reset_default_graph()
        config = Config()
        # for (key, val) in _config.items():
        #     # config[key] = val
        #     setattr(config, key[3:], val)
        # config["dir_output"] = ""
        setattr(config, "dir_output", "pretrain")
        setattr(config, "nepochs", 50)
        setattr(config, "batch_size", 80)

        pretrain_path = _config["30-pretrain_path"]
        PRETRAIN_MODE = _config["31-pretrain_mode"]

        if PRETRAIN_MODE:
            import json

            config_path = os.path.join(pretrain_path, "params.json")
            with open(config_path) as fin:
                for (key, val) in json.load(fin).items():
                    setattr(config, key, val)

        model = NERModel(config)
        model.build()
        if PRETRAIN_MODE:
            model.restore_session(os.path.join(pretrain_path, "results/tmptmptest/bz=10-training-"
                                                          "bieo-nocnn/model.weights/"))
        model.train(train, dev, reporter)
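
The commented-out loop above (and Example #18 below) suggests the search-space keys carry a three-character ordering prefix such as "31-" that is stripped with key[3:] before being applied to the config. A minimal sketch of that override pattern, using a hypothetical Config stand-in and assumed key names:

class Config:                        # hypothetical stand-in
    nepochs = 15
    batch_size = 20

search_space = {"01-nepochs": 50, "02-batch_size": 80}   # assumed key format

config = Config()
for key, val in search_space.items():
    setattr(config, key[3:], val)    # strip the "NN-" ordering prefix

print(config.nepochs, config.batch_size)                 # -> 50 80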
Example #5
def main():
    # create instance of config
    config = Config()

    config.dim_char = arg.dim_char
    config.hidden_size_char = arg.hidden_size_char
    config.hidden_size_lstm_1 = arg.hidden_size_lstm_1
    config.hidden_size_lstm_2 = arg.hidden_size_lstm_2
    config.cls_hidden_size = arg.cls_hidden_size
    config.batch_sample = arg.batch_sample
    config.elmo_scale = arg.elmo_scale
    config.lr_method = arg.lr_method
    config.batch_size = arg.batch_size
    config.learning_rate = arg.learning_rate
    config.decay_logic = arg.decay_logic
    config.run_name = arg.run_name
    config.input_feature_dim = 600  #config.hidden_size_lstm * 2 #+ 1024
    config.dir_saved_roi = arg.dir_saved_roi

    # build model
    model = NERModel(config)
    model.build()

    # create datasets
    config.filename_dev = config.dir_saved_roi + "dev_word_ids/"
    #config.filename_test = config.dir_saved_roi + "test_word_ids/"
    config.filename_train = config.dir_saved_roi + "train_word_ids/"

    dev = CoNLLDataset(config.filename_dev)
    print("Loading dev set done!")
    train = CoNLLDataset(config.filename_train)
    print("Loading train set done!")

    # train model
    model.train(train, dev, config.dev_total_entity)
Example #6
def train_active(train, dev, test, select, config, modename):
    """
    Input: train set, test set, selection set, configurations
    Output: accuracy on dev set, test set, prediction on selection set
    Select Most & Least Certain Examples from Select set
    """
    # build model
    #tf.reset_default_graph()
    #gc.collect()
    #tf.get_variable_scope().reuse_variables()
    model = NERModel(config)
    model.build()
    print("Start training model...")
    print("Training size ", len(train))
    model.train(train, dev)

    # restore session
    model.restore_session(config.dir_model)

    # evaluate
    print("===Evaluating on test set:===")
    mode = "test" + modename
    model.evaluate(test, mode)

    # run on selection set
    print("Selecting samples for active learning...")
    if len(select) == 0:
        return []
    scores = []
    for sent in select:
        output = model.predict(sent[0])
        scores.append(output[1][0])
    # callers sort these scores to pick the most and least certain examples
    return scores
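
A hedged sketch of how the per-sentence scores returned by train_active could be split into most and least certain examples; the score semantics (higher = more confident) are an assumption, since the snippet does not define what output[1][0] contains:

def split_by_certainty(scores, k):
    # indices sorted by ascending confidence
    order = sorted(range(len(scores)), key=lambda i: scores[i])
    return order[:k], order[-k:]     # (most uncertain, most certain)

uncertain, certain = split_by_certainty([0.9, 0.2, 0.7, 0.4], k=2)
print(uncertain, certain)            # -> [1, 3] [2, 0]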
Example #7
def main():
    print("start time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    # create instance of config
    config = Config()
    config.dim_char = arg.dim_char
    config.hidden_size_char = arg.hidden_size_char
    config.hidden_size_lstm_1 = arg.hidden_size_lstm_1
    config.hidden_size_lstm_2 = arg.hidden_size_lstm_2
    config.batch_sample = arg.batch_sample
    config.elmo_scale = arg.elmo_scale
    config.lr_method = arg.lr_method
    config.batch_size = arg.batch_size
    config.learning_rate = arg.learning_rate
    config.decay_logic = arg.decay_logic
    config.run_name = arg.run_name

    # build model
    model = NERModel(config)
    model.build()

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.elmofile_dev,
                       config.processing_word, config.processing_postags,
                       config.generate_anchor, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.elmofile_train,
                         config.processing_word, config.processing_postags,
                         config.generate_anchor, config.max_iter)

    # train model
    model.train(train, dev)
    print("end time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
Example #8
def main(data_prefix=None):
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    if data_prefix:
        cwd = os.getcwd()
        config.filename_dev = os.path.join(
            cwd, 'data',
            data_prefix + '_' + os.path.basename(config.filename_dev))
        config.filename_test = os.path.join(
            cwd, 'data',
            data_prefix + '_' + os.path.basename(config.filename_test))
        config.filename_train = os.path.join(
            cwd, 'data',
            data_prefix + '_' + os.path.basename(config.filename_train))

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    print('training')
    model.train(train, dev)
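
The data_prefix branch only rewrites the filenames, so it must run before the datasets are created (as it does above). A quick, stdlib-only check of the prefixing logic with an assumed example path:

import os

filename_dev = 'data/eng.testa.iob'   # assumed example path
prefixed = os.path.join(os.getcwd(), 'data',
                        'small' + '_' + os.path.basename(filename_dev))
print(prefixed)                       # -> <cwd>/data/small_eng.testa.iob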
Example #9
def main():
    # create instance of config; Config also handles the data loading here:
    # it holds the vocabulary, the GloVe-pretrained embedding matrix,
    # and the str -> id mapping functions
    config = Config()

    # build model
    model = NERModel(config)
    model.build("train")

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    # processing_word = get_processing_word(lowercase=True)
    dev = CoNLLDataset(config.filename_dev)
    train = CoNLLDataset(config.filename_train)
    test = CoNLLDataset(config.filename_test)

    train4cl = CoNLLdata4classifier(train,
                                    processing_word=config.processing_word,
                                    processing_tag=config.processing_tag)
    dev4cl = CoNLLdata4classifier(dev,
                                  processing_word=config.processing_word,
                                  processing_tag=config.processing_tag)
    test4cl = CoNLLdata4classifier(test,
                                   processing_word=config.processing_word,
                                   processing_tag=config.processing_tag)

    # train model
    model.train(train4cl, dev4cl, test4cl)
Example #10
def main():
    # create instance of config; Config also handles the data loading here:
    # it holds the vocabulary, the GloVe-pretrained embedding matrix,
    # and the str -> id mapping functions
    config = Config()
    config.nepochs = 200
    config.dropout = 0.5
    config.batch_size = 40
    config.lr_method = "adam"
    config.lr = 0.0007
    config.lr_decay = 0.97
    config.clip = -5.0  # if negative, no clipping
    config.nepoch_no_imprv = 20

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #11
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    #model.restore_session("results/test/model.weights/") # optional, restore weights
    #model.restore_session(config.dir_model)
    #model.reinitialize_weights("proj")

    # create datasets
    kValOfKmer = config.kValOfKmer
    dev = FKDataset(config.filename_dev, config.processing_word,
                    config.processing_tag, config.max_iter)
    train = FKDataset(config.filename_train, config.processing_word,
                      config.processing_tag, config.max_iter)

    if config.use_coupling:
        train_for_coupling = DatasetForCouplingLoss(
            config.filename_lambda_dress, config.filename_lambda_jean,
            config.processing_word, config.processing_tag, config.max_iter,
            kValOfKmer)
    else:
        train_for_coupling = None

    # for i in train_for_coupling:
    #     print(i)

    # train model
    model.train(train, dev, train_for_coupling)
Example #12
def main():
    # create instance of config
    config = Config()
    config.layer = int(sys.argv[1])
    config.step = int(sys.argv[2])

    if config.task == 'pos':
        print("USING POS")
        config.filename_train = "data/train.pos"  # test
        config.filename_dev = "data/dev.pos"
        config.filename_test = "data/test.pos"
    else:
        print("USING NER")
    print("iteration: " + str(config.layer))
    print("step: " + str(config.step))

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    # train model
    model.train(train, dev, test)
Example #13
def main():
    # create instance of config
    config = Config()
    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.processing_pos,
                       config.processing_chunk, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.processing_pos,
                         config.processing_chunk, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.processing_pos,
                        config.processing_chunk, config.max_iter)
    max_sequence_length = max(max([len(seq[0]) for seq in train]),
                              max([len(seq[0]) for seq in dev]),
                              max([len(seq[0]) for seq in test]))

    max_word_length = max(
        max([len(word[0]) for seq in train for word in seq[0]]),
        max([len(word[0]) for seq in test for word in seq[0]]),
        max([len(word[0]) for seq in dev for word in seq[0]]))
    print(max_word_length, max_sequence_length)
    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.train(train, dev)
    model.restore_session(config.dir_model)
    model.evaluate(test)
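
The nested max() calls above can be checked with a small self-contained sketch; the data layout (each dataset yields (sentence, tags) pairs, where a sentence is a list of (char_ids, word_id) tuples) is an assumption inferred from the indexing:

train = [([([1, 2, 3], 10), ([4, 5], 11)], ['B-PER', 'O'])]   # dummy data
dev   = [([([6], 12)], ['O'])]
test  = [([([7, 8, 9, 0], 13), ([1], 14)], ['O', 'O'])]

splits = [train, dev, test]
max_sequence_length = max(len(seq[0]) for ds in splits for seq in ds)
max_word_length = max(len(word[0])
                      for ds in splits for seq in ds for word in seq[0])
print(max_word_length, max_sequence_length)   # -> 4 2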
Example #14
def main():

    word2id = load_data(FLAGS.voc_file)
    label2id = load_data(FLAGS.tag_voc)
    train_data = data_corpus(FLAGS.src_file, word2id, label2id)
    dev_data = data_corpus(FLAGS.src_file_dev, word2id, label2id)
    nermodel = NERModel(FLAGS, config, word2id, label2id)
    nermodel.build_model()
    nermodel.train(train_data, dev_data)
Example #15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_lang', type=str, default='en')
    parser.add_argument('--dev_lang', type=str, default='en')
    parser.add_argument('--test_lang', type=str, default='en')
    parser.add_argument('--is_pos', type=int, default=0, help='NER or POS?')

    parser.add_argument('--dataset', type=str, default='conll2003', help='Dataset directory')
    parser.add_argument('--dir', type=str, default=None, help='Output directory')
    parser.add_argument('--use_chars', type=int, default=1, help='Use character LSTM or not')
    parser.add_argument('--epoch', type=int, default=30)

    parser.add_argument('--emb_type', type=str, default='word', help='word | trans | word_trans')
    parser.add_argument('--emb_dim', type=int, default=300, help='Dimension of word embeddings')
    parser.add_argument('--model_dir', type=str, default='data/output_model_config_fb_wikitext103/', help='Transformer directory model')
    parser.add_argument('--layer', type=int, default=None, help='Select a single layer from Transformer')
    parser.add_argument('--trans_concat', type=str, default='all', help='all | sws | fws')
    parser.add_argument('--trans_dim', type=int, default=512, help='Transformer hidden size')
    parser.add_argument('--trans_layer', type=int, default=7, help='The total number of Transformer layers')

    parser.add_argument('--trans_type', type=str, default='monolingual', help="monolingual | crosslingual")
    parser.add_argument('--trans_vocab_src', type=str, default=None, help='Source language Transformer vocabulary')
    parser.add_argument('--trans_vocab_tgt', type=str, default=None, help='Target language Transformer vocabulary')

    args = parser.parse_args()

    # with tf.device('/cpu:0'):

    # create instance of config
    # print(args.use_attn, type(args.use_attn))

    langs = [args.train_lang, args.dev_lang, args.test_lang]
    #config = Config(mix_vocab=args.mix_vocab, use_crf=args.use_crf, mono_trans=args.mono_trans, is_pos=args.is_pos, emb_dim=args.emb_dim, src_lang=args.train_lang, tgt_lang=args.test_lang, no_glove=args.no_glove, select_layer=args.select_layer, weighted_sum_full=args.weighted_sum_full, naive_proj=args.naive_proj, highway=args.highway, weighted_sum=args.trans_weighted_sum, trans_dim=args.trans_dim, dataset=args.dataset, trans_vocab=args.trans_vocab, use_transformer=args.use_trans, dir_=args.dir, use_chars=args.use_chars, use_attn=args.use_attn, char_init=args.char_init, model_dir=args.model_dir, trans_to_output=args.trans_to_output, epoch=args.epoch)

    config = Config(args)

    # create datasets
    dev   = CoNLLDataset(config.filename_dev, config.processing_word,
                         config.processing_tag, config.max_iter, lang=args.dev_lang)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter, lang=args.train_lang)
    test  = CoNLLDataset(config.filename_test, config.processing_word,
                         config.processing_tag, config.max_iter, lang=args.test_lang)

    #n_vocab = len(config.vocab_trans)
    #n_ctx = max([dev.max_seq, train.max_seq, test.max_seq])

    # with tf.device('/cpu:0'):
    # build model

    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # train model
    model.train(train, dev)
Example #16
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    model.train(train=config.dataset_train, dev=config.dataset_dev)
Example #17
def train(config):
    # build model
    model = NERModel(config)
    model.build()

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #18
    def train_func(_config, reporter):
        # tf.reset_default_graph()
        config = Config()
        for (key, val) in _config.items():
            # config[key] = val
            setattr(config, key[3:], val)
        # config["dir_output"] = ""
        setattr(config, "dir_output", "")
        setattr(config, "nepochs", 50)

        model = NERModel(config)
        model.build()
        model.train(train, dev, reporter)
Example #19
def main():
    # with tf.device("/device:GPU:0"):
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = PreProcessData(config.f_dev, config.processing_word,
                         config.processing_tag, config.max_iter)
    train = PreProcessData(config.f_train, config.processing_word,
                           config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #20
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = CoNLLDataset(config.filename_dev, max_iter=config.max_iter)
    train = CoNLLDataset(config.filename_train, max_iter=config.max_iter)

    # train model
    model.train(train, dev)
Example #21
    def train_func(_config, reporter):
        # tf.reset_default_graph()
        config = Config()
        for (key, val) in _config.items():
            # config[key] = val
            setattr(config, key, val)
        setattr(config, "dir_output", "")
        setattr(config, "nepochs", 100)

        model = NERModel(config)
        model.build()
        dev = CoNLLDataset(config.filename_dev, config.processing_word,
                           config.processing_tag, config.max_iter)
        train = CoNLLDataset(config.filename_train, config.processing_word,
                             config.processing_tag, config.max_iter)
        model.train(train, dev, reporter)
Example #22
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("tmp/model.weights/") # optional, restore weights

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #23
def main():
    # create instance of config; Config also handles the data loading here:
    # it holds the vocabulary, the GloVe-pretrained embedding matrix,
    # and the str -> id mapping functions
    config = Config()
    config.nepochs = 200
    config.dropout = 0.5
    config.batch_size = 60
    config.lr_method = "adam"
    config.lr = 0.0005
    config.lr_decay = 1.0
    config.clip = -2.0  # if negative, no clipping
    config.nepoch_no_imprv = 8

    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session("results/test/model.weights/",
                          indicate="fine_tuning")

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    # processing_word = get_processing_word(lowercase=True)
    dev = CoNLLDataset(config.filename_dev)
    train = CoNLLDataset(config.filename_train)
    test = CoNLLDataset(config.filename_test)

    # wrap datasets for the classifier
    train4cl = CoNLLdata4classifier(train,
                                    processing_word=config.processing_word,
                                    processing_tag=config.processing_tag,
                                    context_length=config.context_length)
    dev4cl = CoNLLdata4classifier(dev,
                                  processing_word=config.processing_word,
                                  processing_tag=config.processing_tag,
                                  context_length=config.context_length)
    test4cl = CoNLLdata4classifier(test,
                                   processing_word=config.processing_word,
                                   processing_tag=config.processing_tag,
                                   context_length=config.context_length)

    # train model
    model.train(train4cl, dev4cl, test4cl)
Example #24
def main():
    # create instance of config
    config_file = sys.argv[1]
    config = Config(config_file)

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    model.train(train, dev)
Example #25
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev   = CoNLLDataset(config.filename_dev, config.processing_word,
                         config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #26
def main():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #27
def main():
    # create instance of config; Config also handles the data loading here:
    # it holds the vocabulary, the GloVe-pretrained embedding matrix,
    # and the str -> id mapping functions
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #28
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    print("dev filename:", config.filename_dev)
    dev = CoNLLDataset(config.filename_dev, config.processing_word_fuc,
                       config.processing_tag_fuc, config.max_iter)
    print("train filename:", config.filename_train)
    train = CoNLLDataset(config.filename_train, config.processing_word_fuc,
                         config.processing_tag_fuc, config.max_iter)

    # train model
    model.train(train, dev)
Example #29
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    score = 0
    # model.restore_session(config.dir_model) # optional, restore weights
    # model.reinitialize_weights("proj")
    # when restoring, set score to the restored model's best score

    # create datasets
    dev   = CoNLLDataset(config.filename_dev, config.processing_word,
                         config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev, score)
Example #30
def main():
    # create instance of config
    config = Config()

    # clean-up any previous predictions
    # run('rm {}preds-*.pkl'.format(config.dir_output), shell=True)

    # build model
    model = NERModel(config)
    model.build()

    print('training on', config.filename_train)
    # create datasets
    dev   = CoNLLDataset(config.filename_dev, config.processing_word,
                         config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #31
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()

    print('curr_increment', config.curr_increment)
    print('curr_iter', config.curr_iter)
    print('path_preds=', config.path_preds)

    model.restore_session(config.path_prev_model)

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    augment_occluded, augment_preds = [], []
    for split in config.augment_list:
        augment_occluded.append(
            CoNLLDataset(config.filename_augment_occluded.get(split),
                         config.processing_word, config.processing_tag,
                         config.max_iter))

        with open(config.path_preds.get(split) + 'preds-{}.pkl'.format(split),
                  'rb') as f:
            augment_preds.append(pickle.load(f))
            if len(augment_preds[-1]) == 0:
                raise AttributeError('Error while trying to load '
                                     'augment predictions from pickle.')

    # print(len(augment_preds))

    # train model
    model.train(train, dev, augment_occluded, augment_preds)