Example #1
def init_model():
    train_data, dev_data, test_data, vocab = readdata()

    model = CNN((len(vocab), 128),
                num_classes=target_len,
                padding=2,
                dropout=0.1)
    #model = torch.load("rnnmodel/best_RNN_accuracy_2019-05-22-17-18-46")
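    # FitlogCallback(test_data) evaluates the test set at each validation
    # round and logs those metrics to fitlog alongside the dev results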
    trainer = Trainer(model=model,
                      train_data=train_data,
                      dev_data=dev_data,
                      device=0,
                      save_path='cnnmodel',
                      loss=loss,
                      metrics=metrics,
                      callbacks=[FitlogCallback(test_data)])

    tester = Tester(test_data, model, metrics=AccuracyMetric())
    print(2)
    model2 = RNN(embed_num=len(vocab),
                 input_size=256,
                 hidden_size=256,
                 target_size=target_len)
    #model2 = torch.load("rnnmodel/best_RNN_accuracy_2019-05-22-17-18-46")
    trainer2 = Trainer(model=model2,
                       train_data=train_data,
                       dev_data=dev_data,
                       loss=loss,
                       metrics=metrics,
                       save_path='rnnmodel',
                       batch_size=32,
                       n_epochs=20,
                       device=0)
    tester2 = Tester(test_data, model2, metrics=AccuracyMetric())
    return trainer, trainer2, tester, tester2
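
For reference, here is the shared pattern as one minimal sketch against the fastNLP 0.5-era API used throughout (train_data, dev_data, test_data and vocab are assumed to be prepared as in readdata() above; the two-class CNNText setup is a placeholder, not taken from any single example):

import fitlog
from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric, FitlogCallback
from fastNLP.models import CNNText

fitlog.set_log_dir('logs/')     # fitlog needs a log directory before training starts
model = CNNText((len(vocab), 128), num_classes=2)
trainer = Trainer(train_data=train_data,
                  model=model,
                  loss=CrossEntropyLoss(),
                  metrics=AccuracyMetric(),
                  dev_data=dev_data,
                  # evaluate the test set at every validation round and log it to fitlog
                  callbacks=[FitlogCallback(test_data)])
trainer.train()
fitlog.finish()                 # close out the fitlog record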
Example #2
    def test_CheckPointCallback(self):

        from fastNLP import CheckPointCallback, Callback
        from fastNLP import Tester

        class RaiseCallback(Callback):
            def __init__(self, stop_step=10):
                super().__init__()
                self.stop_step = stop_step

            def on_backward_begin(self, loss):
                if self.step > self.stop_step:
                    raise RuntimeError()

        data_set, model = prepare_env()
        tester = Tester(data=data_set, model=model, metrics=AccuracyMetric(pred="predict", target="y"))
        import fitlog

        fitlog.set_log_dir(self.tempdir, new_log=True)
        tempfile_path = os.path.join(self.tempdir, 'chkt.pt')
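        # CheckPointCallback saves model/optimizer/training state to tempfile_path,
        # so the interrupted first run below can resume from the checkpoint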
        callbacks = [CheckPointCallback(tempfile_path)]

        fitlog_callback = FitlogCallback(data_set, tester)
        callbacks.append(fitlog_callback)

        callbacks.append(RaiseCallback(100))
        try:
            trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
                              batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                              metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                              callbacks=callbacks, check_code_level=2)
            trainer.train()
        except RuntimeError:
            pass
        # simulate a fresh run with the code below
        data_set, model = prepare_env()
        callbacks = [CheckPointCallback(tempfile_path)]
        tester = Tester(data=data_set, model=model, metrics=AccuracyMetric(pred="predict", target="y"))
        fitlog_callback = FitlogCallback(data_set, tester)
        callbacks.append(fitlog_callback)

        trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
                          batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                          metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                          callbacks=callbacks, check_code_level=2)
        trainer.train()
Example #3
    def test_fitlog_callback(self):
        import fitlog
        fitlog.set_log_dir(self.tempdir, new_log=True)
        data_set, model = prepare_env()
        from fastNLP import Tester
        tester = Tester(data=data_set, model=model, metrics=AccuracyMetric(pred="predict", target="y"))
        # pass the Tester so FitlogCallback reuses it to evaluate the extra dataset
        fitlog_callback = FitlogCallback(data_set, tester)

        trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
                          batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                          metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                          callbacks=fitlog_callback, check_code_level=2)
        trainer.train()
Example #4
data, char_embed, word_embed = cache()

print(data)

model = CNNBiLSTMCRF(word_embed,
                     char_embed,
                     hidden_size=1200,
                     num_layers=1,
                     tag_vocab=data.vocabs[Const.TARGET],
                     encoding_type=encoding_type,
                     dropout=dropout)

callbacks = [
    GradientClipCallback(clip_value=5, clip_type='value'),
    FitlogCallback(data.datasets['test'], verbose=1)
]

optimizer = SGD(model.parameters(), lr=lr, momentum=0.9)
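# decay the learning rate each epoch; the LRScheduler callback steps the torch scheduler for you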
scheduler = LRScheduler(
    LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)

trainer = Trainer(train_data=data.datasets['dev'][:100],
                  model=model,
                  optimizer=optimizer,
                  sampler=None,
                  device=0,
                  dev_data=data.datasets['dev'][:100],
                  batch_size=batch_size,
                  # the snippet is cut off here in the source; a plausible completion:
                  metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET],
                                            encoding_type=encoding_type),
                  callbacks=callbacks)
Example #5
dataset.set_input(Const.INPUT, Const.INPUT_LEN)
dataset.set_target(Const.TARGET)

testset.rename_field('words', Const.INPUT)
testset.rename_field('target', Const.TARGET)
testset.rename_field('seq_len', Const.INPUT_LEN)
testset.set_input(Const.INPUT, Const.INPUT_LEN)
testset.set_target(Const.TARGET)

train_data, dev_data = dataset.split(0.1)

loss = CrossEntropyLoss(pred=Const.OUTPUT, target=Const.TARGET)
metrics = AccuracyMetric(pred=Const.OUTPUT, target=Const.TARGET)
trainer = Trainer(model=model,
                  train_data=train_data,
                  dev_data=dev_data,
                  loss=loss,
                  batch_size=16,
                  metrics=metrics,
                  n_epochs=20,
                  callbacks=[FitlogCallback(dataset)])
trainer.train()

tester = Tester(data=testset, model=model, metrics=metrics)
tester.test()

tester = Tester(data=train_data, model=model, metrics=metrics)
tester.test()

fitlog.finish()
Example #6
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.bert_data_path, config.train_name), "rb"))
    print(train_data[0])
    # debug
    if config.debug:
        train_data = train_data[0:30]
    dev_data = pickle.load(
        open(os.path.join(config.bert_data_path, config.dev_name), "rb"))
    print(dev_data[0])
    # test_data = pickle.load(open(os.path.join(config.bert_data_path, config.test_name), "rb"))

    schemas = get_schemas(config.source_path)
    state_dict = torch.load(config.bert_path)
    # print(state_dict)
    text_model = BertForMultiLabelSequenceClassification.from_pretrained(
        config.bert_folder, state_dict=state_dict, num_labels=len(schemas))

    # optimizer
    param_optimizer = list(text_model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    num_train_optimization_steps = int(
        len(train_data) / config.batch_size /
        config.update_every) * config.epoch
    if config.local_rank != -1:
        num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size(
        )

    optimizer = BertAdam(
        lr=config.lr,
        warmup=config.warmup_proportion,
        t_total=num_train_optimization_steps).construct_from_pytorch(
            optimizer_grouped_parameters)

    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    logs = FitlogCallback(dev_data)
    f1 = F1_score(pred='output', target='label_id')

    trainer = Trainer(train_data=train_data,
                      model=text_model,
                      loss=BCEWithLogitsLoss(),
                      batch_size=config.batch_size,
                      check_code_level=-1,
                      metrics=f1,
                      metric_key='f1',
                      n_epochs=int(config.epoch),
                      dev_data=dev_data,
                      save_path=config.save_path,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      update_every=config.update_every,
                      optimizer=optimizer,
                      use_tqdm=False,
                      device=config.device,
                      callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(
        dev_data,
        text_model,
        metrics=f1,
        device=config.device,
        batch_size=config.batch_size,
    )
    tester.test()
Example #7
f1_metric = SpanFPreRecMetric(tag_vocab=vocabs['label'],  # assumed opening; the snippet starts mid-call
                              pred='pred',
                              target='target',
                              seq_len='seq_len',
                              encoding_type=encoding_type)
acc_metric = AccuracyMetric(pred='pred', target='target', seq_len='seq_len')
metrics = [f1_metric, acc_metric]

if args.optim == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
elif args.optim == 'sgd':
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

callbacks = [
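    # a dict maps names to extra datasets; each is evaluated and logged under its key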
    FitlogCallback({
        'test': datasets['test'],
        'train': datasets['train']
    }),
    LRScheduler(
        lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.03)**ep))
]
print('label_vocab:{}\n{}'.format(len(vocabs['label']),
                                  vocabs['label'].idx2word))
trainer = Trainer(datasets['train'],
                  model,
                  optimizer=optimizer,
                  loss=loss,
                  metrics=metrics,
                  dev_data=datasets['dev'],
                  device=device,
                  batch_size=args.batch,
                  n_epochs=args.epoch,
                  callbacks=callbacks)  # assumed closing; the snippet is cut off here
Example #8
        pred = pred.argmax(dim=-1).tolist()
        self.p.extend(pred)

    def get_metric(self, reset=True):
        f1_score = m.f1_score(self.l, self.p, average="macro")
        if reset:
            self.l = []
            self.p = []
        return {"f1:": f1_score}


optimizer = Adam(lr=args.lr, weight_decay=0)
acc = AccuracyMetric()
f1 = f1metric()
loss = CrossEntropyLoss()
trainer = Trainer(
    trainset,
    cla,
    optimizer=optimizer,
    loss=loss,
    batch_size=args.batch_size,
    n_epochs=args.num_epoch,
    dev_data=testset,
    metrics=[acc, f1],
    save_path=args.save_dir,
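    # log_loss_every=5: also record the training loss to fitlog every 5 steps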
    callbacks=[FitlogCallback(log_loss_every=5)],
)
trainer.train()

fitlog.finish()  # finish the logging
Example #9
    rnn_text_model = RNN.RNN_Text(vocab_size=m, input_size=50, hidden_layer_size=128, target_size=k, dropout=0.1)
    cnn_text_model = CNN.CNN_Text(vocab_size=m, input_size=50, target_size=k, dropout=0.05)
    model = rnn_text_model
    # ModelLoader.load_pytorch(model, "model_ckpt_large_CNN.pkl")

    trainer = Trainer(
        train_data=train_set,
        model=model,
        loss=CrossEntropyLoss(pred='pred', target='label'),
        n_epochs=50,
        batch_size=16,
        metrics=AccuracyMetric(pred='pred', target='label'),
        dev_data=dev_set,
        optimizer=Adam(lr=1e-3),
        callbacks=[FitlogCallback(data=test_set)]
    )
    trainer.train()

    # saver = ModelSaver("model_ckpt_large_RNN.pkl")
    # saver.save_pytorch(model)

    tester = Tester(
        data=train_set,
        model=model,
        metrics=AccuracyMetric(pred='pred', target='label'),
        batch_size=16,
    )
    tester.test()

    tester = Tester(  # cut off in the source; presumably evaluating the test set
        data=test_set,
        model=model,
        metrics=AccuracyMetric(pred='pred', target='label'),
        batch_size=16,
    )
    tester.test()
Example #10
# elmo_embed = ElmoEmbedding(vocab=data.vocabs['cap_words'],
#                              model_dir_or_name='.',
#                              requires_grad=True, layers='mix')
# char_embed = StackEmbedding([elmo_embed, char_embed])

model = CNNBiLSTMCRF(word_embed,
                     char_embed,
                     hidden_size=200,
                     num_layers=1,
                     tag_vocab=data.vocabs[Const.TARGET],
                     encoding_type=encoding_type)

callbacks = [
    GradientClipCallback(clip_type='value', clip_value=5),
    FitlogCallback({'test': data.datasets['test']}, verbose=1),
    # SaveModelCallback('save_models/', top=3, only_param=False, save_on_exception=True)
]
# optimizer = Adam(model.parameters(), lr=0.001)
# optimizer = SWATS(model.parameters(), verbose=True)
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = LRScheduler(
    LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)

trainer = Trainer(train_data=data.datasets['train'],
                  model=model,
                  optimizer=optimizer,
                  sampler=BucketSampler(batch_size=20),
                  device=1,
                  dev_data=data.datasets['dev'],
                  callbacks=callbacks)  # assumed closing; the snippet is cut off here
Example #11
def train(args):
    text_data = TextData()
    with open(os.path.join(args.vocab_dir, args.vocab_data), 'rb') as fin:
        text_data = pickle.load(fin)
    vocab_size = text_data.vocab_size
    class_num = text_data.class_num
    # class_num = 1
    seq_len = text_data.max_seq_len
    print("(vocab_size,class_num,seq_len):({0},{1},{2})".format(
        vocab_size, class_num, seq_len))

    train_data = text_data.train_set
    val_data = text_data.val_set
    test_data = text_data.test_set
    train_data.set_input('words', 'seq_len')
    train_data.set_target('target')
    val_data.set_input('words', 'seq_len')
    val_data.set_target('target')

    test_data.set_input('words', 'seq_len')
    test_data.set_target('target')

    init_embeds = None
    if args.pretrain_model == "None":
        print("No pretrained model with be used.")
        print("vocabsize:{0}".format(vocab_size))
        init_embeds = (vocab_size, args.embed_size)
    elif args.pretrain_model == "word2vec":
        embeds_path = os.path.join(args.prepare_dir, 'w2v_embeds.pkl')
        print("Loading Word2Vec pretrained embedding from {0}.".format(
            embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    elif args.pretrain_model == 'glove':
        embeds_path = os.path.join(args.prepare_dir, 'glove_embeds.pkl')
        print(
            "Loading Glove pretrained embedding from {0}.".format(embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    elif args.pretrain_model == 'glove2wv':
        embeds_path = os.path.join(args.prepare_dir, 'glove2wv_embeds.pkl')
        print(
            "Loading Glove pretrained embedding from {0}.".format(embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    else:
        init_embeds = (vocab_size, args.embed_size)

    if args.model == "CNNText":
        print("Using CNN Model.")
        model = CNNText(init_embeds,
                        num_classes=class_num,
                        padding=2,
                        dropout=args.dropout)
    elif args.model == "StarTransformer":
        print("Using StarTransformer Model.")
        model = STSeqCls(init_embeds,
                         num_cls=class_num,
                         hidden_size=args.hidden_size)
    elif args.model == "MyCNNText":
        model = MyCNNText(init_embeds=init_embeds,
                          num_classes=class_num,
                          padding=2,
                          dropout=args.dropout)
        print("Using user defined CNNText")
    elif args.model == "LSTMText":
        print("Using LSTM Model.")
        model = LSTMText(init_embeds=init_embeds,
                         output_dim=class_num,
                         hidden_dim=args.hidden_size,
                         num_layers=args.num_layers,
                         dropout=args.dropout)
    elif args.model == "Bert":
        print("Using Bert Model.")
    else:
        print("Using default model: CNNText.")
        model = CNNText((vocab_size, args.embed_size),
                        num_classes=class_num,
                        padding=2,
                        dropout=0.1)
    print(model)
    if args.cuda:
        device = torch.device('cuda')
    else:
        device = None

    print("train_size:{0} ; val_size:{1} ; test_size:{2}".format(
        train_data.get_length(), val_data.get_length(),
        test_data.get_length()))

    if args.optim == "Adam":
        print("Using Adam as optimizer.")
        optimizer = fastnlp_optim.Adam(lr=0.001,
                                       weight_decay=args.weight_decay)
        if (args.model_suffix == "default"):
            args.model_suffix = args.optim
    else:
        print("No Optimizer will be used.")
        optimizer = None

    criterion = CrossEntropyLoss()
    metric = AccuracyMetric()
    model_save_path = os.path.join(args.model_dir, args.model,
                                   args.model_suffix)
    earlystop = EarlyStopCallback(args.patience)
    fitlog_back = FitlogCallback({"val": val_data, "train": train_data})
    trainer = Trainer(train_data=train_data,
                      model=model,
                      save_path=model_save_path,
                      device=device,
                      n_epochs=args.epochs,
                      optimizer=optimizer,
                      dev_data=val_data,
                      loss=criterion,
                      batch_size=args.batch_size,
                      metrics=metric,
                      callbacks=[fitlog_back, earlystop])
    trainer.train()
    print("Train Done.")

    tester = Tester(data=val_data,
                    model=model,
                    metrics=metric,
                    batch_size=args.batch_size,
                    device=device)
    tester.test()
    print("Test Done.")

    print("Predict the answer with best model...")
    acc = 0.0
    output = []
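    # run the trained model over the test set in batches, collecting sigmoid scores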
    data_iterator = Batch(test_data, batch_size=args.batch_size)
    for data_x, batch_y in data_iterator:
        i_data = Variable(data_x['words']).cuda()
        pred = model(i_data)[C.OUTPUT]
        pred = pred.sigmoid()
        # print(pred.shape)
        output.append(pred.cpu().data)
    output = torch.cat(output, 0).numpy()
    print(output.shape)
    print("Predict Done. {} records".format(len(output)))
    result_save_path = os.path.join(args.result_dir,
                                    args.model + "_" + args.model_suffix)
    with open(result_save_path + ".pkl", 'wb') as f:
        pickle.dump(output, f)
    output = output.squeeze()[:, 1].tolist()
    projectid = text_data.test_projectid.values
    answers = []
    count = 0
    for i in range(len(output)):
        if output[i] > 0.5:
            count += 1
    print("true sample count:{}".format(count))
    add_count = 0
    for i in range(len(projectid) - len(output)):
        output.append([0.13])
        add_count += 1
    print("Add {} default result in predict.".format(add_count))

    df = pd.DataFrame()
    df['projectid'] = projectid
    df['y'] = output
    df.to_csv(result_save_path + ".csv", index=False)
    print("Predict Done, results saved to {}".format(result_save_path))

    fitlog.finish()
Example #12
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug
    if config.debug:
        train_data = train_data[0:100]
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    print(len(train_data), len(dev_data))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    word_vocab = pickle.load(
        open(os.path.join(config.data_path, config.word_vocab_name), "rb"))
    char_vocab = pickle.load(
        open(os.path.join(config.data_path, config.char_vocab_name), "rb"))
    pos_vocab = pickle.load(
        open(os.path.join(config.data_path, config.pos_vocab_name), "rb"))
    # spo_vocab = pickle.load(open(os.path.join(config.data_path, config.spo_vocab_name), "rb"))
    tag_vocab = pickle.load(
        open(os.path.join(config.data_path, config.tag_vocab_name), "rb"))
    print('word vocab', len(word_vocab))
    print('char vocab', len(char_vocab))
    print('pos vocab', len(pos_vocab))
    # print('spo vocab', len(spo_vocab))
    print('tag vocab', len(tag_vocab))

    schema = get_schemas(config.source_path)

    if task_name == 'bilstm_crf':
        model = AdvSeqLabel(
            char_init_embed=(len(char_vocab), config.char_embed_dim),
            word_init_embed=(len(word_vocab), config.word_embed_dim),
            pos_init_embed=(len(pos_vocab), config.pos_embed_dim),
            spo_embed_dim=len(schema),
            sentence_length=config.sentence_length,
            hidden_size=config.hidden_dim,
            num_classes=len(tag_vocab),
            dropout=config.dropout,
            id2words=tag_vocab.idx2word,
            encoding_type=config.encoding_type)
    elif task_name == 'trans_crf':
        model = TransformerSeqLabel(
            char_init_embed=(len(char_vocab), config.char_embed_dim),
            word_init_embed=(len(word_vocab), config.word_embed_dim),
            pos_init_embed=(len(pos_vocab), config.pos_embed_dim),
            spo_embed_dim=len(schema),
            num_classes=len(tag_vocab),
            id2words=tag_vocab.idx2word,
            encoding_type=config.encoding_type,
            num_layers=config.num_layers,
            inner_size=config.inner_size,
            key_size=config.key_size,
            value_size=config.value_size,
            num_head=config.num_head,
            dropout=config.dropout)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    # loss = NLLLoss()
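    # evaluate dev_data at each validation round and send the results to fitlog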
    logs = FitlogCallback(dev_data)
    metrics = SpanFPreRecMetric(tag_vocab,
                                pred='pred',
                                seq_len='seq_len',
                                target='tag')

    train_data.set_input('tag')
    dev_data.set_input('tag')
    dev_data.set_target('seq_len')
    #print(train_data.get_field_names())
    trainer = Trainer(
        train_data=train_data,
        model=model,
        # loss=loss,
        metrics=metrics,
        metric_key='f',
        batch_size=config.batch_size,
        n_epochs=config.epoch,
        dev_data=dev_data,
        save_path=config.save_path,
        check_code_level=-1,
        print_every=config.print_every,
        validate_every=config.validate_every,
        optimizer=optimizer,
        use_tqdm=False,
        device=config.device,
        callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(dev_data,
                    model,
                    metrics=metrics,
                    device=config.device,
                    batch_size=config.batch_size)
    tester.test()
Example #13
def train(args):
    text_data = TextData()
    with open(os.path.join(args.vocab_dir, args.vocab_data), 'rb') as fin:
        text_data = pickle.load(fin)
    vocab_size = text_data.vocab_size
    class_num = text_data.class_num
    seq_len = text_data.max_seq_len
    print("(vocab_size,class_num,seq_len):({0},{1},{2})".format(
        vocab_size, class_num, seq_len))

    train_data = text_data.train_set
    test_dev_data = text_data.test_set
    train_data.set_input('words', 'seq_len')
    train_data.set_target('target')
    test_dev_data.set_input('words', 'seq_len')
    test_dev_data.set_target('target')
    test_data, dev_data = test_dev_data.split(0.2)

    test_data = test_dev_data
    init_embeds = None
    if args.pretrain_model == "None":
        print("No pretrained model with be used.")
        print("vocabsize:{0}".format(vocab_size))
        init_embeds = (vocab_size, args.embed_size)
    elif args.pretrain_model == "word2vec":
        embeds_path = os.path.join(args.prepare_dir, 'w2v_embeds.pkl')
        print("Loading Word2Vec pretrained embedding from {0}.".format(
            embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    elif args.pretrain_model == 'glove':
        embeds_path = os.path.join(args.prepare_dir, 'glove_embeds.pkl')
        print(
            "Loading Glove pretrained embedding from {0}.".format(embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    elif args.pretrain_model == 'glove2wv':
        embeds_path = os.path.join(args.prepare_dir, 'glove2wv_embeds.pkl')
        print(
            "Loading Glove pretrained embedding from {0}.".format(embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    else:
        init_embeds = (vocab_size, args.embed_size)

    if args.model == "CNNText":
        print("Using CNN Model.")
        model = CNNText(init_embeds,
                        num_classes=class_num,
                        padding=2,
                        dropout=args.dropout)
    elif args.model == "StarTransformer":
        print("Using StarTransformer Model.")
        model = STSeqCls(init_embeds,
                         num_cls=class_num,
                         hidden_size=args.hidden_size)
    elif args.model == "MyCNNText":
        model = MyCNNText(init_embeds=init_embeds,
                          num_classes=class_num,
                          padding=2,
                          dropout=args.dropout)
        print("Using user defined CNNText")
    elif args.model == "LSTMText":
        print("Using LSTM Model.")
        model = LSTMText(init_embeds=init_embeds,
                         output_dim=class_num,
                         hidden_dim=args.hidden_size,
                         num_layers=args.num_layers,
                         dropout=args.dropout)
    elif args.model == "Bert":
        print("Using Bert Model.")
    else:
        print("Using default model: CNNText.")
        model = CNNText((vocab_size, args.embed_size),
                        num_classes=class_num,
                        padding=2,
                        dropout=0.1)
    print(model)
    if args.cuda:
        device = torch.device('cuda')
    else:
        device = None

    print("train_size:{0} ; dev_size:{1} ; test_size:{2}".format(
        train_data.get_length(), dev_data.get_length(),
        test_data.get_length()))

    if args.optim == "Adam":
        print("Using Adam as optimizer.")
        optimizer = fastnlp_optim.Adam(lr=0.001,
                                       weight_decay=args.weight_decay)
        if (args.model_suffix == "default"):
            args.model_suffix = args.optim
    else:
        print("No Optimizer will be used.")
        optimizer = None

    criterion = CrossEntropyLoss()
    metric = AccuracyMetric()
    model_save_path = os.path.join(args.model_dir, args.model,
                                   args.model_suffix)
    earlystop = EarlyStopCallback(args.patience)
    trainer = Trainer(train_data=train_data,
                      model=model,
                      save_path=model_save_path,
                      device=device,
                      n_epochs=args.epochs,
                      optimizer=optimizer,
                      dev_data=test_data,
                      loss=criterion,
                      batch_size=args.batch_size,
                      metrics=metric,
                      callbacks=[FitlogCallback(test_data), earlystop])
    trainer.train()
    print("Train Done.")

    tester = Tester(data=test_data,
                    model=model,
                    metrics=metric,
                    batch_size=args.batch_size,
                    device=device)
    tester.test()
    print("Test Done.")
    fitlog.finish()
Example #14
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug
    if config.debug:
        train_data = train_data[0:30]
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(
        open(os.path.join(config.data_path, config.vocabulary_name), "rb"))

    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary),
                          embed_dim=config.embed_dim,
                          output_dim=config.class_num,
                          hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers,
                          dropout=config.dropout)
    elif task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary),
                                  embed_dim=config.embed_dim,
                                  output_dim=config.class_num,
                                  hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers,
                                  dropout=config.dropout)
    elif task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary),
                         embed_dim=config.embed_dim,
                         class_num=config.class_num,
                         kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes,
                         dropout=config.dropout,
                         static=config.static,
                         in_channels=config.in_channels)
    elif task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary),
                         embed_dim=config.embed_dim,
                         output_dim=config.class_num,
                         hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers,
                         dropout=config.dropout)
    # elif task_name == "cnn_w2v":
    #     text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
    #                          class_num=config.class_num, kernel_num=config.kernel_num,
    #                          kernel_sizes=config.kernel_sizes, dropout=config.dropout,
    #                          static=config.static, in_channels=config.in_channels,
    #                          weight=weight)
    elif task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary),
                          embed_dim=config.embed_dim,
                          output_dim=config.class_num,
                          hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers,
                          dropout=config.dropout)
    #elif task_name == "bert":
    #    text_model = BertModel.from_pretrained(config.bert_path)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    logs = FitlogCallback(dev_data)
    f1 = F1_score(pred='output', target='target')

    trainer = Trainer(train_data=train_data,
                      model=text_model,
                      loss=BCEWithLogitsLoss(),
                      batch_size=config.batch_size,
                      check_code_level=-1,
                      metrics=f1,
                      metric_key='f1',
                      n_epochs=config.epoch,
                      dev_data=dev_data,
                      save_path=config.save_path,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      optimizer=optimizer,
                      use_tqdm=False,
                      device=config.device,
                      callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(
        dev_data,
        text_model,
        metrics=f1,
        device=config.device,
        batch_size=config.batch_size,
    )
    tester.test()
Example #15
        "hidden_size": arg.hidden_size,
        "dropout": arg.dropout,
        "use_allennlp": False,
    },
)

optimizer = Adadelta(lr=arg.lr, params=model.parameters())
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)

callbacks = [
    LRScheduler(scheduler),
]

if arg.task in ['snli']:
    callbacks.append(
        FitlogCallback(data_info.datasets[arg.testset_name], verbose=1))
elif arg.task == 'mnli':
    callbacks.append(
        FitlogCallback(
            {
                'dev_matched': data_info.datasets['dev_matched'],
                'dev_mismatched': data_info.datasets['dev_mismatched']
            },
            verbose=1))

trainer = Trainer(train_data=data_info.datasets['train'],
                  model=model,
                  optimizer=optimizer,
                  num_workers=0,
                  batch_size=arg.batch_size,
                  n_epochs=arg.n_epochs,
                  callbacks=callbacks)  # assumed closing; the snippet is cut off here
Example #16
if args.optim == 'adam':  # assumed opening; the snippet starts mid-branch
    optimizer = optim.AdamW(param_, lr=args.lr, weight_decay=args.weight_decay)
elif args.optim == 'sgd':
    # optimizer = optim.SGD(model.parameters(),lr=args.lr,momentum=args.momentum,
    #                       weight_decay=args.weight_decay)
    optimizer = optim.SGD(param_,
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

if 'msra' in args.dataset:
    datasets['dev'] = datasets['test']

fitlog_evaluate_dataset = {'test': datasets['test']}
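# always track the test set; optionally also track training-set performance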
if args.test_train:
    fitlog_evaluate_dataset['train'] = datasets['train']
evaluate_callback = FitlogCallback(fitlog_evaluate_dataset, verbose=1)
lrschedule_callback = LRScheduler(
    lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep)))
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)


# model.state_dict()
class CheckWeightCallback(Callback):
    def __init__(self, model):
        super().__init__()
        self.model_ = model

    def on_step_end(self):
        print('parameter weight:', flush=True)
        print(self.model_.state_dict()['encoder.layer_0.attn.w_q.weight'],
              flush=True)
Example #17
mymodel = TextModel(vocab_size=vocab_size,  # assumed opening; the snippet starts mid-call
                    embed_dim=128,
                    hidden_dim=hidden_units,
                    output_dim=8)
# mymodel = load_model(mymodel, './model/best_TextModel_acc_2019-06-28-09-07-50')
trainer = Trainer(
    train_data=train_data,
    model=mymodel,
    loss=CrossEntropyLoss(pred='pred', target='target'),
    # loss=SkipBudgetLoss(pred='pred', target='target', updated_states='updated_states'),
    metrics=[AccuracyMetric(), UsedStepsMetric()],
    n_epochs=30,
    batch_size=batch_size,
    print_every=-1,
    validate_every=-1,
    dev_data=test_data,
    save_path='./model',
    optimizer=Adam(lr=learning_rate, weight_decay=0),
    check_code_level=0,
    device="cuda",
    metric_key='acc',
    use_tqdm=False,
    callbacks=[FitlogCallback(test_data)])
start = time.perf_counter()  # time.clock() in the original was removed in Python 3.8
trainer.train()
end = time.perf_counter()
training_time = end - start
print('total training time: %fs' % training_time)
fitlog.add_hyper({'time': training_time})

fitlog.finish()