def init_model():
    train_data, dev_data, test_data, vocab = readdata()
    model = CNN((len(vocab), 128), num_classes=target_len, padding=2, dropout=0.1)
    # model = torch.load("rnnmodel/best_RNN_accuracy_2019-05-22-17-18-46")
    trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data,
                      device=0, save_path='cnnmodel', loss=loss, metrics=metrics,
                      callbacks=[FitlogCallback(test_data)])
    tester = Tester(test_data, model, metrics=AccuracyMetric())
    model2 = RNN(embed_num=len(vocab), input_size=256, hidden_size=256,
                 target_size=target_len)
    # model2 = torch.load("rnnmodel/best_RNN_accuracy_2019-05-22-17-18-46")
    trainer2 = Trainer(model=model2, train_data=train_data, dev_data=dev_data,
                       loss=loss, metrics=metrics, save_path='rnnmodel',
                       batch_size=32, n_epochs=20, device=0)
    # was Tester(test_data, model, ...): the second tester should test model2
    tester2 = Tester(test_data, model2, metrics=AccuracyMetric())
    return trainer, trainer2, tester, tester2
def test_CheckPointCallback(self):
    from fastNLP import CheckPointCallback, Callback
    from fastNLP import Tester

    class RaiseCallback(Callback):
        def __init__(self, stop_step=10):
            super().__init__()
            self.stop_step = stop_step

        def on_backward_begin(self, loss):
            if self.step > self.stop_step:
                raise RuntimeError()

    data_set, model = prepare_env()
    tester = Tester(data=data_set, model=model,
                    metrics=AccuracyMetric(pred="predict", target="y"))
    import fitlog
    fitlog.set_log_dir(self.tempdir, new_log=True)
    tempfile_path = os.path.join(self.tempdir, 'chkt.pt')
    callbacks = [CheckPointCallback(tempfile_path)]
    fitlog_callback = FitlogCallback(data_set, tester)
    callbacks.append(fitlog_callback)
    callbacks.append(RaiseCallback(100))  # abort training after step 100
    try:
        trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                          loss=BCELoss(pred="predict", target="y"),
                          batch_size=32, n_epochs=5, print_every=50,
                          dev_data=data_set,
                          metrics=AccuracyMetric(pred="predict", target="y"),
                          use_tqdm=True, callbacks=callbacks,
                          check_code_level=2)
        trainer.train()
    except RuntimeError:
        pass
    # Simulate a fresh run with the code below; CheckPointCallback resumes
    # from the checkpoint saved at tempfile_path.
    data_set, model = prepare_env()
    callbacks = [CheckPointCallback(tempfile_path)]
    tester = Tester(data=data_set, model=model,
                    metrics=AccuracyMetric(pred="predict", target="y"))
    fitlog_callback = FitlogCallback(data_set, tester)
    callbacks.append(fitlog_callback)
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=5, print_every=50,
                      dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"),
                      use_tqdm=True, callbacks=callbacks, check_code_level=2)
    trainer.train()
def test_fitlog_callback(self):
    import fitlog
    fitlog.set_log_dir(self.tempdir, new_log=True)
    data_set, model = prepare_env()
    from fastNLP import Tester
    tester = Tester(data=data_set, model=model,
                    metrics=AccuracyMetric(pred="predict", target="y"))
    fitlog_callback = FitlogCallback(data_set, tester)
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=5, print_every=50,
                      dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"),
                      use_tqdm=True, callbacks=fitlog_callback,
                      check_code_level=2)
    trainer.train()
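# A minimal sketch of the fitlog bookkeeping that typically brackets a
# FitlogCallback run (the directory name and hyperparameter values below are
# illustrative assumptions, not taken from the snippets above): point fitlog
# at a log directory before training, record hyperparameters, and close the
# log once training is done.
import fitlog

fitlog.set_log_dir('logs/')                       # FitlogCallback writes metrics here
fitlog.add_hyper({'lr': 0.1, 'batch_size': 32})   # hypothetical hyperparameters
# ... build a Trainer with callbacks=[FitlogCallback(...)] and call trainer.train() ...
fitlog.finish()                                   # mark this log entry as finished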
data, char_embed, word_embed = cache()
print(data)
model = CNNBiLSTMCRF(word_embed, char_embed, hidden_size=1200, num_layers=1,
                     tag_vocab=data.vocabs[Const.TARGET],
                     encoding_type=encoding_type, dropout=dropout)

callbacks = [
    GradientClipCallback(clip_value=5, clip_type='value'),
    FitlogCallback(data.datasets['test'], verbose=1)
]
optimizer = SGD(model.parameters(), lr=lr, momentum=0.9)
scheduler = LRScheduler(
    LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)

trainer = Trainer(train_data=data.datasets['dev'][:100], model=model,
                  optimizer=optimizer, sampler=None, device=0,
                  dev_data=data.datasets['dev'][:100], batch_size=batch_size,
                  metrics=SpanFPreRecMetric(
dataset.set_input(Const.INPUT, Const.INPUT_LEN)
dataset.set_target(Const.TARGET)
testset.rename_field('words', Const.INPUT)
testset.rename_field('target', Const.TARGET)
testset.rename_field('seq_len', Const.INPUT_LEN)
testset.set_input(Const.INPUT, Const.INPUT_LEN)
testset.set_target(Const.TARGET)

train_data, dev_data = dataset.split(0.1)
loss = CrossEntropyLoss(pred=Const.OUTPUT, target=Const.TARGET)
metrics = AccuracyMetric(pred=Const.OUTPUT, target=Const.TARGET)
trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data,
                  loss=loss, batch_size=16, metrics=metrics, n_epochs=20,
                  callbacks=[FitlogCallback(dataset)])
trainer.train()

tester = Tester(data=testset, model=model, metrics=metrics)
tester.test()
tester = Tester(data=train_data, model=model, metrics=metrics)
tester.test()
fitlog.finish()
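# The set_input / set_target calls above follow fastNLP's field convention:
# fields marked as input are fed to the model's forward(), and fields marked
# as target are passed to the loss and metrics. A minimal sketch, with field
# names assumed from the snippet above:
from fastNLP import DataSet

ds = DataSet({'words': [[1, 2, 3]], 'seq_len': [3], 'target': [0]})
ds.set_input('words', 'seq_len')   # forward(words=..., seq_len=...)
ds.set_target('target')            # loss and metrics receive target=...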
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.bert_data_path, config.train_name), "rb"))
    print(train_data[0])
    # debug
    if config.debug:
        train_data = train_data[0:30]
    dev_data = pickle.load(
        open(os.path.join(config.bert_data_path, config.dev_name), "rb"))
    print(dev_data[0])
    # test_data = pickle.load(open(os.path.join(config.bert_data_path, config.test_name), "rb"))
    schemas = get_schemas(config.source_path)

    state_dict = torch.load(config.bert_path)
    # print(state_dict)
    text_model = BertForMultiLabelSequenceClassification.from_pretrained(
        config.bert_folder, state_dict=state_dict, num_labels=len(schemas))

    # optimizer: exclude biases and LayerNorm weights from weight decay
    param_optimizer = list(text_model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    num_train_optimization_steps = int(
        len(train_data) / config.batch_size / config.update_every) * config.epoch
    if config.local_rank != -1:
        num_train_optimization_steps = (num_train_optimization_steps //
                                        torch.distributed.get_world_size())
    optimizer = BertAdam(
        lr=config.lr,
        warmup=config.warmup_proportion,
        t_total=num_train_optimization_steps).construct_from_pytorch(
            optimizer_grouped_parameters)

    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    logs = FitlogCallback(dev_data)
    f1 = F1_score(pred='output', target='label_id')

    trainer = Trainer(train_data=train_data,
                      model=text_model,
                      loss=BCEWithLogitsLoss(),
                      batch_size=config.batch_size,
                      check_code_level=-1,
                      metrics=f1,
                      metric_key='f1',
                      n_epochs=int(config.epoch),
                      dev_data=dev_data,
                      save_path=config.save_path,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      update_every=config.update_every,
                      optimizer=optimizer,
                      use_tqdm=False,
                      device=config.device,
                      callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(dev_data,
                    text_model,
                    metrics=f1,
                    device=config.device,
                    batch_size=config.batch_size)
    tester.test()
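# The same weight-decay grouping works with a plain PyTorch optimizer in place
# of fastNLP's BertAdam wrapper. A minimal sketch, assuming `text_model` and
# `no_decay` as defined in the snippet above; the learning rate is an
# illustrative value:
import torch

params = list(text_model.named_parameters())
grouped = [
    {'params': [p for n, p in params if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},   # regular weights get L2 decay
    {'params': [p for n, p in params if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},    # biases / LayerNorm are exempt
]
optimizer = torch.optim.AdamW(grouped, lr=2e-5)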
                              target='target', seq_len='seq_len',
                              encoding_type=encoding_type)
acc_metric = AccuracyMetric(pred='pred', target='target', seq_len='seq_len')
metrics = [f1_metric, acc_metric]

if args.optim == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
elif args.optim == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum)

callbacks = [
    FitlogCallback({
        'test': datasets['test'],
        'train': datasets['train']
    }),
    # exponential decay: lr0 / 1.03**ep
    LRScheduler(
        lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.03) ** ep))
]
print('label_vocab:{}\n{}'.format(len(vocabs['label']),
                                  vocabs['label'].idx2word))

trainer = Trainer(datasets['train'], model, optimizer=optimizer, loss=loss,
                  metrics=metrics, dev_data=datasets['dev'], device=device,
                  batch_size=args.batch, n_epochs=args.epoch,
        pred = pred.argmax(dim=-1).tolist()
        self.p.extend(pred)

    def get_metric(self, reset=True):
        f1_score = m.f1_score(self.l, self.p, average="macro")
        if reset:
            self.l = []
            self.p = []
        return {"f1": f1_score}  # key was "f1:", with a stray colon


optimizer = Adam(lr=args.lr, weight_decay=0)
acc = AccuracyMetric()
f1 = f1metric()
loss = CrossEntropyLoss()
trainer = Trainer(
    trainset,
    cla,
    optimizer=optimizer,
    loss=loss,
    batch_size=args.batch_size,
    n_epochs=args.num_epoch,
    dev_data=testset,
    metrics=[acc, f1],
    save_path=args.save_dir,
    callbacks=[FitlogCallback(log_loss_every=5)],  # only log training loss, every 5 steps
)
trainer.train()
fitlog.finish()  # finish the logging
rnn_text_model = RNN.RNN_Text(vocab_size=m, input_size=50,
                              hidden_layer_size=128, target_size=k,
                              dropout=0.1)
cnn_text_model = CNN.CNN_Text(vocab_size=m, input_size=50, target_size=k,
                              dropout=0.05)
model = rnn_text_model
# ModelLoader.load_pytorch(model, "model_ckpt_large_CNN.pkl")

trainer = Trainer(
    train_data=train_set,
    model=model,
    loss=CrossEntropyLoss(pred='pred', target='label'),
    n_epochs=50,
    batch_size=16,
    metrics=AccuracyMetric(pred='pred', target='label'),
    dev_data=dev_set,
    optimizer=Adam(lr=1e-3),
    callbacks=[FitlogCallback(data=test_set)]
)
trainer.train()

# saver = ModelSaver("model_ckpt_large_RNN.pkl")
# saver.save_pytorch(model)

tester = Tester(
    data=train_set,
    model=model,
    metrics=AccuracyMetric(pred='pred', target='label'),
    batch_size=16,
)
tester.test()
tester = Tester(
# elmo_embed = ElmoEmbedding(vocab=data.vocabs['cap_words'],
#                            model_dir_or_name='.',
#                            requires_grad=True, layers='mix')
# char_embed = StackEmbedding([elmo_embed, char_embed])

model = CNNBiLSTMCRF(word_embed, char_embed, hidden_size=200, num_layers=1,
                     tag_vocab=data.vocabs[Const.TARGET],
                     encoding_type=encoding_type)

callbacks = [
    GradientClipCallback(clip_type='value', clip_value=5),
    FitlogCallback({'test': data.datasets['test']}, verbose=1),
    # SaveModelCallback('save_models/', top=3, only_param=False, save_on_exception=True)
]
# optimizer = Adam(model.parameters(), lr=0.001)
# optimizer = SWATS(model.parameters(), verbose=True)
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
# inverse-time decay: at epoch e the lr is lr0 / (1 + 0.05 * e)
scheduler = LRScheduler(
    LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)

trainer = Trainer(train_data=data.datasets['train'], model=model,
                  optimizer=optimizer, sampler=BucketSampler(batch_size=20),
                  device=1, dev_data=data.datasets['dev'],
def train(args):
    text_data = TextData()
    with open(os.path.join(args.vocab_dir, args.vocab_data), 'rb') as fin:
        text_data = pickle.load(fin)
    vocab_size = text_data.vocab_size
    class_num = text_data.class_num
    # class_num = 1
    seq_len = text_data.max_seq_len
    print("(vocab_size,class_num,seq_len):({0},{1},{2})".format(
        vocab_size, class_num, seq_len))

    train_data = text_data.train_set
    val_data = text_data.val_set
    test_data = text_data.test_set
    train_data.set_input('words', 'seq_len')
    train_data.set_target('target')
    val_data.set_input('words', 'seq_len')
    val_data.set_target('target')
    test_data.set_input('words', 'seq_len')
    test_data.set_target('target')

    init_embeds = None
    if args.pretrain_model == "None":
        print("No pretrained model will be used.")
        print("vocabsize:{0}".format(vocab_size))
        init_embeds = (vocab_size, args.embed_size)
    elif args.pretrain_model == "word2vec":
        embeds_path = os.path.join(args.prepare_dir, 'w2v_embeds.pkl')
        print("Loading Word2Vec pretrained embedding from {0}.".format(
            embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    elif args.pretrain_model == 'glove':
        embeds_path = os.path.join(args.prepare_dir, 'glove_embeds.pkl')
        print("Loading Glove pretrained embedding from {0}.".format(embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    elif args.pretrain_model == 'glove2wv':
        embeds_path = os.path.join(args.prepare_dir, 'glove2wv_embeds.pkl')
        print("Loading Glove pretrained embedding from {0}.".format(embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    else:
        init_embeds = (vocab_size, args.embed_size)

    if args.model == "CNNText":
        print("Using CNN Model.")
        model = CNNText(init_embeds, num_classes=class_num, padding=2,
                        dropout=args.dropout)
    elif args.model == "StarTransformer":
        print("Using StarTransformer Model.")
        model = STSeqCls(init_embeds, num_cls=class_num,
                         hidden_size=args.hidden_size)
    elif args.model == "MyCNNText":
        model = MyCNNText(init_embeds=init_embeds, num_classes=class_num,
                          padding=2, dropout=args.dropout)
        print("Using user defined CNNText")
    elif args.model == "LSTMText":
        print("Using LSTM Model.")
        model = LSTMText(init_embeds=init_embeds, output_dim=class_num,
                         hidden_dim=args.hidden_size,
                         num_layers=args.num_layers, dropout=args.dropout)
    elif args.model == "Bert":
        # NOTE: no Bert model is constructed here, so choosing "Bert"
        # leaves `model` undefined and print(model) below will fail.
        print("Using Bert Model.")
    else:
        print("Using default model: CNNText.")
        model = CNNText((vocab_size, args.embed_size), num_classes=class_num,
                        padding=2, dropout=0.1)
    print(model)

    if args.cuda:
        device = torch.device('cuda')
    else:
        device = None

    print("train_size:{0} ; val_size:{1} ; test_size:{2}".format(
        train_data.get_length(), val_data.get_length(),
        test_data.get_length()))

    if args.optim == "Adam":
        print("Using Adam as optimizer.")
        optimizer = fastnlp_optim.Adam(lr=0.001,
                                       weight_decay=args.weight_decay)
        if args.model_suffix == "default":
            args.model_suffix = args.optim  # was `==`, a no-op comparison
    else:
        print("No Optimizer will be used.")
        optimizer = None

    criterion = CrossEntropyLoss()
    metric = AccuracyMetric()
    model_save_path = os.path.join(args.model_dir, args.model,
                                   args.model_suffix)
    earlystop = EarlyStopCallback(args.patience)
    fitlog_back = FitlogCallback({"val": val_data, "train": train_data})
    trainer = Trainer(train_data=train_data, model=model,
                      save_path=model_save_path, device=device,
                      n_epochs=args.epochs, optimizer=optimizer,
                      dev_data=val_data, loss=criterion,
                      batch_size=args.batch_size, metrics=metric,
                      callbacks=[fitlog_back, earlystop])
    trainer.train()
    print("Train Done.")

    tester = Tester(data=val_data, model=model, metrics=metric,
                    batch_size=args.batch_size, device=device)
    tester.test()
    print("Test Done.")

    print("Predict the answer with best model...")
    acc = 0.0
    output = []
    data_iterator = Batch(test_data, batch_size=args.batch_size)
    for data_x, batch_y in data_iterator:
        i_data = Variable(data_x['words']).cuda()
        pred = model(i_data)[C.OUTPUT]
        pred = pred.sigmoid()
        # print(pred.shape)
        output.append(pred.cpu().data)
    output = torch.cat(output, 0).numpy()
    print(output.shape)
    print("Predict Done. {} records".format(len(output)))

    result_save_path = os.path.join(args.result_dir,
                                    args.model + "_" + args.model_suffix)
    with open(result_save_path + ".pkl", 'wb') as f:
        pickle.dump(output, f)

    output = output.squeeze()[:, 1].tolist()
    projectid = text_data.test_projectid.values
    answers = []
    count = 0
    for i in range(len(output)):
        if output[i] > 0.5:
            count += 1
    print("true sample count:{}".format(count))

    add_count = 0
    for i in range(len(projectid) - len(output)):
        output.append(0.13)  # was `[0.13]`, which appended a list to a list of floats
        add_count += 1
    print("Add {} default result in predict.".format(add_count))

    df = pd.DataFrame()
    df['projectid'] = projectid
    df['y'] = output
    df.to_csv(result_save_path + ".csv", index=False)
    print("Predict Done, results saved to {}".format(result_save_path))

    fitlog.finish()
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug
    if config.debug:
        train_data = train_data[0:100]
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    print(len(train_data), len(dev_data))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))

    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))
    word_vocab = pickle.load(
        open(os.path.join(config.data_path, config.word_vocab_name), "rb"))
    char_vocab = pickle.load(
        open(os.path.join(config.data_path, config.char_vocab_name), "rb"))
    pos_vocab = pickle.load(
        open(os.path.join(config.data_path, config.pos_vocab_name), "rb"))
    # spo_vocab = pickle.load(open(os.path.join(config.data_path, config.spo_vocab_name), "rb"))
    tag_vocab = pickle.load(
        open(os.path.join(config.data_path, config.tag_vocab_name), "rb"))
    print('word vocab', len(word_vocab))
    print('char vocab', len(char_vocab))
    print('pos vocab', len(pos_vocab))
    # print('spo vocab', len(spo_vocab))
    print('tag vocab', len(tag_vocab))

    schema = get_schemas(config.source_path)

    if task_name == 'bilstm_crf':
        model = AdvSeqLabel(
            char_init_embed=(len(char_vocab), config.char_embed_dim),
            word_init_embed=(len(word_vocab), config.word_embed_dim),
            pos_init_embed=(len(pos_vocab), config.pos_embed_dim),
            spo_embed_dim=len(schema),
            sentence_length=config.sentence_length,
            hidden_size=config.hidden_dim,
            num_classes=len(tag_vocab),
            dropout=config.dropout,
            id2words=tag_vocab.idx2word,
            encoding_type=config.encoding_type)
    elif task_name == 'trans_crf':
        model = TransformerSeqLabel(
            char_init_embed=(len(char_vocab), config.char_embed_dim),
            word_init_embed=(len(word_vocab), config.word_embed_dim),
            pos_init_embed=(len(pos_vocab), config.pos_embed_dim),
            spo_embed_dim=len(schema),
            num_classes=len(tag_vocab),
            id2words=tag_vocab.idx2word,
            encoding_type=config.encoding_type,
            num_layers=config.num_layers,
            inner_size=config.inner_size,
            key_size=config.key_size,
            value_size=config.value_size,
            num_head=config.num_head,
            dropout=config.dropout)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    # loss = NLLLoss()
    logs = FitlogCallback(dev_data)
    metrics = SpanFPreRecMetric(tag_vocab, pred='pred', seq_len='seq_len',
                                target='tag')

    train_data.set_input('tag')
    dev_data.set_input('tag')
    dev_data.set_target('seq_len')
    # print(train_data.get_field_names())

    trainer = Trainer(
        train_data=train_data,
        model=model,
        # loss=loss,
        metrics=metrics,
        metric_key='f',
        batch_size=config.batch_size,
        n_epochs=config.epoch,
        dev_data=dev_data,
        save_path=config.save_path,
        check_code_level=-1,
        print_every=config.print_every,
        validate_every=config.validate_every,
        optimizer=optimizer,
        use_tqdm=False,
        device=config.device,
        callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(dev_data, model, metrics=metrics, device=config.device,
                    batch_size=config.batch_size)
    tester.test()
def train(args):
    text_data = TextData()
    with open(os.path.join(args.vocab_dir, args.vocab_data), 'rb') as fin:
        text_data = pickle.load(fin)
    vocab_size = text_data.vocab_size
    class_num = text_data.class_num
    seq_len = text_data.max_seq_len
    print("(vocab_size,class_num,seq_len):({0},{1},{2})".format(
        vocab_size, class_num, seq_len))

    train_data = text_data.train_set
    test_dev_data = text_data.test_set
    train_data.set_input('words', 'seq_len')
    train_data.set_target('target')
    test_dev_data.set_input('words', 'seq_len')
    test_dev_data.set_target('target')
    test_data, dev_data = test_dev_data.split(0.2)
    test_data = test_dev_data  # NOTE: overrides the split; the full set is used for testing

    init_embeds = None
    if args.pretrain_model == "None":
        print("No pretrained model will be used.")
        print("vocabsize:{0}".format(vocab_size))
        init_embeds = (vocab_size, args.embed_size)
    elif args.pretrain_model == "word2vec":
        embeds_path = os.path.join(args.prepare_dir, 'w2v_embeds.pkl')
        print("Loading Word2Vec pretrained embedding from {0}.".format(
            embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    elif args.pretrain_model == 'glove':
        embeds_path = os.path.join(args.prepare_dir, 'glove_embeds.pkl')
        print("Loading Glove pretrained embedding from {0}.".format(embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    elif args.pretrain_model == 'glove2wv':
        embeds_path = os.path.join(args.prepare_dir, 'glove2wv_embeds.pkl')
        print("Loading Glove pretrained embedding from {0}.".format(embeds_path))
        with open(embeds_path, 'rb') as fin:
            init_embeds = pickle.load(fin)
    else:
        init_embeds = (vocab_size, args.embed_size)

    if args.model == "CNNText":
        print("Using CNN Model.")
        model = CNNText(init_embeds, num_classes=class_num, padding=2,
                        dropout=args.dropout)
    elif args.model == "StarTransformer":
        print("Using StarTransformer Model.")
        model = STSeqCls(init_embeds, num_cls=class_num,
                         hidden_size=args.hidden_size)
    elif args.model == "MyCNNText":
        model = MyCNNText(init_embeds=init_embeds, num_classes=class_num,
                          padding=2, dropout=args.dropout)
        print("Using user defined CNNText")
    elif args.model == "LSTMText":
        print("Using LSTM Model.")
        model = LSTMText(init_embeds=init_embeds, output_dim=class_num,
                         hidden_dim=args.hidden_size,
                         num_layers=args.num_layers, dropout=args.dropout)
    elif args.model == "Bert":
        # NOTE: no Bert model is constructed here, so choosing "Bert"
        # leaves `model` undefined and print(model) below will fail.
        print("Using Bert Model.")
    else:
        print("Using default model: CNNText.")
        model = CNNText((vocab_size, args.embed_size), num_classes=class_num,
                        padding=2, dropout=0.1)
    print(model)

    if args.cuda:
        device = torch.device('cuda')
    else:
        device = None

    print("train_size:{0} ; dev_size:{1} ; test_size:{2}".format(
        train_data.get_length(), dev_data.get_length(),
        test_data.get_length()))

    if args.optim == "Adam":
        print("Using Adam as optimizer.")
        optimizer = fastnlp_optim.Adam(lr=0.001,
                                       weight_decay=args.weight_decay)
        if args.model_suffix == "default":
            args.model_suffix = args.optim  # was `==`, a no-op comparison
    else:
        print("No Optimizer will be used.")
        optimizer = None

    criterion = CrossEntropyLoss()
    metric = AccuracyMetric()
    model_save_path = os.path.join(args.model_dir, args.model,
                                   args.model_suffix)
    earlystop = EarlyStopCallback(args.patience)
    trainer = Trainer(train_data=train_data, model=model,
                      save_path=model_save_path, device=device,
                      n_epochs=args.epochs, optimizer=optimizer,
                      dev_data=test_data, loss=criterion,
                      batch_size=args.batch_size, metrics=metric,
                      callbacks=[FitlogCallback(test_data), earlystop])
    trainer.train()
    print("Train Done.")

    tester = Tester(data=test_data, model=model, metrics=metric,
                    batch_size=args.batch_size, device=device)
    tester.test()
    print("Test Done.")

    fitlog.finish()
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug
    if config.debug:
        train_data = train_data[0:30]
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(
        open(os.path.join(config.data_path, config.vocabulary_name), "rb"))

    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary),
                          embed_dim=config.embed_dim,
                          output_dim=config.class_num,
                          hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers,
                          dropout=config.dropout)
    elif task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary),
                                  embed_dim=config.embed_dim,
                                  output_dim=config.class_num,
                                  hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers,
                                  dropout=config.dropout)
    elif task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary),
                         embed_dim=config.embed_dim,
                         class_num=config.class_num,
                         kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes,
                         dropout=config.dropout,
                         static=config.static,
                         in_channels=config.in_channels)
    elif task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary),
                         embed_dim=config.embed_dim,
                         output_dim=config.class_num,
                         hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers,
                         dropout=config.dropout)
    # elif task_name == "cnn_w2v":
    #     text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
    #                          class_num=config.class_num, kernel_num=config.kernel_num,
    #                          kernel_sizes=config.kernel_sizes, dropout=config.dropout,
    #                          static=config.static, in_channels=config.in_channels,
    #                          weight=weight)
    elif task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary),
                          embed_dim=config.embed_dim,
                          output_dim=config.class_num,
                          hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers,
                          dropout=config.dropout)
    # elif task_name == "bert":
    #     text_model = BertModel.from_pretrained(config.bert_path)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    logs = FitlogCallback(dev_data)
    f1 = F1_score(pred='output', target='target')

    trainer = Trainer(train_data=train_data,
                      model=text_model,
                      loss=BCEWithLogitsLoss(),
                      batch_size=config.batch_size,
                      check_code_level=-1,
                      metrics=f1,
                      metric_key='f1',
                      n_epochs=config.epoch,
                      dev_data=dev_data,
                      save_path=config.save_path,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      optimizer=optimizer,
                      use_tqdm=False,
                      device=config.device,
                      callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(dev_data,
                    text_model,
                    metrics=f1,
                    device=config.device,
                    batch_size=config.batch_size)
    tester.test()
"hidden_size": arg.hidden_size, "dropout": arg.dropout, "use_allennlp": False, }, ) optimizer = Adadelta(lr=arg.lr, params=model.parameters()) scheduler = StepLR(optimizer, step_size=10, gamma=0.5) callbacks = [ LRScheduler(scheduler), ] if arg.task in ['snli']: callbacks.append( FitlogCallback(data_info.datasets[arg.testset_name], verbose=1)) elif arg.task == 'mnli': callbacks.append( FitlogCallback( { 'dev_matched': data_info.datasets['dev_matched'], 'dev_mismatched': data_info.datasets['dev_mismatched'] }, verbose=1)) trainer = Trainer(train_data=data_info.datasets['train'], model=model, optimizer=optimizer, num_workers=0, batch_size=arg.batch_size, n_epochs=arg.n_epochs,
    optimizer = optim.AdamW(param_, lr=args.lr,
                            weight_decay=args.weight_decay)
elif args.optim == 'sgd':
    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
    #                       weight_decay=args.weight_decay)
    optimizer = optim.SGD(param_, lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)

if 'msra' in args.dataset:
    datasets['dev'] = datasets['test']

fitlog_evaluate_dataset = {'test': datasets['test']}
if args.test_train:
    fitlog_evaluate_dataset['train'] = datasets['train']
evaluate_callback = FitlogCallback(fitlog_evaluate_dataset, verbose=1)
lrschedule_callback = LRScheduler(
    lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep)))
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)

# model.state_dict()
class CheckWeightCallback(Callback):
    def __init__(self, model):
        super().__init__()
        self.model_ = model

    def on_step_end(self):
        print('parameter weight:', flush=True)
        print(self.model_.state_dict()['encoder.layer_0.attn.w_q.weight'],
              flush=True)
                  embed_dim=128,
                  hidden_dim=hidden_units,
                  output_dim=8)
# mymodel = load_model(mymodel, './model/best_TextModel_acc_2019-06-28-09-07-50')

trainer = Trainer(
    train_data=train_data,
    model=mymodel,
    loss=CrossEntropyLoss(pred='pred', target='target'),
    # loss=SkipBudgetLoss(pred='pred', target='target', updated_states='updated_states'),
    metrics=[AccuracyMetric(), UsedStepsMetric()],
    n_epochs=30,
    batch_size=batch_size,
    print_every=-1,
    validate_every=-1,
    dev_data=test_data,
    save_path='./model',
    optimizer=Adam(lr=learning_rate, weight_decay=0),
    check_code_level=0,
    device="cuda",
    metric_key='acc',
    use_tqdm=False,
    callbacks=[FitlogCallback(test_data)])

start = time.perf_counter()  # time.clock() was removed in Python 3.8
trainer.train()
end = time.perf_counter()
training_time = end - start
print('total training time:%fs' % training_time)
fitlog.add_hyper({'time': training_time})
fitlog.finish()
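# For quick reference, the FitlogCallback constructor patterns that appear in
# the snippets above (a summary of observed usage, not a full API description;
# the variable names are placeholders):
#
#   FitlogCallback(test_data)                                  # one extra DataSet to evaluate
#   FitlogCallback({'test': test_data, 'train': train_data})   # several named DataSets
#   FitlogCallback(data_set, tester)                           # reuse a prepared Tester
#   FitlogCallback(test_data, verbose=1)                       # print the extra evaluations
#   FitlogCallback(log_loss_every=5)                           # only log the training loss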