def objective(self, trial):
    # Apply the sampled hyperparameters to the argument namespace for this trial.
    for (param, value) in self.get_tunable_parameters(trial, self.args).items():
        setattr(self.args, param, value)
    model = getattr(models, self.args.model)(self.adj, self.args).to(self.device)
    optimizer = optim.Adam(model.parameters(), lr=self.args.lr)

    # Training
    if self.args.log_file:
        logging.basicConfig(filename=self.args.log_file, level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO, format='# %(message)s')

    val_loss_list = []
    logger.info(f"model: {trial.params}")
    dataloader_train = DataLoader(self.dataset_train, batch_size=self.args.batch_size,
                                  shuffle=True, num_workers=1)
    dataloader_val = DataLoader(self.dataset_val, batch_size=self.args.batch_size,
                                shuffle=False, num_workers=1)
    for epoch in range(self.args.n_epochs):
        train_loss = run_epoch(model, optimizer, dataloader_train)
        val_loss = run_epoch(model, optimizer, dataloader_val, training=False)
        logger.info(f"epoch: {epoch}, train: {train_loss}, val: {val_loss}")
        # Report the intermediate value so the study's pruner can stop bad trials early.
        trial.report(val_loss, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()
        val_loss_list.append(val_loss)
    return min(val_loss_list)
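# The objective above reports intermediate values via trial.report()/should_prune(),
# so it is meant to be driven by an Optuna study with a pruner. A minimal sketch,
# assuming the method lives on a tuner object; the names `tune` and `tuner` are
# hypothetical and not part of the original code:
import optuna

def tune(tuner, n_trials=50):
    # MedianPruner consumes the per-epoch values reported inside objective().
    study = optuna.create_study(direction='minimize',
                                pruner=optuna.pruners.MedianPruner())
    study.optimize(tuner.objective, n_trials=n_trials)
    return study.best_params, study.best_value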
def run(model, params, train_dataset, dev_dataset, idx2word):
    max_em = max_f1 = max_ep = es_cnt = 0
    train_epoch = params['train_epoch']
    test_epoch = params['test_epoch']
    init_lr = params['learning_rate']
    early_stop = params['early_stop']
    train_iter = valid_iter = 0

    if params['mode'] == 'q':
        LM = load_lm(params['lm_path'])
    else:
        LM = None

    for epoch_idx in range(train_epoch):
        if params['train']:
            start_time = datetime.datetime.now()
            print("\n[Epoch %d]" % (epoch_idx + 1))
            _, _, _, train_iter = run_epoch(model, train_dataset, epoch_idx + 1, train_iter,
                                            idx2word, params, is_train=True, lang_model=LM)
            elapsed_time = datetime.datetime.now() - start_time
            print('Epoch %d Done in %s' % (epoch_idx + 1, elapsed_time))

        if (epoch_idx + 1) % test_epoch == 0:
            em, f1, loss, valid_iter = run_epoch(model, dev_dataset, 0, valid_iter,
                                                 idx2word, params, is_train=False, lang_model=LM)
            if max_f1 > f1 - 1e-2 and epoch_idx > 0 and early_stop:
                print('Max em: %.3f, f1: %.3f, epoch: %d' % (max_em, max_f1, max_ep))
                es_cnt += 1
                if es_cnt > 3:
                    print('\nEarly stopping')
                    print('Max em: %.3f, f1: %.3f, epoch: %d' % (max_em, max_f1, max_ep))
                    break
                else:
                    # Decay the learning rate exponentially (halve it)
                    print('\nLower learning rate from %f to %f (%d/3)' % (
                        params['learning_rate'], params['learning_rate'] / 2, es_cnt))
                    params['learning_rate'] /= 2
            else:
                es_cnt = 0
            max_ep = max_ep if max_em > em else (epoch_idx + 1)
            max_em = max_em if max_em > em else em
            max_f1 = max_f1 if max_f1 > f1 else f1
            print('Max em: %.3f, f1: %.3f, epoch: %d' % (max_em, max_f1, max_ep))
            if params['save']:
                model.save(params['checkpoint_dir'])

    model.reset_graph()
    params['learning_rate'] = init_lr
    return max_em, max_f1, max_ep
def run_experiment(model, dataset, set_num):
    writer = SummaryWriter('tensorboard')
    best_metric = np.zeros(2)
    early_stop = False

    if model.config.train:
        if model.config.resume:
            model.load_checkpoint()
        for ep in range(model.config.epoch):
            if early_stop:
                break
            print('- Training Epoch %d' % (ep + 1))
            tr_met = run_epoch(model, dataset, ep, 'tr', set_num)
            writer.add_scalar('Train/Loss', tr_met[0], ep + 1)
            writer.add_scalar('Train/Accuracy', tr_met[1], ep + 1)

            if model.config.valid:
                print('- Validation')
                val_met = run_epoch(model, dataset, ep, 'va', set_num, False)
                writer.add_scalar('Validation/Loss', val_met[0], ep + 1)
                writer.add_scalar('Validation/Accuracy', val_met[1], ep + 1)
                if best_metric[1] < val_met[1]:
                    best_metric = val_met
                    model.save_checkpoint({
                        'config': model.config,
                        'state_dict': model.state_dict(),
                        'optimizer': model.optimizer.state_dict()
                    })
                    if best_metric[1] == 100:
                        break
                else:
                    # model.decay_lr()
                    if model.config.early_stop:
                        early_stop = True
                        print('\tearly stop applied')
                print('\tbest metrics:\t%s' % ('\t'.join(
                    ['{:.2f}'.format(k) for k in best_metric])))

            if model.config.test:
                print('- Testing')
                test_met = run_epoch(model, dataset, ep, 'te', set_num, False)
                writer.add_scalar('Test/Loss', test_met[0], ep + 1)
                writer.add_scalar('Test/Accuracy', test_met[1], ep + 1)
            print()

    if model.config.test:
        print('- Load Validation/Testing')
        if model.config.resume or model.config.train:
            model.load_checkpoint()
        run_epoch(model, dataset, 0, 'va', set_num, False)
        run_epoch(model, dataset, 0, 'te', set_num, False)
        print()

    return best_metric
def run_experiment(model, dataset, set_num):
    best_metric = np.zeros(2)
    early_stop = False

    if model.config.train:
        if model.config.resume:
            model.load_checkpoint()
        for ep in range(model.config.epoch):
            if early_stop:
                break
            print('- Training Epoch %d' % (ep + 1))
            run_epoch(model, dataset, ep, 'tr', set_num)

            if model.config.valid:
                print('- Validation')
                met = run_epoch(model, dataset, ep, 'va', set_num, False)
                if best_metric[1] < met[1]:
                    best_metric = met
                    model.save_checkpoint({
                        'config': model.config,
                        'state_dict': model.state_dict(),
                        'optimizer': model.optimizer.state_dict()})
                    if best_metric[1] == 100:
                        break
                else:
                    # model.decay_lr()
                    if model.config.early_stop:
                        early_stop = True
                        print('\tearly stop applied')
                print('\tbest metrics:\t%s' % ('\t'.join(
                    ['{:.2f}'.format(k) for k in best_metric])))

            if model.config.test:
                print('- Testing')
                run_epoch(model, dataset, ep, 'te', set_num, False)
            print()

    if model.config.test:
        print('- Load Validation/Testing')
        if model.config.resume or model.config.train:
            model.load_checkpoint()
        run_epoch(model, dataset, 0, 'va', set_num, False)
        run_epoch(model, dataset, 0, 'te', set_num, False)
        print()

    return best_metric
def run_experiment(model, dataset):
    if model.config.resume:
        model.load_checkpoint()

    if model.config.train:
        print('##### Training #####')
        prev_vloss = 99999
        for ep in range(model.config.epoch):
            print('[Epoch %d]' % (ep + 1))
            run_epoch(model, dataset, 'tr')

            if model.config.valid:
                print('##### Validation #####')
                vloss = run_epoch(model, dataset, 'va', is_train=True)
                if vloss < prev_vloss - 1:
                    prev_vloss = vloss
                else:
                    model.decay_lr()
                    print('learning rate decay to %.3f' % model.config.lr)

    if model.config.test:
        print('##### Testing #####')
        run_epoch(model, dataset, 'te', is_train=False)
def main():
    # Train the simple copy task.
    V = 11
    criterion = LabelSmoothing(size=V, padding_idx=0, smoothing=0.0)
    model = make_model(V, V, N=2)
    model_opt = NoamOpt(
        model.src_embed[0].d_model, 1, 400,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    for epoch in range(10):
        model.train()
        print("epoch %d" % epoch)
        run_epoch(data_gen(V, 30, 20), model,
                  SimpleLossCompute(model.generator, criterion, model_opt))
        model.eval()
        print(run_epoch(data_gen(V, 30, 5), model,
                        SimpleLossCompute(model.generator, criterion, None)))

    model.eval()
    src = Variable(torch.LongTensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]))
    src_mask = Variable(torch.ones(1, 1, 10))
    print(greedy_decode(model, src, src_mask, max_len=10, start_symbol=1))
def predict(self, lines):
    self.dataset.process_input(lines)
    _, answers = run_epoch(self.model, self.dataset, 0, 'te', 0, False)
    return [self.dataset.idx2word[an] for an in answers]
opt = DefaultConfig()

if __name__ == '__main__':
    train_dataset = Data(train=True)
    test_dataset = Data(train=False)
    train_loader = DataLoaderX(train_dataset, batch_size=opt.batch_size,
                               num_workers=4, pin_memory=True, shuffle=True)
    test_loader = DataLoaderX(test_dataset, batch_size=opt.batch_size,
                              num_workers=4, pin_memory=True)
    num_skills = train_dataset.max_skill_num + 1
    m = student_model(num_skills=num_skills, state_size=opt.state_size,
                      num_heads=opt.num_heads, dropout=opt.dropout, infer=False)
    torch.backends.cudnn.benchmark = True
    best_auc = 0
    optimizer = optim.Adam(m.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=opt.lr_decay)
    criterion = nn.BCELoss()

    for epoch in range(opt.max_epoch):
        rmse, auc, r2, acc = run_epoch(m, train_loader, optimizer, scheduler, criterion,
                                       num_skills=num_skills, epoch_id=epoch, is_training=True)
        print('Epoch %d:\nTrain metrics: auc: %.3f, acc: %.3f, rmse: %.3f, r2: %.3f'
              % (epoch + 1, auc, acc, rmse, r2))
        rmse, auc, r2, acc = run_epoch(m, test_loader, optimizer, scheduler, criterion,
                                       num_skills=num_skills, epoch_id=epoch, is_training=False)
        print('\nTest metrics: auc: %.3f, acc: %.3f, rmse: %.3f, r2: %.3f'
              % (auc, acc, rmse, r2))
        if auc > best_auc:
            best_auc = auc
            torch.save(m.state_dict(),
                       'models/sakt_model_auc_{}.pkl'.format(int(best_auc * 1000)))
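# Only the state_dict is saved above, so reloading the best model for evaluation
# requires rebuilding the architecture first. A minimal sketch under the assumptions
# that the same hyperparameters are used and that infer=True is student_model's
# inference-mode flag (not confirmed by the snippet above):
best_model = student_model(num_skills=num_skills, state_size=opt.state_size,
                           num_heads=opt.num_heads, dropout=opt.dropout, infer=True)
best_model.load_state_dict(
    torch.load('models/sakt_model_auc_{}.pkl'.format(int(best_auc * 1000))))
best_model.eval()  # disable dropout for inference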
def main(path_to_train, path_to_test, sep=','):
    # The dataset can now be given either a DataFrame or a CSV file.
    train_dataset = Data(path_to_csv=path_to_train, train=True, standard_load=False, sep=sep)
    test_dataset = Data(path_to_csv=path_to_test, train=False, standard_load=False, sep=sep)
    train_loader = DataLoaderX(train_dataset, batch_size=opt.batch_size,
                               num_workers=4, pin_memory=True, shuffle=True)
    test_loader = DataLoaderX(test_dataset, batch_size=opt.batch_size,
                              num_workers=4, pin_memory=True)
    num_skills = train_dataset.skill_num
    m = student_model(num_skills=num_skills, state_size=opt.state_size,
                      num_heads=opt.num_heads, dropout=opt.dropout, infer=False)
    torch.backends.cudnn.benchmark = True
    best_auc = 0
    optimizer = optim.Adam(m.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=opt.lr_decay)
    criterion = nn.BCELoss()

    for epoch in range(opt.max_epoch):
        rmse, auc, r2, acc = run_epoch(m, train_loader, optimizer, scheduler, criterion,
                                       num_skills=num_skills, epoch_id=epoch, is_training=True)
        print('Epoch %d:\nTrain metrics: auc: %.3f, acc: %.3f, rmse: %.3f, r2: %.3f'
              % (epoch + 1, auc, acc, rmse, r2))
        rmse, auc, r2, acc = run_epoch(m, test_loader, optimizer, scheduler, criterion,
                                       num_skills=num_skills, epoch_id=epoch, is_training=False)
        print('\nTest metrics: auc: %.3f, acc: %.3f, rmse: %.3f, r2: %.3f'
              % (auc, acc, rmse, r2))
        if auc > best_auc:
            best_auc = auc
            torch.save(m.state_dict(),
                       'models/sakt_model_auc_{}.pkl'.format(int(best_auc * 1000)))
# criterion
criterion = nn.BCEWithLogitsLoss()

# train loop
best_loss = 80
train_loss_lst = []
val_loss_lst = []
for epoch in range(start_epoch, num_epochs):
    train_loss = run.run_epoch(model_ft, criterion, train_dataloader, epoch, optimizer_ft)
    val_loss = run.run_epoch(model_ft, criterion, val_dataloader, epoch)
    train_loss_lst.append(train_loss)
    val_loss_lst.append(val_loss)
    # Keep only the checkpoint with the lowest validation loss so far.
    if val_loss < best_loss:
        print("save model at epoch : {:}".format(epoch))
        best_loss = val_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model_ft.state_dict(),
            'optimizer_state_dict': optimizer_ft.state_dict(),
            'valloss': val_loss,
        }, os.path.join(outmodel_root2, "best4.pth"))
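# The checkpoint above is a dictionary, so resuming training reads the same keys back.
# A minimal sketch, assuming model_ft and optimizer_ft are reconstructed exactly as
# they were before saving:
checkpoint = torch.load(os.path.join(outmodel_root2, "best4.pth"))
model_ft.load_state_dict(checkpoint['model_state_dict'])
optimizer_ft.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1  # continue from the epoch after the saved one
best_loss = checkpoint['valloss']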
def run_experiment(model, dataset):
    if model.config.resume:
        model.load_checkpoint()

    if model.config.train:
        print('=' * 64)
        print(' Training ')
        print('=' * 64)
        prev_vloss = 99999.0
        tr_loss = []
        va_loss = []
        for ep in range(model.config.epoch):
            start_time = time.time()
            print('[Epoch %d] => lr = ' % (ep + 1), model.config.lr)
            print('-' * 64)
            tloss = run_epoch(model, dataset, 'tr')
            tr_loss.append(tloss)
            print('-' * 64)
            print('Train_loss = ', tloss)

            if model.config.valid:
                print('=' * 64)
                print(' Validation ')
                print('=' * 64)
                vloss = run_epoch(model, dataset, 'va', is_train=True)
                va_loss.append(vloss)
                print('-' * 64)
                print('Valid_loss = ', vloss)
                if vloss < prev_vloss - 1.0:
                    prev_vloss = vloss
                    print('Prev_vloss = ', prev_vloss)
                else:
                    model.decay_lr()
                    print('Learning_rate (lr) decays to %.3f' % model.config.lr)
                print('/' * 64)
                print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                      .format(ep + 1, (time.time() - start_time), vloss))
                print('/' * 64)
                print()

        # Plot the losses, save the plot, and pickle the loss histories
        plt.plot(tr_loss, 'r', label="tr_loss")
        plt.plot(va_loss, 'b', label="va_loss")
        plt.legend(bbox_to_anchor=(0.72, 0.95), loc='upper left', borderaxespad=0.)
        plt.savefig('loss.png')
        with open('tr_loss.pkl', 'wb') as f1:
            pickle.dump(tr_loss, f1)
        with open('va_loss.pkl', 'wb') as f2:
            pickle.dump(va_loss, f2)

    if model.config.test:
        print('=' * 64)
        print(' Testing ')
        print('=' * 64)
        te_loss = run_epoch(model, dataset, 'te', is_train=False)
        print('-' * 64)
        print('Test_loss = ', te_loss)