def train_model():
    """Train the 4-class CNN and keep the weights with the best validation accuracy.

    Loads ``raw_data.npy`` from ``data_dirc``, splits it with ``create_data``
    using the module-level settings, and trains for ``NUM_EPOCHS`` epochs with
    Adam and cross-entropy loss. After every epoch the model is scored on the
    validation loader with ``cal_F1``; whenever the validation accuracy
    improves, the state dict is written to ``saved_model/best_model.pth``.

    Returns:
        None. Per-epoch metrics and the best validation scores are printed.
    """
    import os  # local import: only needed to create the checkpoint directory

    print('#### Start Training ####')

    raw_data = np.load(data_dirc + 'raw_data.npy')
    train_data, train_label, val_data, val_label = create_data(
        raw_data, RAW_LABELS, PERMUTATION, RATIO, PREPROCESS,
        MAX_SENTENCE_LENGTH, AUGMENTED, PADDING)

    train_dataset = torch.utils.data.TensorDataset(train_data, train_label)
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_dataset = torch.utils.data.TensorDataset(val_data, val_label)
    val_loader = torch.utils.data.DataLoader(
        dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    file_name = 'best_model'
    checkpoint_dir = 'saved_model'
    checkpoint_path = checkpoint_dir + '/' + file_name + '.pth'
    # Make sure the target directory exists so the first improvement can be
    # saved instead of failing after a full epoch of training.
    os.makedirs(checkpoint_dir, exist_ok=True)

    model = CNN(num_classes=4)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model = model.to(device)

    # Criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    best_acc = 0.0
    # Initialised up front: previously this name was only bound when some
    # epoch beat best_acc, so the final print could fail on an unbound name.
    best_F1 = 0.0

    for epoch in range(NUM_EPOCHS):
        train_loss = 0.0
        # Evaluation helpers run at the end of each epoch, so re-enter
        # training mode once per epoch before touching the batches.
        model.train()
        for i, (batch, labels) in enumerate(train_loader):
            data_batch, label_batch = batch.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(data_batch)
            loss = criterion(outputs, label_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validate and checkpoint on improvement.
        val_acc, val_F1 = cal_F1(val_loader, model)
        if val_acc > best_acc:
            best_acc = val_acc
            best_F1 = val_F1
            torch.save(model.state_dict(), checkpoint_path)

        train_acc = test_model(train_loader, model)
        # Reported loss is the sum of the per-batch losses divided by the
        # sampler length, exactly as before.
        train_loss /= len(train_loader.sampler)
        print('Epoch: [{}/{}], Step: [{}/{}], Val Acc: {}, Val F1: {}, Train Acc: {}, Train Loss: {}'.format(
            epoch + 1, NUM_EPOCHS, i + 1, len(train_loader),
            val_acc, val_F1, train_acc, train_loss))
        sys.stdout.flush()

    print('#### End Training ####')
    print('best val acc:', best_acc)
    print('best F1:', best_F1)
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5) if torch.cuda.device_count() > 1: if args.gpu_ids == None: print("Let's use", torch.cuda.device_count(), "GPUs!") device = torch.device('cuda:0') else: print("Let's use", len(args.gpu_ids), "GPUs!") device = torch.device('cuda:' + str(args.gpu_ids[0])) else: device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') print('args.gpu_ids', args.gpu_ids) model = torch.nn.DataParallel(model, device_ids=args.gpu_ids) model = model.to(device) # criterion = torch.nn.BCELoss() criterion = torch.nn.CrossEntropyLoss( weight=train_dataset.weight.to(device)) # # test_loss = [] # for epoch in range(args.num_epochs): # start_time = time.time() each_train_loss = [] model.train() origin_inputs = [] origin_outputs = []
def main(**kwargs):
    """Run the train / validation / test loop for the model named by ``opt.model_name``.

    Args:
        **kwargs: Attribute overrides applied on top of a fresh ``Config()``.

    The function builds the three ``KTData`` datasets and their loaders,
    instantiates ``CNN``, ``CNN_3D`` or (by default) ``RNN_DKT``, and optionally
    restores model / optimizer state from ``opt.model_path`` (checkpoint keys
    ``"model"``, ``"epoch"``, ``"lr"``, ``"optimizer"``). Each epoch it prints
    the loss and AUC, plots them when ``opt.vis`` is set, writes the loss lists
    with ``myutils.save_loss_file``, saves a checkpoint every
    ``opt.save_every`` epochs, and updates the learning rate through
    ``myutils.adjust_lr``. A final checkpoint is saved after the loop.

    Returns:
        None.
    """
    opt = Config()
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    vis = Visualizer(opt.env) if opt.vis else None

    init_loss_file(opt)
    train_path, valid_path, test_path = init_file_path(opt)

    train_dataset = KTData(train_path, opt='None')
    valid_dataset = KTData(valid_path, opt='None')
    test_dataset = KTData(test_path, opt='None')
    print(len(train_dataset), len(valid_dataset), len(test_dataset))

    # All three loaders share the same settings.
    loader_kwargs = dict(batch_size=opt.batch_size, shuffle=True,
                         num_workers=opt.num_workers, drop_last=True,
                         collate_fn=myutils.collate_fn)
    train_loader = DataLoader(train_dataset, **loader_kwargs)
    valid_loader = DataLoader(valid_dataset, **loader_kwargs)
    test_loader = DataLoader(test_dataset, **loader_kwargs)

    # Every supported model takes the same constructor arguments here.
    if opt.model_name == "CNN":
        model_cls = CNN
    elif opt.model_name == "CNN_3D":
        model_cls = CNN_3D
    else:
        model_cls = RNN_DKT
    model = model_cls(opt.input_dim, opt.embed_dim, opt.hidden_dim,
                      opt.num_layers, opt.output_dim, opt.batch_size,
                      opt.device)

    lr = opt.lr
    last_epoch = -1

    optimizer = torch.optim.Adam(
        params=model.parameters(),
        lr=lr,
        weight_decay=opt.weight_decay,
        betas=(0.9, 0.99)
    )
    if opt.model_path:
        # Identity map_location: storages are returned unchanged at load
        # time; the model is moved to opt.device below.
        map_location = lambda storage, loc: storage
        checkpoint = torch.load(opt.model_path, map_location=map_location)
        model.load_state_dict(checkpoint["model"])
        last_epoch = checkpoint["epoch"]
        lr = checkpoint["lr"]
        optimizer.load_state_dict(checkpoint["optimizer"])

    model = model.to(opt.device)

    loss_result = {}
    auc_result = {}

    # START TRAIN
    for epoch in range(opt.max_epoch):
        # When resuming, skip the epochs that precede the restored one.
        if epoch < last_epoch:
            continue

        # NOTE(review): the original code branched on opt.model_name == "CNN_3D"
        # here, but both branches made exactly these three calls, so the
        # redundant conditional was removed. Confirm whether non-3D models
        # were meant to use different train/valid/test routines.
        train_loss_meter, train_auc_meter, train_loss_list = train.train_3d(
            opt, vis, model, train_loader, epoch, lr, optimizer)
        val_loss_meter, val_auc_meter, val_loss_list = train.valid_3d(
            opt, vis, model, valid_loader, epoch)
        test_loss_meter, test_auc_meter, test_loss_list = test.test_3d(
            opt, vis, model, test_loader, epoch)

        loss_result["train_loss"] = train_loss_meter.value()[0]
        auc_result["train_auc"] = train_auc_meter.value()[0]
        loss_result["val_loss"] = val_loss_meter.value()[0]
        auc_result["val_auc"] = val_auc_meter.value()[0]
        loss_result["test_loss"] = test_loss_meter.value()[0]
        auc_result["test_auc"] = test_auc_meter.value()[0]

        for k, v in loss_result.items():
            print("epoch:{epoch}, {k}:{v:.5f}".format(epoch=epoch, k=k, v=v))
            if opt.vis:
                vis.line(X=np.array([epoch]), Y=np.array([v]), win="loss",
                         opts=dict(title="loss", showlegend=True),
                         name=k, update='append')
        for k, v in auc_result.items():
            print("epoch:{epoch}, {k}:{v:.5f}".format(epoch=epoch, k=k, v=v))
            if opt.vis:
                vis.line(X=np.array([epoch]), Y=np.array([v]), win="auc",
                         opts=dict(title="auc", showlegend=True),
                         name=k, update='append')

        # Write the losses to file at the end of every epoch.
        myutils.save_loss_file(opt, epoch, train_loss_list, val_loss_list,
                               test_loss_list)

        # Every save_every epochs, save the model and optimizer parameters.
        if epoch % opt.save_every == 0:
            myutils.save_model_weight(opt, model, optimizer, epoch, lr)

        # Learning-rate decay.
        lr = myutils.adjust_lr(opt, optimizer, epoch)

    # Save the final model parameters once training ends.
    myutils.save_model_weight(opt, model, optimizer, epoch, lr, is_final=True)
def run_one_setting(**kwargs):
    """Train and test one configuration and report its best test AUC.

    Args:
        **kwargs: Attribute overrides applied on top of a fresh ``Config()``.

    Returns:
        The largest value of ``test_auc_meter.value()[0]`` seen over
        ``opt.max_epoch`` epochs (0 if no epoch runs or none exceeds 0).
    """
    opt = Config()
    for attr_name, attr_value in kwargs.items():
        setattr(opt, attr_name, attr_value)
    print(opt.__dict__)

    vis = Visualizer(opt.env) if opt.vis else None

    init_loss_file(opt)
    if opt.data_source == "statics":
        opt.fold_dataset = True
    train_path, valid_path, test_path = init_file_path(opt)
    print(opt.fold_dataset)

    train_dataset = KTData(train_path, fold_dataset=opt.fold_dataset,
                           q_numbers=opt.output_dim, opt='None')
    test_dataset = KTData(test_path, fold_dataset=opt.fold_dataset,
                          q_numbers=opt.output_dim, opt='None')
    print(len(train_dataset), len(test_dataset))

    train_loader = DataLoader(
        train_dataset,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        drop_last=True,
        collate_fn=myutils.collate_fn,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        drop_last=True,
        collate_fn=myutils.collate_fn,
    )

    # Trailing constructor arguments shared by every model variant.
    common_tail = (opt.embed_dim, opt.hidden_dim, opt.num_layers,
                   opt.output_dim, opt.batch_size, opt.device)
    chosen = opt.model_name
    if chosen == "CNN":
        model = CNN(opt.input_dim, *common_tail)
    elif chosen == "CNN_3D":
        model = CNN_3D(opt.k_frames, opt.input_dim, *common_tail)
    elif chosen == "CNN_3D_mask":
        model = CNN_3D_mask(opt.input_dim, *common_tail, max_seq_len=200)
    elif chosen == "Res21D":
        model = Res21D(opt.k_frames, opt.input_dim, *common_tail)
    elif chosen == "CNN_Concat":
        model = CNN_Concat(opt.k_frames, opt.input_dim, opt.H, *common_tail)
    else:
        model = RNN_DKT(opt.input_dim, *common_tail)

    lr = opt.lr
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr,
                                 weight_decay=opt.weight_decay,
                                 betas=(0.9, 0.99))
    model = model.to(opt.device)

    best_test_auc = 0
    # START TRAIN
    for epoch in range(opt.max_epoch):
        # These variants get an extra cache flush before the training pass.
        if opt.model_name in ("CNN_3D_mask", "Res21D", "CNN_Concat"):
            torch.cuda.empty_cache()
        train_loss_meter, train_auc_meter, train_loss_list = train.train(
            opt, vis, model, train_loader, epoch, lr, optimizer)

        torch.cuda.empty_cache()
        test_loss_meter, test_auc_meter, test_loss_list = test.test(
            opt, vis, model, test_loader, epoch)

        train_auc = train_auc_meter.value()[0]
        test_auc = test_auc_meter.value()[0]
        print("epoch{}, {k}:{v:.5f}".format(epoch, k="train_auc", v=train_auc))
        print("epoch{}, {k}:{v:.5f}".format(epoch, k="test_auc", v=test_auc))

        best_test_auc = max(best_test_auc, test_auc)
        print("best_test_auc is: ", best_test_auc)

        # Learning-rate decay, driven by this epoch's training loss.
        lr = myutils.adjust_lr(opt, optimizer, epoch,
                               train_loss_meter.value()[0])

    return best_test_auc
def run_train_valid(opt, vis):
    """Train with per-epoch validation and return the metric history.

    Args:
        opt: Configuration object; the attributes read here include
            ``batch_size``, ``num_workers``, ``model_name``, ``lr``,
            ``weight_decay``, ``model_path``, ``device``, ``max_epoch`` and
            ``save_every``, plus the model dimension settings.
        vis: Visualizer handle forwarded unchanged to ``train.train_3d`` and
            ``train.valid_3d``.

    Returns:
        A 4-tuple ``(train_loss_list, train_auc_list, valid_loss_list,
        valid_auc_list)`` with one entry per executed epoch, each taken from
        the corresponding meter's ``value()[0]``.
    """
    print(opt.__dict__)

    # Only the train and validation paths are used in this routine.
    train_path, valid_path, _ = init_file_path(opt)

    train_dataset = KTData(train_path, opt='None')
    valid_dataset = KTData(valid_path, opt='None')
    print(train_path, valid_path)
    print(len(train_dataset), len(valid_dataset))

    # Both loaders share the same settings.
    loader_kwargs = dict(batch_size=opt.batch_size, shuffle=True,
                         num_workers=opt.num_workers, drop_last=True,
                         collate_fn=myutils.collate_fn)
    train_loader = DataLoader(train_dataset, **loader_kwargs)
    valid_loader = DataLoader(valid_dataset, **loader_kwargs)

    # Every supported model takes the same constructor arguments here.
    if opt.model_name == "CNN":
        model_cls = CNN
    elif opt.model_name == "CNN_3D":
        model_cls = CNN_3D
    else:
        model_cls = RNN_DKT
    model = model_cls(opt.input_dim, opt.embed_dim, opt.hidden_dim,
                      opt.num_layers, opt.output_dim, opt.batch_size,
                      opt.device)

    lr = opt.lr
    last_epoch = -1

    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr,
                                 weight_decay=opt.weight_decay,
                                 betas=(0.9, 0.99))
    if opt.model_path:
        # Identity map_location: storages are returned unchanged at load
        # time; the model is moved to opt.device below.
        map_location = lambda storage, loc: storage
        checkpoint = torch.load(opt.model_path, map_location=map_location)
        model.load_state_dict(checkpoint["model"])
        last_epoch = checkpoint["epoch"]
        lr = checkpoint["lr"]
        optimizer.load_state_dict(checkpoint["optimizer"])

    model = model.to(opt.device)

    train_loss_list = []
    train_auc_list = []
    valid_loss_list = []
    valid_auc_list = []

    # START TRAIN
    for epoch in range(opt.max_epoch):
        # When resuming, skip the epochs that precede the restored one.
        if epoch < last_epoch:
            continue

        train_loss_meter, train_auc_meter, _ = train.train_3d(
            opt, vis, model, train_loader, epoch, lr, optimizer)
        val_loss_meter, val_auc_meter, _ = train.valid_3d(
            opt, vis, model, valid_loader, epoch)

        print("epoch: {}, train_auc: {}, val_auc: {}".format(
            epoch, train_auc_meter.value()[0], val_auc_meter.value()[0]))

        train_loss_list.append(train_loss_meter.value()[0])
        train_auc_list.append(train_auc_meter.value()[0])
        valid_loss_list.append(val_loss_meter.value()[0])
        valid_auc_list.append(val_auc_meter.value()[0])

        # Every save_every epochs, save the model and optimizer parameters.
        if epoch % opt.save_every == 0:
            myutils.save_model_weight(opt, model, optimizer, epoch, lr,
                                      is_CV=True)

        # Learning-rate decay.
        lr = myutils.adjust_lr(opt, optimizer, epoch)

    # Save the final model parameters once training ends.
    myutils.save_model_weight(opt, model, optimizer, epoch, lr,
                              is_final=True, is_CV=True)

    return train_loss_list, train_auc_list, valid_loss_list, valid_auc_list