def grabPic(webaddress):
    """Fetch a thum.io screenshot of ``webaddress`` and record it.

    The image bytes are handed to ``MyModel.update`` and the stored history
    is then read back through ``GetHistory``.

    Args:
        webaddress: Host (optionally with a path) of the page to capture,
            without the ``http://`` scheme.

    Returns:
        dict with keys ``'content'`` (raw response body), ``'time'``
        (``datetime.datetime.now()`` at completion) and ``'data'`` (the
        value returned by ``GetHistory()``).
    """
    from urllib.parse import quote

    # The target travels inside the already percent-encoded ``url`` query
    # value, so reserved characters ('/', '?', '&', '#', ...) in the address
    # must be escaped as well or they would corrupt the outer query string.
    target = quote(str(webaddress), safe="")
    sec = requests.get(
        "http://image.thum.io/get/?url=http%3A%2F%2F{}%2F".format(target),
        stream=True,
        timeout=30,  # without a timeout a stalled server blocks forever
    )
    MyModel.update(sec.content)
    data = GetHistory()
    return {
        'content': sec.content,
        'time': datetime.datetime.now(),
        'data': data,
    }
def Train_GACNN(params_dict, chrom_i, args, cfg, log, epoches=20):
    """Build the network for one chromosome, train it and return the loss.

    Args:
        params_dict: Parameter dictionary forwarded to ``MyModel`` and to
            ``train_valid`` (as its ``dict`` keyword).
        chrom_i: Chromosome passed to ``MyModel`` together with
            ``params_dict``.
        args: Forwarded unchanged to ``train_valid``.
        cfg: Configuration object; ``cfg.PARA.train.batch_size`` and
            ``cfg.PARA.train.lr`` are read here.
        log: Object exposing a ``logger`` with an ``info`` method.
        epoches: Number of epochs forwarded to ``train_valid``.

    Returns:
        Whatever ``train_valid`` returns.
    """
    logger = log.logger

    logger.info('==> Preparing dataset <==')
    cifar10 = Cifar10(batch_size=cfg.PARA.train.batch_size)
    train_loader, valid_loader = cifar10.Download_Train_Valid()
    # The test loader is still requested although the test stage that
    # consumed it is currently disabled.
    test_loader = cifar10.Download_Test()

    logger.info('==> Loading model <==')
    net = MyModel(params_dict, chrom_i)
    multi_gpu = torch.cuda.device_count() > 1
    if multi_gpu:
        net = nn.DataParallel(net)
    net.to(device)
    print(net)

    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.SGD(net.parameters(), lr=cfg.PARA.train.lr)

    logger.info('==> Waiting Train <==')
    return train_valid(
        net=net,
        dict=params_dict,
        criterion=criterion,
        optimizer=optimizer,
        train_loader=train_loader,
        valid_loader=valid_loader,
        args=args,
        log=log,
        cfg=cfg,
        epoches=epoches,
    )
def test_create_objects(self):
    """
    Create a few objects in the database and check that they are
    actually where they belong.
    """
    from models import MyModel

    o1 = MyModel(col1='foo', col2=1, col3=True)
    o1.save()
    o2 = MyModel(col1='bar', col2=2, col3=False)
    o2.save()

    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(MyModel.objects.get(col2=1).col1, 'foo')
    self.assertEqual(MyModel.objects.get(col2=2).col1, 'bar')
def edit(id):
    """Render the edit form for a ``MyModel`` record and save valid input.

    Args:
        id: Identifier passed to ``MyModel.get``.

    Returns:
        A redirect to the ``index`` endpoint after a successful update,
        otherwise the rendered ``edit.html`` template with the form.
    """
    form_class = model_form(MyModel, Form)
    record = MyModel.get(id)
    form = form_class(request.form, record)

    # Not submitted or invalid: just show the form.
    if not form.validate_on_submit():
        return render_template("edit.html", form=form)

    form.populate_obj(record)
    record.put()
    flash("MyModel updated")
    return redirect(url_for("index"))
def test():
    """Run the saved model on ``test.csv`` and write ``result.csv``.

    Weights are loaded from ``mymodel.pth`` onto the CPU. Missing values in
    the test data are replaced with 0 before prediction. One output row is
    written per input row, using the column names in ``label_col``.
    """
    # Load the model with the weights saved during training.
    model = MyModel()
    model.load_state_dict(torch.load('mymodel.pth', map_location='cpu'))
    model.eval()

    # Load testing data.
    data = pd.read_csv('test.csv', encoding='utf-8')
    label_col = [
        'Input_A6_024', 'Input_A3_016', 'Input_C_013', 'Input_A2_016',
        'Input_A3_017', 'Input_C_050', 'Input_A6_001', 'Input_C_096',
        'Input_A3_018', 'Input_A6_019', 'Input_A1_020', 'Input_A6_011',
        'Input_A3_015', 'Input_C_046', 'Input_C_049', 'Input_A2_024',
        'Input_C_058', 'Input_C_057', 'Input_A3_013', 'Input_A2_017'
    ]

    # ================================================================ #
    # Any preprocessing applied to the training data must be repeated
    # here for the testing data.
    data = data.fillna(0)
    # ================================================================ #

    # Convert the dataframe to a tensor.
    inputs = torch.tensor(data.values)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(inputs.float())

    # Build the result frame in one step instead of concatenating one
    # single-row frame per prediction (which is quadratic in the row count).
    result = pd.DataFrame(outputs.detach().cpu().numpy(), columns=label_col)
    result.to_csv('result.csv', index=False)
def test(full_dataset, state_name):
    """Evaluate the saved model for ``state_name`` and dump its predictions.

    Args:
        full_dataset: Object exposing ``test`` (a DataFrame with an ``id``
            column), ``col`` (the feature column selection) and
            ``test_target`` (the labels).
        state_name: Suffix used for both the weight file
            ``mymodel_<state_name>.pth`` and the output
            ``result_<state_name>.csv``.
    """
    # Restore the trained weights on the CPU.
    weights = torch.load(f'mymodel_{state_name}.pth', map_location='cpu')
    model = MyModel()
    model.load_state_dict(weights)
    model.eval()
    criterion = RMSLELoss()

    # Features -> float tensor -> predictions.
    features = full_dataset.test[full_dataset.col].astype(float).values
    outputs = model(torch.tensor(features).float())

    # Ground truth and the reported loss.
    labels = full_dataset.test_target.values
    loss = criterion(outputs, torch.from_numpy(labels))
    test_loss = sqrt(loss / len(full_dataset.test))
    print(f'Testing Loss: {test_loss:.6f}')

    # Persist ids next to predicted and actual values.
    result = full_dataset.test["id"].to_frame()
    result.insert(1, "visitors_pred", outputs.detach().numpy())
    result.insert(2, "visitors_actual", labels)
    result.to_csv(f'result_{state_name}.csv', index=False)
def train(lr=0.001, epoch=600, batch_size=32):
    """Train ``MyModel`` on ``MLDataset`` with a 90/10 train/validation split.

    The weights with the lowest validation WRMSE seen so far are written to
    ``mymodel.pth``. Loss and WRMSE curves are passed to ``visualize`` when
    training ends.

    Args:
        lr: Learning rate for the Adam optimizer.
        epoch: Number of training epochs.
        batch_size: Batch size for both data loaders.
    """
    train_loss_curve, train_wrmse_curve = [], []
    valid_loss_curve, valid_wrmse_curve = [], []

    # Load model.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = MyModel().to(device)

    # Dataset and dataloaders; random_split creates the validation set.
    dataset = MLDataset()
    train_size = int(0.9 * len(dataset))
    valid_size = len(dataset) - train_size
    train_dataset, valid_dataset = random_split(dataset,
                                                [train_size, valid_size])
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
    valid_dataloader = DataLoader(dataset=valid_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)

    # Loss function and optimizer.
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Start from +inf so the first epoch always yields a checkpoint; a fixed
    # threshold could leave no weights on disk at all.
    best = float('inf')

    for e in range(epoch):
        train_loss, train_wrmse = 0.0, 0.0
        valid_loss, valid_wrmse = 0.0, 0.0
        header = f'Epoch: {e+1}/{epoch}'
        print(f'\n{header}')
        print('-' * len(header))

        # Validation below switches to eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()
        for inputs, labels in tqdm(train_dataloader):
            inputs = inputs.float().to(device)
            labels = labels.float().to(device)
            outputs = model(inputs)

            # MSE loss and WRMSE.
            loss = criterion(outputs, labels)
            wrmse = WRMSE(outputs, labels, device)

            # Weights update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_wrmse += wrmse

        # Validation: eval mode and no autograd, so layers such as dropout or
        # batch norm behave deterministically and no graph is retained.
        model.eval()
        with torch.no_grad():
            for inputs, labels in tqdm(valid_dataloader):
                inputs = inputs.float().to(device)
                labels = labels.float().to(device)
                outputs = model(inputs)

                loss = criterion(outputs, labels)
                wrmse = WRMSE(outputs, labels, device)

                valid_loss += loss.item()
                valid_wrmse += wrmse

        loss_epoch = train_loss / len(train_dataset)
        wrmse_epoch = math.sqrt(train_wrmse / len(train_dataset))
        valid_loss_epoch = valid_loss / len(valid_dataset)
        valid_wrmse_epoch = math.sqrt(valid_wrmse / len(valid_dataset))

        # Save the best model weights as a .pth file.
        if valid_wrmse_epoch < best:
            best = valid_wrmse_epoch
            torch.save(model.state_dict(), 'mymodel.pth')

        print(f'Training loss: {loss_epoch:.4f}')
        print(f'Training WRMSE: {wrmse_epoch:.4f}')
        print(f'Valid loss: {valid_loss_epoch:.4f}')
        print(f'Valid WRMSE: {valid_wrmse_epoch:.4f}')

        # Record loss and WRMSE for every epoch.
        train_loss_curve.append(loss_epoch)
        train_wrmse_curve.append(wrmse_epoch)
        valid_loss_curve.append(valid_loss_epoch)
        valid_wrmse_curve.append(valid_wrmse_epoch)

    # Generate training curves.
    visualize(train_loss_curve, valid_loss_curve, 'Train Loss')
    visualize(train_wrmse_curve, valid_wrmse_curve, 'Train WRMSE')
    print("\nBest Validation WRMSE:", best)
def predict(dim, names, weight, batch_size, pretrain_model_path,
            model_types=None):
    """Predict with several saved models and write the weighted ensemble.

    For every entry in ``names`` the checkpoint ``output/<name>.pkl`` is
    loaded, the test set is scored, and the scores are accumulated with the
    factor ``weight[i] / len(names)``. The class with the highest accumulated
    score per row is written to ``submission.csv``.

    Args:
        dim: Per-model ``dim`` argument for the model constructors.
        names: Checkpoint base names, one per model.
        weight: Per-model fusion weights.
        batch_size: Batch size used for the test loader.
        pretrain_model_path: Per-model pretrained-model paths.
        model_types: Per-model type, each one of ``'mlp'``, ``'cnn'`` or
            ``'rcnn'``. ``None`` treats every model as ``'mlp'``.

    Raises:
        ValueError: If a model type is not one of the supported values.
    """
    print('-' * 100)
    print('multi-models begin predicting ...')
    print('-' * 100)

    if model_types is None:
        # The default used to crash on indexing None; fall back to 'mlp'.
        model_types = ['mlp'] * len(names)

    # Read test data.
    test_file = '/kaggle/input/quora-question-pairs/test.csv.zip'
    test_df = pd.read_csv(test_file)
    test_ids = test_df['test_id'].values.tolist()
    result_prob_tmp = torch.zeros((len(test_ids), 2))

    for i, name in enumerate(names):
        weight_ = weight[i]
        model_type = model_types[i]

        if model_type == 'mlp':
            model_cls = MyModel
        elif model_type == 'cnn':
            model_cls = MyTextCNNModel
        elif model_type == 'rcnn':
            model_cls = MyRCNNModel
        else:
            # Previously this fell through and failed later on an unbound
            # ``model`` name.
            raise ValueError('unsupported model type: %r' % (model_type,))

        output_model_file = os.path.join('output', name + '.pkl')
        # map_location lets checkpoints saved on GPU load on the active device.
        state = torch.load(output_model_file, map_location=device)

        # Dataset and loader setup is identical for every model type.
        test_iter = MyDataset(file=test_file,
                              is_train=False,
                              pretrain_model_path=pretrain_model_path[i])
        test_iter = get_dataloader(test_iter,
                                   batch_size,
                                   shuffle=False,
                                   drop_last=False)
        model = model_cls(dim=dim[i],
                          pretrain_model_path=pretrain_model_path[i])

        model.to(device)
        model.load_state_dict(state['model_state'])
        model.eval()
        print('-' * 20, 'model', i, '-' * 20)
        print('load model:%s, loss:%.4f, e:%d, lr:%.7f, time:%d' %
              (name, state['loss'], state['e'], state['lr'], state['time']))

        # Predict. Batches go to the same device as the model (they used to
        # be forced onto CUDA regardless of ``device``).
        outs = []
        with torch.no_grad():
            for batch in tqdm(test_iter):
                batch = [b.to(device) for b in batch]
                out = model(batch, task='eval')
                outs.append(out.cpu())
        # Concatenate once instead of growing a tensor batch by batch.
        tmp = torch.cat(outs, dim=0)

        print('model', i, 'predict finished!\n')

        # Weighted fusion of the per-model scores.
        result_prob_tmp += (weight_ / len(names)) * tmp

        # Free the model before loading the next one.
        del model
        gc.collect()
        time.sleep(1)

    # Fusion strategy: pick the class with the highest accumulated score.
    _, result = torch.max(result_prob_tmp, dim=-1)
    result = result.numpy()

    # Save result.
    df = pd.DataFrame()
    df['test_id'] = test_ids
    df['is_duplicate'] = result
    df.to_csv("submission.csv", encoding='utf-8', index=False)
######### you may change the dataset split % ######### train_set, val_set, test_set = prepare_datasets(splits=[0.7, 0.15, 0.15]) ############# create torch DataLoaders ############### ########### you may change the batch size ############ train_loader = torch.utils.data.DataLoader(train_set, batch_size=args.batch_size, shuffle=True) val_loader = torch.utils.data.DataLoader(val_set, batch_size=1000) test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000) ################ initialize the model ################ if args.model == 'convnet': model = ConvNet() elif args.model == 'mymodel': model = MyModel() else: raise Exception('Incorrect model name') if args.cuda: model.cuda() ######## Define loss function and optimizer ########## ############## Write your code here ################## params = model.parameters() optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay) criterion = nn.CrossEntropyLoss() ###################################################### def train(epoch):
def main():
    """Train ``MyModel`` on a single frame from ``MyDataset``.

    The model is optimised with a REINFORCE-style objective: an action is
    sampled from the softmax of the model output, rewarded with 1 when it
    equals the label (mean-centred over the batch), and the loss is the
    negative log-probability weighted by that reward. Every 100 steps the
    argmax accuracy is printed and a checkpoint is written to
    ``checkpoints.tar``.
    """
    _i, _j, _k = 2, 3, 3
    dataset = MyDataset(_i, _j, _k)
    dtype = torch.float
    device = torch.device("cpu")

    # Input, hidden and output sizes.
    D_in, H, D_out = _i + _j + _k, 16, _i * _j * _k

    x, y = dataset.get_frame()
    x = torch.tensor(x, dtype=dtype, device=device)
    y = torch.tensor(y, dtype=torch.long, device=device).squeeze()
    print(x.size(), y.size())

    model = MyModel(D_in, H, D_out)
    # The deprecated ``reduce=None`` argument was dropped; the resulting
    # reduction is the default in both cases.
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

    # Named switch instead of ``if True / elif True / else``: the supervised
    # path stays available but is no longer hidden behind unreachable code.
    use_reinforce = True

    for t in range(10001):
        if use_reinforce:
            y_pred = model(x)
            probs = F.softmax(y_pred, dim=1)
            m = Categorical(probs)
            action = m.sample()
            reward = torch.eq(action, y).to(torch.float)
            reward = reward - reward.mean()  # mean-centre as a baseline
            loss = -m.log_prob(action) * reward
            model.zero_grad()
            loss.sum().backward()
            optimizer.step()
        else:
            # Supervised alternative.
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            model.zero_grad()
            loss.backward()
            optimizer.step()

        if t % 100 == 0:
            with torch.no_grad():
                y_pred = model(x)
                eq = torch.eq(torch.argmax(y_pred, dim=1), y)
                correct = torch.sum(eq).item()
                print("t: {}, acc: {}/{} = {}".format(
                    t, correct, eq.numel(), correct / eq.numel()))
            torch.save(
                {
                    'epoch': t,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss
                }, "checkpoints.tar")
def train(
        batch_size=16,
        pretrain_model_path='',
        name='',
        model_type='mlp',
        after_bert_choice='last_cls',
        dim=1024,
        lr=1e-5,
        epoch=12,
        smoothing=0.05,
        sample=False,
        dialog_name='xxx',
        open_ad=''):
    """Fine-tune one model and save the checkpoint with the best dev loss.

    A fresh ``BertAdam`` optimizer is created every epoch with the current
    learning rate. After each epoch the dev set is scored; when the mean dev
    loss improves, the state is saved to ``output/<name>.pkl``, otherwise the
    learning rate is halved. Training stops after two consecutive epochs
    without improvement.

    Args:
        batch_size: Batch size for both loaders.
        pretrain_model_path: Path handed to the dataset and model
            constructors; must be non-empty.
        name: Checkpoint base name; must be non-empty.
        model_type: One of ``'mlp'``, ``'cnn'`` or ``'rcnn'``.
        after_bert_choice: Accepted for compatibility.
            NOTE(review): ``MyModel`` is always built with ``'last_cls'``;
            confirm whether this argument should be forwarded.
        dim: ``dim`` argument for the model constructor.
        lr: Initial learning rate.
        epoch: Maximum number of epochs; also forwarded to the model's
            forward call as ``epoch``.
        smoothing: ``smoothing`` argument for the model constructor.
        sample: ``sample`` argument for the dataset constructor.
        dialog_name: Unused in the visible code.
        open_ad: Adversarial training mode: ``'fgm'``, ``'pgd'`` or ``''``
            (disabled). Added at the end of the signature because the loop
            already branched on this name without it being defined.

    Raises:
        ValueError: If ``pretrain_model_path`` or ``name`` is empty, or
            ``model_type`` is not supported.
    """
    # Explicit exceptions instead of ``assert 1 == -1``: asserts are
    # stripped under ``python -O``.
    if not pretrain_model_path or not name:
        raise ValueError('pretrain_model_path and name must be non-empty')
    if model_type not in ('mlp', 'cnn', 'rcnn'):
        raise ValueError('unsupported model type: %r' % (model_type,))

    print('\n********** model type:', model_type, '**********')
    print('batch_size:', batch_size)

    # Load dataset.
    train_file = '/kaggle/input/dataset/my_train.csv'
    dev_file = '/kaggle/input/dataset/my_dev.csv'
    train_num = len(pd.read_csv(train_file))
    val_num = len(pd.read_csv(dev_file))
    print('train_num: %d, dev_num: %d' % (train_num, val_num))

    train_iter = MyDataset(file=train_file,
                           is_train=True,
                           sample=sample,
                           pretrain_model_path=pretrain_model_path)
    train_iter = get_dataloader(train_iter,
                                batch_size,
                                shuffle=True,
                                drop_last=True)
    dev_iter = MyDataset(file=dev_file,
                         is_train=True,
                         sample=sample,
                         pretrain_model_path=pretrain_model_path)
    dev_iter = get_dataloader(dev_iter,
                              batch_size,
                              shuffle=False,
                              drop_last=False)

    # Select the model.
    if model_type == 'mlp':
        model = MyModel(dim=dim,
                        pretrain_model_path=pretrain_model_path,
                        smoothing=smoothing,
                        after_bert_choice='last_cls')
    elif model_type == 'cnn':
        model = MyTextCNNModel(dim=dim,
                               pretrain_model_path=pretrain_model_path,
                               smoothing=smoothing)
    else:  # 'rcnn'
        model = MyRCNNModel(dim=dim,
                            pretrain_model_path=pretrain_model_path,
                            smoothing=smoothing)

    # Move the model to the target device.
    model.to(device)

    # Multi-GPU training.
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    model_param_num = sum(p.nelement() for p in model.parameters()
                          if p.requires_grad)
    print('param_num:%d\n' % model_param_num)

    # Optional adversarial training (plug-in style). It improves
    # generalisation at the cost of slower training; FGM is the faster option.
    fgm = pgd = None
    K = 3  # number of PGD attack steps
    if open_ad == 'fgm':
        fgm = FGM(model)
    elif open_ad == 'pgd':
        pgd = PGD(model)

    # Checkpoints are stored under ./output by default.
    output_dir = 'output'
    state = {}
    time0 = time.time()
    best_loss = 999
    early_stop = 0
    # Defaults so the final summary cannot hit an unbound name when no epoch
    # ever improves on ``best_loss``.
    best_epoch = -1
    best_acc = 0.0
    cost_time = 0.0

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    for e in range(epoch):
        print("*" * 100)
        print("Epoch:", e)

        # Set up the optimizer: no weight decay for biases and LayerNorm.
        param_optimizer = list(model.named_parameters())
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay': 0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=lr,
                             warmup=0.05,
                             t_total=len(train_iter))

        train_loss = 0
        train_c = 0
        train_right_num = 0

        model.train()
        print('training..., %s, e:%d, lr:%7f' % (name, e, lr))
        for batch in tqdm(train_iter):
            optimizer.zero_grad()
            batch = [b.to(device) for b in batch]

            # Regular training step.
            labels = batch[-1].view(-1).cpu().numpy()
            loss, bert_enc = model(batch, task='train', epoch=epoch)
            right_num = count_right_num(bert_enc, labels)

            # DataParallel returns one loss per replica.
            if n_gpu > 1:
                loss = loss.mean()
            loss.backward()

            if open_ad == 'fgm':
                fgm.attack()  # perturb the embeddings
                loss_adv, _ = model(batch, task='train')
                if n_gpu > 1:
                    loss_adv = loss_adv.mean()
                # Accumulate the adversarial gradient on top of the regular one.
                loss_adv.backward()
                fgm.restore()  # restore the embedding parameters
            elif open_ad == 'pgd':
                pgd.backup_grad()
                for t in range(K):
                    # The first attack backs up param.data.
                    pgd.attack(is_first_attack=(t == 0))
                    if t != K - 1:
                        optimizer.zero_grad()
                    else:
                        pgd.restore_grad()
                    loss_adv, _ = model(batch, task='train')
                    if n_gpu > 1:
                        loss_adv = loss_adv.mean()
                    loss_adv.backward()
                pgd.restore()  # restore the embedding parameters

            optimizer.step()

            train_loss += loss.item()
            train_c += 1
            train_right_num += right_num

        val_loss = 0
        val_c = 0
        val_right_num = 0

        model.eval()
        print('eval...')
        with torch.no_grad():
            for batch in tqdm(dev_iter):
                batch = [b.to(device) for b in batch]

                labels = batch[-1].view(-1).cpu().numpy()
                loss, bert_enc = model(batch, task='train', epoch=epoch)
                right_num = count_right_num(bert_enc, labels)

                if n_gpu > 1:
                    loss = loss.mean()

                val_c += 1
                val_loss += loss.item()
                val_right_num += right_num

        train_acc = train_right_num / train_num
        val_acc = val_right_num / val_num

        print('train_acc: %.4f, val_acc: %.4f' % (train_acc, val_acc))
        print('train_loss: %.4f, val_loss: %.4f, time: %d' %
              (train_loss / train_c, val_loss / val_c, time.time() - time0))

        if val_loss / val_c < best_loss:
            early_stop = 0
            best_loss = val_loss / val_c
            best_acc = val_acc

            # With DataParallel the real model lives in ``.module``; save that
            # so the checkpoint loads without the wrapper.
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            model_to_save = model.module if hasattr(model, 'module') else model
            state['model_state'] = model_to_save.state_dict()
            state['loss'] = val_loss / val_c
            state['acc'] = val_acc
            state['e'] = e
            state['time'] = time.time() - time0
            state['lr'] = lr

            output_model_file = os.path.join(output_dir, name + '.pkl')
            torch.save(state, output_model_file)

            best_epoch = e
            cost_time = time.time() - time0
        else:
            early_stop += 1
            if early_stop == 2:
                break
            # NOTE(review): the earlier ``model = best_model`` rebinding was
            # removed. ``best_model`` was an alias of ``model``, so it never
            # rolled the weights back; only the learning rate changes here.
            lr = lr * 0.5
        print("best_loss:", best_loss)

    # Print the final best result.
    print('-' * 30)
    print('best_epoch:', best_epoch, 'best_loss:', best_loss, 'best_acc:',
          best_acc, 'reach time:', cost_time, '\n')

    # Clean up the model.
    del model
    gc.collect()
def GetHistory():
    """Return the first field of every record from ``MyModel.GetPicturs()``."""
    return [record[0] for record in MyModel.GetPicturs()]
verbose=2, steps_per_epoch=train_generator.samples // batch_size, epochs=epoch_number, validation_data=validation_generator, validation_steps=validation_generator.samples // batch_size) print("Model saved to file: {}".format(output_model_file)) model.save(models_dir + output_model_file) if __name__ == "__main__": if not os.path.exists(train_data_dir): print("Train data directory does not exist, exiting") exit(1) args = parse_args() models_class = MyModel(size=image_size) if args.model == 'CLS': output_model_file = 'simple_model.h5' model = models_class.get_simple_model() elif args.model == 'CNV': output_model_file = 'cnv_model.h5' model = models_class.get_conv_learn_model() elif args.model == 'TRM': output_model_file = 'all_untrim_model.h5' model = models_class.get_all_net_trim_model() else: print("Wrong model, use one of following: 'CLS', 'CNV', 'TRM'") exit(1) train_model(model)
def train(lr=0.001, epoch=200, batch_size=64):
    """Train ``MyModel`` on ``train.csv`` with an 80/20 train/validation split.

    The weights with the lowest training WRMSE seen so far are written to
    ``mymodel.pth``. Loss and WRMSE curves are passed to ``visualize`` when
    training ends.

    Args:
        lr: Learning rate for the Adam optimizer.
        epoch: Number of training epochs.
        batch_size: Batch size for both data loaders.
    """
    train_loss_curve, train_wrmse_curve = [], []
    valid_loss_curve, valid_wrmse_curve = [], []

    # Best-so-far tracking. ``best`` used to stay at its initial value, so
    # every epoch overwrote the checkpoint and the reported "best" figures
    # were simply those of the last epoch.
    best = float('inf')
    best_epoch = -1
    best_loss = float('inf')
    best_wrmse = float('inf')

    # Load model.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = MyModel().to(device)

    # Load data and split the row indices 80/20.
    full_dataset = pd.read_csv('train.csv', encoding='utf-8')
    lengths = [
        int(round(len(full_dataset) * 0.8)),
        int(round(len(full_dataset) * 0.2))
    ]
    train_set, valid_set = random_split(full_dataset, lengths)
    train_dataset = MLDataset(full_dataset.iloc[train_set.indices])
    valid_dataset = MLDataset(full_dataset.iloc[valid_set.indices])

    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
    valid_dataloader = DataLoader(dataset=valid_dataset,
                                  batch_size=batch_size,
                                  shuffle=False)

    # Loss function and optimizer.
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for e in tqdm(range(epoch)):
        train_loss, valid_loss = 0.0, 0.0
        train_wrmse, valid_wrmse = 0.0, 0.0
        header = f'Epoch: {e+1}/{epoch}'
        print(f'\n{header}')
        print('-' * len(header))

        # Validation below switches to eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()
        for inputs, labels in train_dataloader:
            inputs = inputs.float().to(device)
            labels = labels.float().to(device)
            outputs = model(inputs)

            # MSE loss and WRMSE.
            loss = criterion(outputs, labels)
            wrmse = WRMSE(outputs, labels, device)

            # Weights update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_wrmse += wrmse

        # Validation: eval mode, no autograd graph.
        model.eval()
        with torch.no_grad():
            for inputs, labels in valid_dataloader:
                inputs = inputs.float().to(device)
                labels = labels.float().to(device)
                outputs = model(inputs)

                loss = criterion(outputs, labels)
                wrmse = WRMSE(outputs, labels, device)

                valid_loss += loss.item()
                valid_wrmse += wrmse

        train_loss_epoch = train_loss / len(train_dataset)
        train_wrmse_epoch = math.sqrt(train_wrmse / len(train_dataset))
        valid_loss_epoch = valid_loss / len(valid_dataset)
        valid_wrmse_epoch = math.sqrt(valid_wrmse / len(valid_dataset))

        # Save the best model weights as a .pth file.
        if train_wrmse_epoch < best:
            best = train_wrmse_epoch
            best_wrmse = train_wrmse_epoch
            best_loss = train_loss_epoch
            best_epoch = e
            torch.save(model.state_dict(), 'mymodel.pth')

        print(f'Training loss: {train_loss_epoch:.6f}')
        print(f'Training WRMSE: {train_wrmse_epoch:.6f}')
        print(f'Valid loss: {valid_loss_epoch:.6f}')
        print(f'Valid WRMSE: {valid_wrmse_epoch:.6f}')

        # Record loss and WRMSE for every epoch.
        train_loss_curve.append(train_loss_epoch)
        train_wrmse_curve.append(train_wrmse_epoch)
        valid_loss_curve.append(valid_loss_epoch)
        valid_wrmse_curve.append(valid_wrmse_epoch)

    # Print the best WRMSE.
    print(f"\nBest Epoch = {best_epoch}")
    print(f"Best Loss = {best_loss:.4f}")
    print(f"Best WRMSE = {best_wrmse:.4f}\n")

    # Generate training curves. The best marker is anchored at the epoch that
    # produced the best value rather than at the last epoch index.
    visualize(train=train_loss_curve,
              valid=valid_loss_curve,
              title='Loss Curve',
              filename='loss.png',
              best=(best_epoch, best_loss))
    visualize(train_wrmse_curve,
              valid_wrmse_curve,
              title='WRMSE Curve',
              filename='wrmse.png',
              best=(best_epoch, best_wrmse),
              wrmse=True)
def train(dataset_path, lr, epoch, batch_size, scaler_flag, state_name):
    """Train ``MyModel`` on ``Visitor_Dataset`` with the RMSLE loss.

    The weights with the lowest epoch loss seen so far are written to
    ``mymodel_<state_name>.pth`` and the loss curve is passed to
    ``visualize`` as ``rmsle_<state_name>.png``.

    Args:
        dataset_path: Forwarded to ``Visitor_Dataset``.
        lr: Learning rate for the Adam optimizer.
        epoch: Number of training epochs.
        batch_size: Batch size for the data loader.
        scaler_flag: Forwarded to ``Visitor_Dataset``.
        state_name: Suffix for the weight and figure file names.

    Returns:
        The ``Visitor_Dataset`` instance used for training.
    """
    train_loss_curve = []
    # Best-so-far tracking. ``best`` used to stay at its sentinel value, so
    # every epoch overwrote the checkpoint and "best" meant "last".
    best = None
    best_loss = float('inf')
    best_epoch = -1

    # Prefer the second GPU when there is one; fall back to the first GPU
    # (instead of failing on single-GPU hosts) or to the CPU.
    if torch.cuda.device_count() > 1:
        device = torch.device('cuda:1')
    elif torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    model = MyModel().to(device)
    model.train()

    # Dataset and dataloader.
    full_dataset = Visitor_Dataset(dataset_path, scaler_flag)
    train_dataloader = DataLoader(dataset=full_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)

    # Loss function and optimizer.
    criterion = RMSLELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for e in range(epoch):
        train_loss = 0.0
        header = f'Epoch: {e+1}/{epoch}'
        print(f'\n{header}')
        print('-' * len(header))

        for inputs, labels in tqdm(train_dataloader):
            inputs = inputs.float().to(device)
            labels = labels.float().to(device)
            outputs = model(inputs)

            loss = criterion(outputs, labels)

            # Weights update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss_epoch = sqrt(train_loss / len(full_dataset))

        # Save the best model weights as a .pth file.
        if best is None or train_loss_epoch < best:
            best = train_loss_epoch
            best_loss = train_loss_epoch
            best_epoch = e
            torch.save(model.state_dict(), f'mymodel_{state_name}.pth')

        print(f'Training Loss: {train_loss_epoch:.6f}')

        # Record the loss for every epoch.
        train_loss_curve.append(train_loss_epoch)

    # Print the best RMSLE.
    print(f"Final Training RMSLE Loss = {best_loss:.6f}")

    visualize(value=train_loss_curve,
              title='Train Loss Curve',
              filename=f'rmsle_{state_name}.png')

    return full_dataset
def main():
    """Train and evaluate the model selected in the YAML config on CIFAR-10.

    Command-line arguments are parsed, every key of every section in the
    config file is copied onto ``args``, and the chosen model is trained for
    ``args.epochs`` epochs. The model with the best validation accuracy is
    kept, its per-class accuracy is printed and, when ``args.save_best`` is
    set, its weights are written to ``./checkpoints/<model>.pth``.

    Raises:
        ValueError: If ``args.model`` is not one of the known model names.
    """
    global args
    args = parser.parse_args()
    with open(args.config) as f:
        # safe_load: plain ``yaml.load`` without a Loader is rejected by
        # PyYAML 6+ and can construct arbitrary objects on older versions.
        config = yaml.safe_load(f)

    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)

    cifar_mean = (0.4914, 0.4822, 0.4465)
    cifar_std = (0.2023, 0.1994, 0.2010)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ])

    # Normalize the test set like the training set, without augmentation.
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ])

    # Defined up front so the focal loss below also works with the regular
    # (balanced) dataset, where no class weights are computed.
    per_cls_weights = None
    if args.imbalance == "regular":
        train_dataset = torchvision.datasets.CIFAR10(
            root='./data', train=True, download=True,
            transform=transform_train)
    else:
        train_dataset = IMBALANCECIFAR10(root='../part1-convnet/data',
                                         transform=transform_train,
                                         )
        cls_num_list = train_dataset.get_cls_num_list()
        if args.reweight:
            per_cls_weights = reweight(cls_num_list, beta=args.beta)
            if torch.cuda.is_available():
                per_cls_weights = per_cls_weights.cuda()

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True)

    test_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=transform_test)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=100, shuffle=False, num_workers=2)

    if args.model == 'TwoLayerNet':
        model = TwoLayerNet(3072, 256, 10)
    elif args.model == 'VanillaCNN':
        model = VanillaCNN()
    elif args.model == 'MyModel':
        model = MyModel()
    elif args.model == 'ResNet-32':
        model = resnet32()
    else:
        # Previously an unknown name surfaced later as an unbound ``model``.
        raise ValueError('unknown model name: %r' % (args.model,))
    print(model)
    if torch.cuda.is_available():
        model = model.cuda()

    if args.loss_type == "CE":
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = FocalLoss(weight=per_cls_weights, gamma=1)

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.reg)
    best = 0.0
    best_cm = None
    best_model = None
    for epoch in range(args.epochs):
        adjust_learning_rate(optimizer, epoch, args)

        # Train loop.
        train(epoch, train_loader, model, optimizer, criterion)

        # Validation loop.
        acc, cm = validate(epoch, test_loader, model, criterion)

        if acc > best:
            best = acc
            best_cm = cm
            best_model = copy.deepcopy(model)

    print('Best Prec @1 Acccuracy: {:.4f}'.format(best))
    # No epoch may have improved on the initial accuracy (or none ran), in
    # which case there is no confusion matrix or model to report.
    if best_cm is not None:
        per_cls_acc = best_cm.diag().detach().numpy().tolist()
        for i, acc_i in enumerate(per_cls_acc):
            print("Accuracy of Class {}: {:.4f}".format(i, acc_i))

    if args.save_best and best_model is not None:
        torch.save(best_model.state_dict(),
                   './checkpoints/' + args.model.lower() + '.pth')