def test(full_dataset, state_name): # load model and use weights we saved before. model = MyModel() model.load_state_dict( torch.load(f'mymodel_{state_name}.pth', map_location='cpu')) model.eval() criterion = RMSLELoss() # convert dataframe to tensor inputs = full_dataset.test[full_dataset.col].astype(float).values inputs = torch.tensor(inputs).float() # predict outputs = model(inputs) # get labels labels = full_dataset.test_target.values # RMSLE Loss loss = criterion(outputs, torch.from_numpy(labels)) test_loss = sqrt(loss / len(full_dataset.test)) print(f'Testing Loss: {test_loss:.6f}') #save the result result = full_dataset.test["id"].to_frame() result.insert(1, "visitors_pred", outputs.detach().numpy()) result.insert(2, "visitors_actual", labels) result.to_csv(f'result_{state_name}.csv', index=False)
def test(): # load model and use weights we saved before. model = MyModel() model.load_state_dict(torch.load('mymodel.pth', map_location='cpu')) model.eval() # load testing data data = pd.read_csv('test.csv', encoding='utf-8') label_col = [ 'Input_A6_024', 'Input_A3_016', 'Input_C_013', 'Input_A2_016', 'Input_A3_017', 'Input_C_050', 'Input_A6_001', 'Input_C_096', 'Input_A3_018', 'Input_A6_019', 'Input_A1_020', 'Input_A6_011', 'Input_A3_015', 'Input_C_046', 'Input_C_049', 'Input_A2_024', 'Input_C_058', 'Input_C_057', 'Input_A3_013', 'Input_A2_017' ] # ================================================================ # # if do some operations with training data, # do the same operations to the testing data in this block data = data.fillna(0) # ================================================================ # # convert dataframe to tensor, no need to rewrite inputs = data.values inputs = torch.tensor(inputs) # predict and save the result result = pd.DataFrame(columns=label_col) outputs = model(inputs.float()) for i in range(len(outputs)): tmp = outputs[i].detach().numpy() tmp = pd.DataFrame([tmp], columns=label_col) result = pd.concat([result, tmp], ignore_index=True) result.to_csv('result.csv', index=False)
def predict(dim, names, weight, batch_size, pretrain_model_path, model_types=None): print('-' * 100) print('multi-models begin predicting ...') print('-' * 100) # read test data test_file = '/kaggle/input/quora-question-pairs/test.csv.zip' # data test_df = pd.read_csv(test_file) test_ids = test_df['test_id'].values.tolist() result_prob_tmp = torch.zeros((len(test_ids), 2)) # load model for i, name in enumerate(names): # 3.17 add weight_ = weight[i] #model_path = '../model/' + name + '.pkl' output_model_file = os.path.join('output', name + '.pkl') state = torch.load(output_model_file) # 3.10 add model_type = model_types[i] if model_type == 'mlp': test_iter = MyDataset(file=test_file, is_train=False, pretrain_model_path=pretrain_model_path[i]) test_iter = get_dataloader(test_iter, batch_size, shuffle=False, drop_last=False) model = MyModel(dim=dim[i], pretrain_model_path=pretrain_model_path[i]) elif model_type == 'cnn': test_iter = MyDataset(file=test_file, is_train=False, pretrain_model_path=pretrain_model_path[i]) test_iter = get_dataloader(test_iter, batch_size, shuffle=False, drop_last=False) model = MyTextCNNModel(dim=dim[i], pretrain_model_path=pretrain_model_path[i]) elif model_type == 'rcnn': test_iter = MyDataset(file=test_file, is_train=False, pretrain_model_path=pretrain_model_path[i]) test_iter = get_dataloader(test_iter, batch_size, shuffle=False, drop_last=False) model = MyRCNNModel(dim=dim[i], pretrain_model_path=pretrain_model_path[i]) model.to(device) model.load_state_dict(state['model_state']) model.eval() print('-' * 20, 'model', i, '-' * 20) print('load model:%s, loss:%.4f, e:%d, lr:%.7f, time:%d' % (name, state['loss'], state['e'], state['lr'], state['time'])) # predict with torch.no_grad(): j = 0 for batch in tqdm(test_iter): batch = [b.cuda() for b in batch] out = model(batch, task='eval') out = out.cpu() # gpu -> cpu if j == 0: tmp = out # 初始化 tmp else: tmp = torch.cat([tmp, out], dim=0) # 将之后的预测结果拼接到 tmp 中 j += 1 # 当前 模型预测完成 print('model', i, 'predict finished!\n') # 3.17 按权重融合 result_prob_tmp += (weight_ / len(names)) * tmp # 删除模型 del model gc.collect() time.sleep(1) # 3.10 当前融合策略:prob 简单的取 avg _, result = torch.max(result_prob_tmp, dim=-1) result = result.numpy() # 3.16 update: label 0的prob 大于 3,就认为是 label=0 # with open('tmp.txt', 'w', encoding='utf-8') as f: # for r in result_prob_tmp: # f.write(str(r) + '\n') # save result df = pd.DataFrame() df['test_id'] = test_ids df['is_duplicate'] = result df.to_csv("submission.csv", encoding='utf-8', index=False)
def train( batch_size=16, pretrain_model_path='', name='', model_type='mlp', after_bert_choice='last_cls', dim=1024, lr=1e-5, epoch=12, smoothing=0.05, sample=False, #open_ad='', dialog_name='xxx'): if not pretrain_model_path or not name: assert 1 == -1 print('\n********** model type:', model_type, '**********') print('batch_size:', batch_size) # load dataset train_file = '/kaggle/input/dataset/my_train.csv' dev_file = '/kaggle/input/dataset/my_dev.csv' train_num = len(pd.read_csv(train_file).values.tolist()) val_num = len(pd.read_csv(dev_file).values.tolist()) print('train_num: %d, dev_num: %d' % (train_num, val_num)) # 选择模型 if model_type in ['siam', 'esim', 'sbert']: assert 1 == -1 else: train_iter = MyDataset(file=train_file, is_train=True, sample=sample, pretrain_model_path=pretrain_model_path) train_iter = get_dataloader(train_iter, batch_size, shuffle=True, drop_last=True) dev_iter = MyDataset(file=dev_file, is_train=True, sample=sample, pretrain_model_path=pretrain_model_path) dev_iter = get_dataloader(dev_iter, batch_size, shuffle=False, drop_last=False) if model_type == 'mlp': model = MyModel(dim=dim, pretrain_model_path=pretrain_model_path, smoothing=smoothing, after_bert_choice='last_cls') elif model_type == 'cnn': model = MyTextCNNModel(dim=dim, pretrain_model_path=pretrain_model_path, smoothing=smoothing) elif model_type == 'rcnn': model = MyRCNNModel(dim=dim, pretrain_model_path=pretrain_model_path, smoothing=smoothing) #模型加载到gpu model.to(device) model_param_num = 0 ##### 3.24 muppti-gpu-training if n_gpu > 1: model = torch.nn.DataParallel(model) for p in model.parameters(): if p.requires_grad: model_param_num += p.nelement() print('param_num:%d\n' % model_param_num) # 加入对抗训练,提升泛化能力;但是训练速度明显变慢 (插件式调用) # 3.12 change to FGM 更快! """ if open_ad == 'fgm': fgm = FGM(model) elif open_ad == 'pgd': pgd = PGD(model) K = 3 """ # model-store-path #model_path = '/kaggle/output/' + name + '.pkl' # 输出模型默认存放在当前路径下 output_dir = 'output' state = {} time0 = time.time() best_loss = 999 early_stop = 0 for e in range(epoch): print("*" * 100) print("Epoch:", e) param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], 'weight_decay': 0.01 }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }] optimizer = BertAdam(optimizer_grouped_parameters, lr=lr, warmup=0.05, t_total=len(train_iter)) # 设置优化器 train_loss = 0 train_c = 0 train_right_num = 0 model.train() # 将模型设置成训练模式(Sets the module in training mode) print('training..., %s, e:%d, lr:%7f' % (name, e, lr)) for batch in tqdm(train_iter): # 每一次返回 batch_size 条数据 optimizer.zero_grad() # 清空梯度 batch = [b.to(device) for b in batch] # cpu -> GPU # 正常训练 labels = batch[-1].view(-1).cpu().numpy() loss, bert_enc = model(batch, task='train', epoch=epoch) # 进行前向传播,真正开始训练;计算 loss right_num = count_right_num(bert_enc, labels) # multi-gpu training! if n_gpu > 1: loss = loss.mean() loss.backward() # 反向传播计算参数的梯度 #""" if open_ad == 'fgm': # 对抗训练 fgm.attack() # 在embedding上添加对抗扰动 if model_type == 'multi-task': loss_adv, _, _ = model(batch, task='train') else: loss_adv, _ = model(batch, task='train') if n_gpu > 1: loss_adv = loss_adv.mean() loss_adv.backward() # 反向传播,并在正常的grad基础上,累加对抗训练的梯度 fgm.restore() # 恢复embedding参数 elif open_ad == 'pgd': pgd.backup_grad() # 对抗训练 for t in range(K): pgd.attack(is_first_attack=( t == 0 )) # 在embedding上添加对抗扰动, first attack时备份param.data if t != K - 1: optimizer.zero_grad() else: pgd.restore_grad() if model_type == 'multi-task': loss_adv, _, _ = model(batch, task='train') else: loss_adv, _ = model(batch, task='train') if n_gpu > 1: loss_adv = loss_adv.mean() loss_adv.backward() # 反向传播,并在正常的grad基础上,累加对抗训练的梯度 pgd.restore() # 恢复embedding参数 #""" optimizer.step() # 更新参数 train_loss += loss.item() # loss 求和 train_c += 1 train_right_num += right_num val_loss = 0 val_c = 0 val_right_num = 0 model.eval() print('eval...') with torch.no_grad(): # 不进行梯度的反向传播 for batch in tqdm(dev_iter): # 每一次返回 batch_size 条数据 batch = [b.to(device) for b in batch] labels = batch[-1].view(-1).cpu().numpy() loss, bert_enc = model(batch, task='train', epoch=epoch) # 进行前向传播,真正开始训练;计算 loss right_num = count_right_num(bert_enc, labels) if n_gpu > 1: loss = loss.mean() val_c += 1 val_loss += loss.item() val_right_num += right_num train_acc = train_right_num / train_num val_acc = val_right_num / val_num print('train_acc: %.4f, val_acc: %.4f' % (train_acc, val_acc)) print('train_loss: %.4f, val_loss: %.4f, time: %d' % (train_loss / train_c, val_loss / val_c, time.time() - time0)) if val_loss / val_c < best_loss: early_stop = 0 best_loss = val_loss / val_c best_acc = val_acc # 3.24 update 多卡训练时模型保存避坑: if not os.path.exists(output_dir): os.makedirs(output_dir) model_to_save = model.module if hasattr(model, 'module') else model state['model_state'] = model_to_save.state_dict() state['loss'] = val_loss / val_c state['acc'] = val_acc state['e'] = e state['time'] = time.time() - time0 state['lr'] = lr output_model_file = os.path.join(output_dir, name + '.pkl') torch.save(state, output_model_file) #torch.save(state, model_path) best_epoch = e cost_time = time.time() - time0 tmp_train_acc = train_acc best_model = model else: early_stop += 1 if early_stop == 2: break model = best_model lr = lr * 0.5 print("best_loss:", best_loss) # 3.12 add 打印显示最终的最优结果 print('-' * 30) print('best_epoch:', best_epoch, 'best_loss:', best_loss, 'best_acc:', best_acc, 'reach time:', cost_time, '\n') # model-clean del model gc.collect() # 实验结果写入日志 """