def model_test(**kwargs):
    """Load the model selected by the (kwarg-overridden) config, run it over
    the test split and write (label, prediction) pairs to the result file.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    opt.parse(kwargs)

    # Build the configured model and switch to inference mode.
    model = getattr(Nets, opt.model)().eval()
    if opt.load_model_path:
        model.load(opt.load_model_path)

    # Move the model to GPU once, up front (the original re-issued
    # model.to(device) on every batch, which is redundant).
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if opt.use_gpu:
        model.to(device)

    # Test data: test=True / train=False selects the held-out split.
    test_data = imageSentiment(opt.train_path, test=True, train=False)
    test_dataloader = DataLoader(test_data, batch_size=opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers)

    results = []
    with torch.no_grad():
        # BUG FIX: the loop iterated over an undefined name `dataloader`;
        # it must iterate the `test_dataloader` built above.
        for ii, (label, input) in tqdm(enumerate(test_dataloader),
                                       total=len(test_dataloader)):
            if opt.use_gpu:
                label, input = label.to(device), input.to(device)
            score = model(input)
            # Predicted class = argmax over the class dimension.
            _, predicted = torch.max(score.data, 1)
            # BUG FIX: the original zipped labels against raw score rows
            # (and called int() on a full tensor row); pair each
            # ground-truth label with its predicted class instead.
            results += [(int(label_), int(pred_))
                        for label_, pred_ in zip(label, predicted)]

    write_csv(results, opt.result_file)  # persist results as CSV
def test(**kwargs):
    """Run inference over the test set and write (path, P(class 0)) rows
    to ``opt.result_file``.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    :return: list of (path, probability) tuples, one per test image.
    """
    opt.parse(kwargs)

    # configure model
    # BUG FIX: removed the stray `ipdb.set_trace()` that halted every
    # non-interactive run of this function.
    model = getattr(models, opt.model)().eval()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # data
    train_data = DogCat(opt.test_data_root, test=True)
    test_dataloader = DataLoader(train_data, batch_size=opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers)

    results = []
    # no_grad replaces the deprecated `Variable(..., volatile=True)` API
    # (volatile was removed in PyTorch 0.4).
    with t.no_grad():
        for ii, (data, path) in tqdm(enumerate(test_dataloader)):
            input = data
            if opt.use_gpu:
                input = input.cuda()
            score = model(input)
            # Probability of the first class; dim=1 makes the softmax
            # explicit over classes (implicit dim is deprecated).
            probability = t.nn.functional.softmax(score, dim=1)[:, 0].data.tolist()
            batch_results = [(path_, probability_)
                             for path_, probability_ in zip(path, probability)]
            results += batch_results

    write_csv(results, opt.result_file)
    return results
def train(**kwargs):
    """Train the configured sentiment model and stream metrics to visdom.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(Nets, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if opt.use_gpu:
        model.to(device)

    # step2: data — same source file, split by the train flag
    train_data = imageSentiment(opt.train_path, train=True)   # training split
    val_data = imageSentiment(opt.train_path, train=False)    # validation split
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: loss & optimizer.  F.nll_loss is used below, so the model is
    # expected to end in log-softmax (CrossEntropyLoss would want logits).
    lr = opt.lr
    # BUG FIX: the learning rate was hard-coded to 0.001, silently
    # ignoring the configured opt.lr; use the config value.
    optimizer = Optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True)

    # step4: metrics — running average loss and a 7-class confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(7)

    for i in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        total_loss = 0.

        for ii, (label, data) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):
            if opt.use_gpu:
                label, data = label.to(device), data.to(device)

            optimizer.zero_grad()
            score = model(data)
            # nll_loss takes class-index targets directly (no one-hot)
            loss = F.nll_loss(score, label)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

            # update metrics and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, label.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                vis.plot('mach avgloss', total_loss / len(train_dataloader))

        model.save()

        # validation accuracy per epoch
        val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
def train(**kwargs):
    """Fine-tune an ImageNet-pretrained DenseNet-201 as a binary CAG
    classifier, logging loss/accuracy and validating every 5 epochs.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    opt.parse(kwargs)

    # step1: configure model — pretrained DenseNet-201 with the classifier
    # head replaced for 2 classes, wrapped for multi-GPU training.
    model = models.densenet201(pretrained=True)
    model.classifier = torch.nn.Linear(1920, 2)
    model = model.cuda()
    model = torch.nn.DataParallel(model)

    # step2: data
    train_data = CAG(opt.train_data_root, train=True)
    val_data = CAG(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    loss_func = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.SGD(model.parameters(), lr=lr)

    # train
    for epoch in range(opt.max_epoch):
        print('epoch {}'.format(epoch + 1))
        # NOTE(review): train_num starts at 1 (not 0), presumably as a
        # divide-by-zero guard; it slightly skews the reported accuracy.
        train_num = 1
        train_acc = 0
        batch_num = 0
        loss_sum = 0

        for ii, (data, label) in enumerate(train_dataloader):
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)

            # softmax only for accuracy bookkeeping
            probability = t.nn.functional.softmax(score, dim=1)
            _, result = torch.max(probability, 1)
            train_correct = (result == target).sum()
            train_acc += train_correct.item()
            train_num += target.size(0)

            # BUG FIX: CrossEntropyLoss expects raw logits (it applies
            # log_softmax internally); feeding softmax probabilities
            # double-normalized the output and crushed the gradients.
            loss = loss_func(score, target)
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
            batch_num += 1

        print("当前loss:", loss_sum / batch_num)
        logger.scalar_summary('train_loss', loss_sum / batch_num, epoch)
        accuracy = train_acc / train_num
        logger.scalar_summary('train_accurancy', accuracy, epoch)

        # step-decay the learning rate every 100 epochs
        if (epoch + 1) % 100 == 0:
            lr = lr * opt.lr_decay
            print("当前学习率", lr)
            logger.scalar_summary('lr', lr, epoch)
            # mutate every param group so optimizer state is preserved
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        # validate every 5 epochs
        if (epoch + 1) % 5 == 0:
            val_accuracy, val_loss = val(model, val_dataloader)
            print("验证集上准确率为:", val_accuracy)
            # flag whether this checkpoint reached the 96% threshold
            if val_accuracy >= 0.96:
                opt.flag = True
            if val_accuracy < 0.96:
                opt.flag = False
            logger.scalar_summary('val_accurancy', val_accuracy, epoch)
            print("val_loss:", val_loss)
            logger.scalar_summary('val_loss', val_loss, epoch)
def train(**kwargs):
    """Train the FCN8s segmentation model on the Kaggle salt dataset.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    opt.parse(kwargs)

    # step1: configure model (defined in models.py)
    model = FCN8s()
    device = t.device('cpu')
    if opt.use_gpu == True:
        device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
    print(device)
    model.to(device)

    # step2: data preparation
    # NOTE(review): the validation loader reads the same root as training
    # with no split flag — presumably intentional, but verify upstream.
    train_data = KaggleSalt(root=opt.train_data_root)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_data = KaggleSalt(opt.train_data_root)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=True,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer — per-pixel binary classification
    criterion = t.nn.BCEWithLogitsLoss()
    lr = opt.lr
    optimizer = t.optim.RMSprop(model.parameters(), lr=lr,
                                momentum=opt.momentum,
                                weight_decay=opt.weight_decay)
    loss_pre = 1e10

    # step4: training
    for epoch in range(opt.max_epoch):
        loss_now = 0
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            data = data.to(device)
            label = t.Tensor(label.float())
            label = label.to(device)

            # forward: flatten heatmap and mask to 1-D for BCE-with-logits
            heatmap = model(data)
            loss = criterion(heatmap.reshape(-1), label.reshape(-1))

            # backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # accumulate epoch loss for the decay decision below
            loss_now += loss.item()

        model.save()
        val_accuracy = val(model, val_dataloader)
        print('epoch:%d: loss:%f / acc:%f' % (epoch, loss_now, val_accuracy))

        # decay the learning rate when the epoch loss stopped improving
        if loss_now > loss_pre:
            lr = lr * opt.lr_decay
            # BUG FIX: the decayed lr was computed but never written back
            # to the optimizer's param groups, so the decay had no effect.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        loss_pre = loss_now
def train(**kwargs):
    """Pre-train BERT (masked LM + next-sentence prediction) and save the
    encoder weights to a timestamped checkpoint.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    # The received parameters will be used to update the configuration dict
    opt.parse(kwargs)

    # Step 0: data and device
    inputs = get_inputs(data_dir=opt.data_dir, corpus_file=opt.corpus_file,
                        vocab_file=opt.vocab_path)
    train_dataloader = DataLoader(dataset=BERTDataset(
        inputs, max_sen_len=opt.max_sen_len),
        shuffle=True,
        batch_size=opt.batch_size,
        collate_fn=BERTCollate_fn)
    use_cuda = True if opt.use_cuda and torch.cuda.is_available() else False
    if use_cuda:
        torch.cuda.empty_cache()
    device = torch.device('cuda' if use_cuda else 'cpu')
    writer = SummaryWriter()

    # Step 1: model — shared BERT encoder plus the two pre-training heads
    bert = BERT(n_layers=opt.n_layers, d_model=opt.d_model,
                vocab_size=opt.max_vocab_size, max_len=opt.max_sen_len,
                n_heads=opt.n_heads, n_seg=opt.n_seg,
                ff_hidden=opt.n_ff_hidden, device=device).to(device)
    masked_lm = MaskedLM(d_model=opt.d_model, vocab_size=opt.max_vocab_size,
                         bert=bert).to(device)
    next_pred = NextPred(d_model=opt.d_model).to(device)

    # Trace the encoder graph into TensorBoard with dummy inputs
    dummy_input_ids = torch.zeros(
        (opt.batch_size, opt.max_sen_len)).long().to(device)
    dummy_seg_ids = torch.zeros(
        (opt.batch_size, opt.max_sen_len)).long().to(device)
    writer.add_graph(bert, (dummy_input_ids, dummy_seg_ids), False)

    # Step 2: criterion and optimizer — one Adam over all three modules
    criterion = nn.CrossEntropyLoss()
    num_paras = sum(p.numel() for model in (bert, masked_lm, next_pred)
                    for p in model.parameters() if p.requires_grad)
    paras = list(bert.parameters()) + list(masked_lm.parameters()) + list(
        next_pred.parameters())
    print("Total number of parameters is {}".format(num_paras))
    optimizer = torch.optim.Adam(paras, lr=0.0001, betas=(0.9, 0.999),
                                 weight_decay=0.01)

    # Step 3: train
    print("Start training ...")
    for epoch in range(opt.epochs):
        epoch_loss = 0
        for i, batch_data in enumerate(train_dataloader, 1):
            input_ids, seg_ids, masked_pos, masked_token, isnext = map(
                lambda x: x.to(device), batch_data)

            # Reset gradients and forward
            optimizer.zero_grad()
            bertout = bert(input_ids, seg_ids)
            logits_lm = masked_lm(bertout, masked_pos)
            logits_clsf = next_pred(bertout)

            # Compute loss
            logits_lm = logits_lm.view(
                -1, logits_lm.size(-1))  # (bz * len_mask, vocab)
            masked_token = masked_token.view(-1, )  # (bz * len_mask, )
            logits_clsf = logits_clsf.view(-1, logits_clsf.size(-1))
            # BUG FIX: `isnext.view(-1, )` produced a new tensor that was
            # immediately discarded (a no-op); bind the flattened view so
            # CrossEntropyLoss receives a 1-D class-index target.
            isnext = isnext.view(-1, )  # (bz, )
            loss_lm = criterion(logits_lm, masked_token)
            loss_clsf = criterion(logits_clsf, isnext)
            loss = loss_lm + loss_clsf

            # Accuracy bookkeeping for both heads
            _, mask_preds = torch.max(logits_lm, dim=-1)
            _, next_preds = torch.max(logits_clsf, dim=-1)
            mask_pred_acc = mask_preds.eq(
                masked_token).sum().item() / masked_token.size(0)
            next_pred_acc = next_preds.eq(isnext).sum().item() / isnext.size(0)

            if i % 20 == 0:
                step = i + epoch * len(train_dataloader)
                writer.add_scalar('loss_lm', loss_lm.item(), step)
                writer.add_scalar('loss_clsf', loss_clsf.item(), step)
                writer.add_scalar('lm_acc', mask_pred_acc, step)
                writer.add_scalar('next_acc', next_pred_acc, step)
                print(
                    'Epoch {}, Batch {}/{}, loss_lm={}, loss_next={}, lm_acc={}, next_acc={}'
                    .format(epoch + 1, i, len(train_dataloader),
                            loss_lm.item(), loss_clsf.item(), mask_pred_acc,
                            next_pred_acc))
            epoch_loss += loss.item()

            # Backward and update
            loss.backward()
            optimizer.step()

        if (1 + epoch) % 1 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =',
                  '{:.6f}'.format(epoch_loss))
    print('finished train')

    # Step 4: Save model (encoder weights only, timestamped file name)
    ckpt_file_name = dt.strftime(dt.now(), '%Y-%m-%d %H: %M: %S.ckpt')
    save_path = os.path.join(opt.ckpt_path, ckpt_file_name)
    torch.save(bert.state_dict(), save_path)
def train(**kwargs):
    """Train the configured model on the CWRU 2-D dataset.

    kwargs override the defaults in the global config; the model is saved
    after every epoch and metrics are streamed to visdom.
    """
    # merge command-line overrides into the global config
    opt.parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: build the model named in the config, optionally restoring weights
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    # pin the process to GPU #2, then move the model to the configured device
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    model.to(opt.device)

    # step2: data — the held-out split doubles as both test and validation
    train_data = CWRUDataset2D(opt.train_data_root, train=True)
    test_data = CWRUDataset2D(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True)
    test_dataloader = DataLoader(test_data, opt.batch_size, shuffle=False)

    # step3: objective (cross entropy) and optimizer (Adam)
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=opt.weight_decay)

    # step4: metrics — smoothed loss plus a confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(opt.category)
    previous_loss = 1e10

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for batch_idx, (data, label) in tqdm(enumerate(train_dataloader)):
            inputs = data.to(opt.device)
            targets = label.to(opt.device)

            optimizer.zero_grad()
            scores = model(inputs)
            batch_loss = criterion(scores, targets)
            batch_loss.backward()
            optimizer.step()

            # bookkeeping; detach keeps the meter free of autograd history
            loss_meter.add(batch_loss.item())
            confusion_matrix.add(scores.detach(), targets.detach())

            if (batch_idx + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])
                # drop into the debugger when the debug marker file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # per-epoch validation metrics and logging
        val_cm, val_accuracy = val(model, test_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
            train_cm=str(confusion_matrix.value()), lr=lr))

        # anneal the learning rate whenever the loss stops decreasing;
        # mutating param groups in place keeps optimizer moment state
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a relation-classification model over the Sentence dataset,
    plotting loss/accuracy to visdom and decaying the lr on plateau.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    print(kwargs)
    start = time.time()
    # BUG FIX: parse command-line overrides BEFORE constructing the
    # Visualizer — the original created Visualizer(opt.env) first, so a
    # custom `env` passed in kwargs was silently ignored.
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # load pre-trained word vectors stored next to the training data
    print("Loading word vectors...Please wait.")
    vector = KeyedVectors.load_word2vec_format(
        os.path.join(os.path.dirname(os.path.realpath(opt.train_data_root)),
                     'vector.txt')
    )
    print("Successfully loaded word vectors.")

    # step1: model — input is the word vector plus two position features
    model = getattr(models, opt.model)(input_size=vector.vector_size + 2,
                                       output_size=opt.class_num)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu and t.cuda.is_available():
        model = model.cuda()
    print(f"Structure of {model.model_name}:\n{model}\n")

    # step2: data
    train_data = Sentence(root=opt.train_data_root, relations=opt.relations,
                          max_length=opt.max_length, vector=vector,
                          train=True)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True)
    val_data = Sentence(opt.train_data_root, opt.relations, opt.max_length,
                        vector, train=False)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=True)

    # step3: objective and optimizer
    loss_fn = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: metrics — smoothed loss and confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(opt.class_num)
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):
            input = data
            target = label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            prediction = model(input)
            loss = loss_fn(prediction, target)
            loss.backward()
            optimizer.step()

            # update metrics
            loss_meter.add(loss.item())
            confusion_matrix.add(prediction.data, target.data)

        # training accuracy from the confusion-matrix diagonal
        cm_value = confusion_matrix.value()
        correct = 0
        for i in range(cm_value.shape[0]):
            correct += cm_value[i][i]
        accuracy = 100. * correct / (cm_value.sum())
        vis.plot('train loss', loss_meter.value()[0])
        vis.plot('train accuracy', accuracy)

        # periodic checkpoint
        if epoch % opt.save_epoch == opt.save_epoch - 1:
            model.save()

        # validation metrics and visualization
        val_lm, val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val loss', val_lm.value()[0])
        vis.plot('val accuracy', val_accuracy)
        print("epoch:{epoch}, lr:{lr}, loss:{loss}\ntrain_cm:\n{train_cm}\nval_cm:\n{val_cm}"
              .format(epoch=epoch, loss=loss_meter.value()[0],
                      val_cm=str(val_cm.value()),
                      train_cm=str(confusion_matrix.value()), lr=lr))

        # decay lr when the loss stops improving; mutate param groups in
        # place so optimizer moment state survives
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

    cost = int(time.time()) - int(start)
    print(f"Cost {int(cost/60)}min{cost%60}s.")
def train(**kwargs):
    """Train a Dog-vs-Cat classifier, persisting progress (loss history,
    checkpoints, confusion matrices) into a state dict on disk.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if os.path.exists(opt.load_model_path):
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # restore training bookkeeping if a previous run left one behind.
    # BUG FIX: previous_loss was only assigned inside the pars_path branch,
    # leaving it undefined (NameError) on a fresh run; default it first.
    previous_loss = 1e100
    if os.path.exists(opt.pars_path):
        dic = load_dict(opt.pars_path)
        if 'loss' in dic.keys():
            previous_loss = dic['loss'][-1]
    else:
        dic = {}

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)

    # train (this run resumes at epoch 5)
    for epoch in range(5, opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.data.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                dic = save_dict(opt.pars_path, dic,
                                loss_data=loss_meter.value()[0])
                vis.plot('loss', dic['loss_data'])
                name = model.save()
                if os.path.exists(opt.debug_file):
                    import ipdb
                    # BUG FIX: the original called ipdb.set_trave() — an
                    # AttributeError whenever the debug file existed.
                    ipdb.set_trace()

        name = model.save()

        # update learning rate: reduce when loss no longer decreases;
        # mutate param groups in place so moment state is preserved
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

        dic = save_dict(opt.pars_path, dic, name=name, epoch=epoch, lr=lr,
                        loss=loss_meter.value()[0],
                        train_cm=confusion_matrix.value())

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        dic = save_dict(opt.pars_path, dic, val_accuracy=val_accuracy,
                        val_cm=val_cm.value())
        vis.log(dic)
def train(**kwargs):
    """Train a Dog-vs-Cat classifier with visdom monitoring and
    plateau-triggered learning-rate decay.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters — smoothed loss and a 2-class confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            # BUG FIX: `loss.data[0]` indexes a 0-dim tensor and raises on
            # PyTorch >= 0.5; .item() is the supported scalar accessor.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                # drop into the debugger when the marker file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
            train_cm=str(confusion_matrix.value()), lr=lr))

        # update learning rate on plateau; mutating param groups directly
        # keeps optimizer moment state intact
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a CIFAR-10 classifier through a sequence of fixed-lr stages.

    Each configured stage (opt.stageN != -1) runs opt.stageN epochs at
    learning rate opt.lrN; accuracy is checked on val/test every epoch.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    opt.parse(kwargs)
    NUM_TRAIN = 49000  # first 49k train images; the last 1k validate

    # evaluation transform: tensor + CIFAR-10 channel statistics
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    # training transform: optional augmentation ahead of the same tail
    train_transform = T.Compose([])
    if opt.data_aug == True:
        train_transform.transforms.append(T.RandomCrop(32, padding=4))
        train_transform.transforms.append(T.RandomHorizontalFlip())
    # BUG FIX: ToTensor/Normalize were only appended when data_aug was
    # enabled, so with augmentation off the train loader yielded raw PIL
    # images; the tensor/normalize tail must always be present.
    train_transform.transforms.append(T.ToTensor())
    train_transform.transforms.append(
        T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)))
    if opt.data_aug == True and opt.use_cutout == True:
        train_transform.transforms.append(Cutout(n_holes=1, length=16))

    cifar10_train = dset.CIFAR10('./datasets', train=True, download=True,
                                 transform=train_transform)
    loader_train = DataLoader(cifar10_train, batch_size=opt.batch_size,
                              sampler=sampler.SubsetRandomSampler(
                                  range(NUM_TRAIN)))
    cifar10_val = dset.CIFAR10('./datasets', train=True, download=True,
                               transform=transform)
    loader_val = DataLoader(cifar10_val, batch_size=opt.batch_size,
                            sampler=sampler.SubsetRandomSampler(
                                range(NUM_TRAIN, 50000)))
    cifar10_test = dset.CIFAR10('./datasets', train=False, download=True,
                                transform=transform)
    loader_test = DataLoader(cifar10_test, batch_size=opt.batch_size)

    if opt.use_gpu == True:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    model = getattr(models, opt.model)()
    if opt.use_trained_model == True:
        # resume from a named checkpoint (defaults to the model's name)
        cp_name = opt.model
        if opt.checkpoint_load_name != None:
            cp_name = opt.checkpoint_load_name
        model.load(opt.test_model_path + cp_name)
    model.to(device)

    lr = opt.lr
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                          weight_decay=opt.weight_decay, nesterov=True)

    # collect the configured (epochs, lr) stages; -1 disables a stage
    stages = []
    lrs = []
    if opt.stage1 != -1:
        stages.append(opt.stage1)
        lrs.append(opt.lr1)
    if opt.stage2 != -1:
        stages.append(opt.stage2)
        lrs.append(opt.lr2)
    if opt.stage3 != -1:
        stages.append(opt.stage3)
        lrs.append(opt.lr3)
    if opt.stage4 != -1:
        stages.append(opt.stage4)
        lrs.append(opt.lr4)

    for i, (stage_epoch) in enumerate(stages):
        print(stage_epoch)
        print(lrs[i])
        # apply this stage's learning rate to every param group
        for param_group in optimizer.param_groups:
            param_group['lr'] = lrs[i]

        for epoch in range(stage_epoch):
            for ii, (data, label) in tqdm(enumerate(loader_train)):
                # put data onto the chosen device
                data = data.to(device=device, dtype=torch.float32)
                label = label.to(device=device, dtype=torch.long)

                # forward / loss
                scores = model(data)
                loss = F.cross_entropy(scores, label)

                # backprop (with optional gradient clipping)
                optimizer.zero_grad()
                loss.backward()
                if opt.use_clip == True:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   opt.clip, norm_type=2)
                optimizer.step()

                loss_now = loss.item()
                if ii == 0:
                    print('Epoch [{}/{}], Loss: {:.4f}, lr :{: f}'.format(
                        epoch + 1, stage_epoch, loss_now, lrs[i]))

            # per-epoch accuracy bookkeeping and checkpointing
            testacc = check_acc(loader_test, model)
            valacc = check_acc(loader_val, model)
            model.update_epoch(lrs[i], loss_now, valacc, testacc)
            model.save(opt.checkpoint_save_name)
def main(**kwargs):
    """Train a Zhihu multi-label model with score-driven checkpointing:
    validation is run every opt.decay_every batches; if the score regresses
    the best checkpoint is reloaded and both learning rates are annealed.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    # weight decay grows each time training plateaus (see decay branch)
    origin_weight_decay = 1e-5
    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()

    # model (optionally resumed from a checkpoint)
    model = getattr(models, opt.model)(opt).cuda()
    if opt.model_path:
        model.load(opt.model_path)
    print(model)

    opt.parse(kwargs, print_=True)
    vis.reinit(opt.env)

    pre_loss = 1.0
    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()

    dataset = FoldData(opt.train_data_path, opt.labels_path,
                       type_=opt.type_, fold=opt.fold)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    optimizer = model.get_optimizer(lr, opt.lr2, opt.weight_decay)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()

        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            title, content, label = Variable(title.cuda()), Variable(
                content.cuda()), Variable(label.cuda())
            optimizer.zero_grad()
            score = model(title, content)
            loss = loss_function(score, opt.weight * label.float())
            # BUG FIX: `loss.data[0]` indexes a 0-dim tensor and raises on
            # PyTorch >= 0.5; .item() is the supported scalar accessor.
            loss_meter.add(loss.item())
            loss.backward()
            optimizer.step()

            if ii % opt.plot_every == opt.plot_every - 1:
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

                # top-5 predictions vs the positively-marked labels
                predict = score.data.topk(5, dim=1)[1].cpu().tolist()
                true_target = label.data.float().topk(5, dim=1)
                true_index = true_target[1][:, :5]
                true_label = true_target[0][:, :5]
                predict_label_and_marked_label_list = []
                for jj in range(label.size(0)):
                    true_index_ = true_index[jj]
                    true_label_ = true_label[jj]
                    # keep only indices whose mark is positive
                    true = true_index_[true_label_ > 0]
                    predict_label_and_marked_label_list.append(
                        (predict[jj], true.tolist()))
                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                vis.vis.text('prec:%s,recall:%s,score:%s,a:%s' %
                             (prec_, recall_, score_, _ss), win='tmp')
                vis.plot('scores', score_meter.value()[0])
                vis.plot('loss', loss_meter.value()[0])

                # pick one random sample's sigmoid output (histogram
                # visualization is currently disabled)
                k = t.randperm(label.size(0))[0]
                output = t.nn.functional.sigmoid(score)

            if ii % opt.decay_every == opt.decay_every - 1:
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                if scores > best_score:
                    best_score = scores
                    best_path = model.save(name=str(scores), new=True)

                vis.log({
                    ' epoch:': epoch,
                    ' lr: ': lr,
                    'scores': scores,
                    'prec': prec_,
                    'recall': recall_,
                    'ss': _ss,
                    'scores_train': score_meter.value()[0],
                    'loss': loss_meter.value()[0]
                })

                if scores < best_score:
                    # roll back to the best checkpoint and anneal both lrs;
                    # rebuilding the optimizer resets its moment state
                    model.load(best_path, change_opt=False)
                    lr = lr * opt.lr_decay
                    if lr2 == 0:
                        lr2 = 2e-4
                    else:
                        lr2 = lr2 * 0.8
                    optimizer = model.get_optimizer(lr, lr2, 0)
                    origin_weight_decay = 5 * origin_weight_decay

                pre_loss = loss_meter.value()[0]
                loss_meter.reset()
                score_meter.reset()
                # stop once the learning rate has decayed below the floor
                if lr < opt.min_lr:
                    break
def train(**kwargs):
    """Fine-tune an ImageNet-pretrained ResNet-34 on the Dog-vs-Cat data.

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    # merge command-line overrides into the config
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # (1) model: pretrained resnet34 with a fresh 2-class head
    model = models.resnet34(pretrained=True)
    model.fc = nn.Linear(512, 2)
    if opt.use_gpu:
        model.cuda()

    # (2) data
    train_data = DogCat(opt.train_data_root, train=True)   # training split
    val_data = DogCat(opt.train_data_root, train=False)    # validation split
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers)

    # (3) loss and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.SGD(model.parameters(), lr=opt.lr,
                            weight_decay=opt.weight_decay)

    # (4) meters: smoothed loss and a 2-class confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # (5) training loop
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):
            # BUG FIX: the original mixed Python-2 `print` statements with
            # `loss.item()` (PyTorch >= 0.4); under Python 3 the function
            # did not even parse. All prints converted to print() calls.
            print("ii:", ii)
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            # zero grads, forward, backprop, update
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # update metrics and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), target.detach())

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # timestamped checkpoint each epoch
        name = time.strftime('model' + '%m%d_%H:%M:%S.pth')
        t.save(model.state_dict(), 'checkpoints/' + name)

        # validation metrics and visualization
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch, loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()), lr=lr))
        print("epoch:", epoch, "loss:", loss_meter.value()[0],
              "accuracy:", val_accuracy)

        # lower the learning rate when the loss stops decreasing
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a dog-breed classifier built on a pretrained torchvision
    backbone (wrapped by Mymodel).

    :param kwargs: overrides applied to the global ``opt`` configuration.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model — Mymodel wraps torchvision's resnet34 with
    # a replaced head (loading pretrained weights directly would fail on
    # the mismatched fc dimensions, hence the wrapper).
    model = Mymodel(pretrained=True)
    model = model.model
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = DogBreedData(opt.train_data_root, train=True)
    val_data = DogBreedData(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=True,
                                num_workers=opt.num_workers)

    # step3: loss and optimizer
    # BUG FIX: the criterion was bound to the name `loss` and then
    # shadowed inside the loop by `loss = loss(score, target)`, so the
    # second iteration crashed with "'Tensor' object is not callable".
    criterion = t.nn.CrossEntropyLoss()
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr,
                             weight_decay=opt.weight_decay)

    # step4: meters (running mean/std of the loss)
    loss_meter = meter.AverageValueMeter()
    previous_loss = 1e100

    # train — one epoch is a full pass over the dataset
    for epoch in range(opt.max_epoch):
        loss_meter.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data)
            target = Variable(label.type(t.LongTensor))
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            # zero grads, forward, cross-entropy, backprop, update
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # .item() replaces the deprecated 0-dim `loss.data[0]` access
            loss_meter.add(loss.item())

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

        model.save()

        # lower lr when loss stops decreasing; param groups are mutated
        # in place so optimizer moment state survives
        if loss_meter.value()[0] > previous_loss:
            lr = opt.lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a cResNet51 classifier on features produced by a frozen
    compression network.

    The compression model is loaded from a checkpoint, frozen, and used only
    as a feature extractor (``need_decode=False``); gradients flow only into
    ``c_resnet_51``. Progress/metrics are recorded through a PlotSaver log.
    """
    opt.parse(kwargs)
    # log file, named after the experiment and wall-clock start time
    ps = PlotSaver("FrozenCNN_ResNet50_RGB_" +
                   time.strftime("%m_%d_%H:%M:%S") + ".log.txt")

    # step1: Model
    # Instantiate the compression network; when importance maps are enabled
    # a descriptive model_name encoding r/γ is passed, otherwise None.
    compression_model = getattr(models, opt.model)(
        use_imp=opt.use_imp,
        model_name="CWCNN_limu_ImageNet_imp_r={r}_γ={w}_for_resnet50".format(
            r=opt.rate_loss_threshold,
            w=opt.rate_loss_weight) if opt.use_imp else None)
    compression_model.load(None, opt.compression_model_ckpt)
    compression_model.eval()  # feature extractor only — keep in eval mode

    c_resnet_51 = cResNet51()

    if opt.use_gpu:
        # wraps in DataParallel (and moves to GPU) when multiple GPUs exist
        compression_model = multiple_gpu_process(compression_model)
        c_resnet_51 = multiple_gpu_process(c_resnet_51)

    # freeze the compression network so the optimizer never touches it
    for param in compression_model.parameters():
        param.requires_grad = False

    cudnn.benchmark = True

    # step2: Data (standard ImageNet-style augmentation / normalization)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_data_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    val_data_transforms = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         normalize])
    train_data = datasets.ImageFolder(opt.train_data_root,
                                      train_data_transforms)
    val_data = datasets.ImageFolder(opt.val_data_root, val_data_transforms)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers, pin_memory=True)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers, pin_memory=True)

    # step3: criterion and optimizer (only c_resnet_51's params are trained)
    class_loss = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.SGD(c_resnet_51.parameters(), lr=lr,
                            momentum=opt.momentum,
                            weight_decay=opt.weight_decay)

    start_epoch = 0
    if opt.resume:
        # NOTE(review): assumes c_resnet_51 is DataParallel-wrapped here
        # (uses .module); resume restores optimizer state unless finetuning.
        start_epoch = c_resnet_51.module.load(
            None if opt.finetune else optimizer, opt.resume, opt.finetune)
        if opt.finetune:
            print('Finetune from model checkpoint file', opt.resume)
        else:
            print('Resume training from checkpoint file', opt.resume)
        print('Continue training at epoch %d.' % start_epoch)

    # step4: meters
    class_loss_meter = AverageValueMeter()
    class_acc_top5_meter = AverageValueMeter()
    class_acc_top1_meter = AverageValueMeter()

    # ps init: declare every curve before any point is added
    ps.new_plot('train class loss', opt.print_freq, xlabel="iteration",
                ylabel="train_CE_loss")
    ps.new_plot('val class loss', 1, xlabel="epoch", ylabel="val_CE_loss")
    ps.new_plot('train top_5 acc', opt.print_freq, xlabel="iteration",
                ylabel="train_top5_acc")
    ps.new_plot('train top_1 acc', opt.print_freq, xlabel="iteration",
                ylabel="train_top1_acc")
    ps.new_plot('val top_5 acc', 1, xlabel="iteration", ylabel="val_top_5_acc")
    ps.new_plot('val top_1 acc', 1, xlabel="iteration", ylabel="val_top_1_acc")

    # epochs are 1-based; resuming continues at start_epoch + 1
    for epoch in range(start_epoch + 1, opt.max_epoch + 1):

        # per epoch avg loss meter
        class_loss_meter.reset()
        class_acc_top1_meter.reset()
        class_acc_top5_meter.reset()

        # cur_epoch_loss refresh every epoch
        ps.new_plot("cur epoch train class loss", opt.print_freq,
                    xlabel="iteration in cur epoch",
                    ylabel="cur_train_CE_loss")

        c_resnet_51.train()

        for idx, (data, label) in enumerate(train_dataloader):
            ipt = Variable(data)
            label = Variable(label)
            if opt.use_gpu:
                ipt = ipt.cuda()
                label = label.cuda()
            optimizer.zero_grad()

            # We just want compressed features, not a decoded reconstruction.
            compressed_feat = compression_model(ipt, need_decode=False)
            predicted = c_resnet_51(compressed_feat)
            class_loss_ = class_loss(predicted, label)
            class_loss_.backward()
            optimizer.step()

            class_loss_meter.add(class_loss_.data[0])
            acc1, acc5 = accuracy(predicted.data, label.data, topk=(1, 5))
            class_acc_top1_meter.add(acc1[0])
            class_acc_top5_meter.add(acc5[0])

            if idx % opt.print_freq == opt.print_freq - 1:
                # print_smooth selects the running average over the raw
                # last-batch value for plotting
                ps.add_point(
                    'train class loss',
                    class_loss_meter.value()[0]
                    if opt.print_smooth else class_loss_.data[0])
                ps.add_point(
                    'cur epoch train class loss',
                    class_loss_meter.value()[0]
                    if opt.print_smooth else class_loss_.data[0])
                ps.add_point(
                    'train top_5 acc',
                    class_acc_top5_meter.value()[0]
                    if opt.print_smooth else acc5[0])
                ps.add_point(
                    'train top_1 acc',
                    class_acc_top1_meter.value()[0]
                    if opt.print_smooth else acc1[0])
                ps.log(
                    'Epoch %d/%d, Iter %d/%d, class loss = %.4f, top 5 acc = %.2f %%, top 1 acc = %.2f %%, lr = %.8f'
                    % (epoch, opt.max_epoch, idx, len(train_dataloader),
                       class_loss_meter.value()[0],
                       class_acc_top5_meter.value()[0],
                       class_acc_top1_meter.value()[0], lr))
                # enter debug mode when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    pdb.set_trace()

        if use_data_parallel:
            c_resnet_51.module.save(optimizer, epoch)

        # plot before val can ease me
        ps.make_plot(
            'train class loss'
        )  # all epoch share a same img, so give ""(default) to epoch
        ps.make_plot('cur epoch train class loss', epoch)
        ps.make_plot("train top_5 acc")
        ps.make_plot("train top_1 acc")

        val_class_loss, val_top5_acc, val_top1_acc = val(
            compression_model, c_resnet_51, val_dataloader, class_loss, None,
            ps)

        ps.add_point('val class loss', val_class_loss)
        ps.add_point('val top_5 acc', val_top5_acc)
        ps.add_point('val top_1 acc', val_top1_acc)

        ps.make_plot('val class loss')
        ps.make_plot('val top_5 acc')
        ps.make_plot('val top_1 acc')

        ps.log(
            'Epoch:{epoch}, lr:{lr}, train_class_loss: {train_class_loss}, train_top5_acc: {train_top5_acc} %, train_top1_acc: {train_top1_acc} %, \
val_class_loss: {val_class_loss}, val_top5_acc: {val_top5_acc} %, val_top1_acc: {val_top1_acc} %'
            .format(epoch=epoch,
                    lr=lr,
                    train_class_loss=class_loss_meter.value()[0],
                    train_top5_acc=class_acc_top5_meter.value()[0],
                    train_top1_acc=class_acc_top1_meter.value()[0],
                    val_class_loss=val_class_loss,
                    val_top5_acc=val_top5_acc,
                    val_top1_acc=val_top1_acc))

        # adjust lr at the configured milestone epochs (in-place so SGD
        # momentum buffers survive)
        if epoch in opt.lr_decay_step_list:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
def train(**kwargs):
    """Train a PCNN_ONE relation-extraction model with at-least-one
    multi-instance selection.

    Per bag, ``select_instance`` picks the training instance using the current
    model, then a normal cross-entropy step follows. After each epoch the
    model is evaluated on the test split; PR curves and the model are saved
    when precision/recall improve.
    """
    setup_seed(opt.seed)
    kwargs.update({'model': 'PCNN_ONE'})  # force the model name for this runner
    opt.parse(kwargs)

    if opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = getattr(models, 'PCNN_ONE')(opt)
    if opt.use_gpu:
        model.cuda()
        # parallel
        # model = nn.DataParallel(model)

    # loading data: dataset class is resolved from opt.data, e.g. "NYTData"
    DataModel = getattr(dataset, opt.data + 'Data')
    train_data = DataModel(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                   num_workers=opt.num_workers,
                                   collate_fn=collate_fn)

    test_data = DataModel(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size,
                                  shuffle=False, num_workers=opt.num_workers,
                                  collate_fn=collate_fn)
    print('train data: {}; test data: {}'.format(len(train_data),
                                                 len(test_data)))

    criterion = nn.CrossEntropyLoss()
    # only parameters with requires_grad are optimized (frozen embeddings stay put)
    optimizer = optim.Adadelta(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               rho=1.0, eps=1e-6,
                               weight_decay=opt.weight_decay)

    # train
    print("start training...")
    max_pre = -1.0  # best precision seen so far
    max_rec = -1.0  # best recall seen so far
    for epoch in range(opt.num_epochs):
        total_loss = 0
        for idx, (data, label_set) in enumerate(train_data_loader):
            # first relation label of each bag is used as the bag label
            label = [l[0] for l in label_set]

            if opt.use_gpu:
                label = torch.LongTensor(label).cuda()
            else:
                label = torch.LongTensor(label)

            # pick one instance per bag with the current model (at-least-one)
            data = select_instance(model, data, label)
            model.batch_size = opt.batch_size

            optimizer.zero_grad()
            out = model(data, train=True)
            loss = criterion(out, label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # debug toggle: raise the threshold to skip evaluation on early epochs
        if epoch < -1:
            continue

        true_y, pred_y, pred_p = predict(model, test_data_loader)
        all_pre, all_rec, fp_res = eval_metric(true_y, pred_y, pred_p)

        last_pre, last_rec = all_pre[-1], all_rec[-1]
        # only persist PR curves once they are non-trivial
        if last_pre > 0.24 and last_rec > 0.24:
            save_pr(opt.result_dir, model.model_name, epoch, all_pre,
                    all_rec, fp_res, opt=opt.print_opt)
            print('{} Epoch {} save pr'.format(now(), epoch + 1))

        # checkpoint only when BOTH precision and recall improve
        if last_pre > max_pre and last_rec > max_rec:
            print("save model")
            max_pre = last_pre
            max_rec = last_rec
            model.save(opt.print_opt)

        print('{} Epoch {}/{}: train loss: {}; test precision: {}, test recall {}'.format(
            now(), epoch + 1, opt.num_epochs, total_loss, last_pre, last_rec))
def train(**kwargs):
    """Train a PCNN_ATT relation-extraction model (selective attention over
    instances).

    The model computes its own loss internally from (data, label); evaluation
    (variance-based PR metrics) starts after epoch 2 and PR results are saved
    when they are non-trivial.
    """
    kwargs.update({'model': 'PCNN_ATT'})  # force the model name for this runner
    opt.parse(kwargs)

    if opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = getattr(models, 'PCNN_ATT')(opt)
    if opt.use_gpu:
        model.cuda()

    # loading data: dataset class is resolved from opt.data, e.g. "NYTData"
    DataModel = getattr(dataset, opt.data + 'Data')
    train_data = DataModel(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                   num_workers=opt.num_workers,
                                   collate_fn=collate_fn)

    test_data = DataModel(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size,
                                  shuffle=False, num_workers=opt.num_workers,
                                  collate_fn=collate_fn)
    print('{} train data: {}; test data: {}'.format(now(), len(train_data),
                                                    len(test_data)))

    # criterion and optimizer
    # no external criterion: PCNN_ATT returns its loss from forward()
    optimizer = optim.Adadelta(model.parameters(), rho=0.95, eps=1e-6)

    # train
    for epoch in range(opt.num_epochs):
        total_loss = 0
        for idx, (data, label_set) in enumerate(train_data_loader):
            # first relation label of each bag is used as the bag label
            label = [l[0] for l in label_set]

            optimizer.zero_grad()
            model.batch_size = opt.batch_size
            loss = model(data, label)

            # NOTE(review): this tensor conversion happens AFTER the model
            # already consumed `label` and the result is never used again —
            # dead code. Left in place to avoid changing what the model
            # receives (it appears to expect a plain list).
            if opt.use_gpu:
                label = torch.LongTensor(label).cuda()
            else:
                label = torch.LongTensor(label)

            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # evaluation is skipped for the first epochs while the model warms up
        if epoch > 2:
            pred_res, p_num = predict_var(model, test_data_loader)
            all_pre, all_rec = eval_metric_var(pred_res, p_num)

            last_pre, last_rec = all_pre[-1], all_rec[-1]
            # only persist PR curves once they are non-trivial
            if last_pre > 0.24 and last_rec > 0.24:
                save_pr(opt.result_dir, model.model_name, epoch, all_pre,
                        all_rec, opt=opt.print_opt)
                print('{} Epoch {} save pr'.format(now(), epoch + 1))

            print(
                '{} Epoch {}/{}: train loss: {}; test precision: {}, test recall {}'
                .format(now(), epoch + 1, opt.num_epochs, total_loss,
                        last_pre, last_rec))
        else:
            print('{} Epoch {}/{}: train loss: {};'.format(
                now(), epoch + 1, opt.num_epochs, total_loss))
def train(**kwargs):
    """Train a DogCat classifier with config-driven transforms.

    kwargs override config defaults via ``opt.parse``; training/validation
    metrics are plotted through a Visualizer and the learning rate decays
    when the epoch-average loss stops improving.
    """
    # BUG FIX: was `opt.parse(**kwargs)` — every other call site in this file
    # passes the kwargs dict itself (`opt.parse(kwargs)`), which matches the
    # parser's signature.
    opt.parse(kwargs)

    # step1: configure model
    model = getattr(models, opt.model)(opt.num_class)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_path, transform=opt.train_transform,
                        train=True)
    val_data = DogCat(opt.train_data_path, transform=opt.test_val_transform,
                      train=False, test=False)
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size,
                                  shuffle=opt.shuffle,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=opt.batch_size,
                                shuffle=opt.shuffle,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()  # average loss within an epoch
    confusion_matrix = meter.ConfusionMeter(opt.num_class)
    previous_loss = 1e6

    # step5: train
    vis = Visualizer(opt.env)
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # FIX: feed the meter a Python scalar; `loss.data` is a tensor
            # and skews AverageValueMeter's accumulation.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot(win='loss', y=loss_meter.value()[0])

        model.save()

        # step6: validate and visualize
        val_confusion_matrix, val_accuracy = val(model, val_dataloader)
        vis.plot(win='val_accuracy', y=val_accuracy)
        vis.log(win='log_text',
                info='epoch:{epoch}, lr:{lr}, loss:{loss}, train_cm:{train_cm}, val_cm:{val_cm}'.format(
                    epoch=epoch, lr=lr, loss=loss_meter.value()[0],
                    train_cm=str(confusion_matrix.value()),
                    val_cm=str(val_confusion_matrix)))

        # step7: update learning_rate in place (keeps Adam moment state)
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a document-pair (binary) matching model.

    Builds a vocabulary from the training split and reuses it for validation;
    targets arrive one-hot, so ``target.max(1)[1]`` recovers class indices for
    both the criterion and the confusion matrix.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)(opt)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data — each line is comma-separated; val reuses the train vocab
    train_data = DocumentPair(opt.train_data_root, doc_type='train',
                              suffix='txt',
                              load=lambda x: x.strip().split(','))
    train_data.initialize(vocab_size=opt.vocab_size)
    val_data = DocumentPair(opt.validate_data_root, doc_type='validate',
                            suffix='txt',
                            load=lambda x: x.strip().split(','),
                            vocab=train_data.vocab)
    val_data.initialize()
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=False,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters (binary task -> 2x2 confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, batch in enumerate(train_dataloader):
            # load_data pads/numerifies the raw batch into numpy arrays
            data_left, data_right, label, num_pos = load_data(
                batch, opt, train_data.vocab)

            # train model
            input_data_left, input_data_right = Variable(
                t.from_numpy(data_left)), Variable(t.from_numpy(data_right))
            target = Variable(t.from_numpy(label))
            if opt.use_gpu:
                input_data_left, input_data_right = input_data_left.cuda(
                ), input_data_right.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            scores, predictions = model((input_data_left, input_data_right))
            # target is one-hot; max(1)[1] converts to class indices
            loss = criterion(scores, target.max(1)[1])
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.data[0])
            confusion_matrix.add(predictions.data, target.max(1)[1].data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # enter debug mode when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # update learning rate in place so Adam moment state is preserved
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a PCNN_ONE model and select checkpoints by macro-F1.

    Unlike the PR-curve variant of this trainer, evaluation here uses
    sklearn-style macro precision/recall/F1 over single labels, and the whole
    model object is serialized with ``torch.save`` whenever F1 improves.
    """
    kwargs.update({'model': 'PCNN_ONE'})  # force the model name for this runner
    opt.parse(kwargs)

    if opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = getattr(models, 'PCNN_ONE')(opt)
    if opt.use_gpu:
        model.cuda()
        # model = nn.DataParallel(model)

    # loading data: dataset class is resolved from opt.data, e.g. "NYTData"
    DataModel = getattr(dataset, opt.data + 'Data')
    train_data = DataModel(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data, opt.batch_size, shuffle=False,
                                   num_workers=opt.num_workers,
                                   collate_fn=collate_fn)

    test_data = DataModel(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size,
                                  shuffle=False, num_workers=opt.num_workers,
                                  collate_fn=collate_fn)
    print('train data: {}; test data: {}'.format(len(train_data),
                                                 len(test_data)))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adadelta(model.parameters(), rho=1.0, eps=1e-6,
                               weight_decay=opt.weight_decay)

    # train
    print("start training...")
    max_f1_score = -1.  # best macro-F1 seen so far
    for epoch in range(opt.num_epochs):
        total_loss = 0
        for idx, (data, label_set) in enumerate(train_data_loader):
            # first relation label of each bag is used as the bag label
            label = [l[0] for l in label_set]
            if opt.use_gpu:
                label = torch.LongTensor(label).cuda()
            else:
                label = torch.LongTensor(label)

            # pick one instance per bag with the current model (at-least-one)
            data = select_instance(model, data, label)
            model.batch_size = opt.batch_size

            optimizer.zero_grad()
            out = model(data)
            loss = criterion(out, Variable(label))
            loss.backward()
            optimizer.step()
            # NOTE(review): legacy 0.3-era API; on modern PyTorch this would
            # be loss.item()
            total_loss += loss.data[0]

        # debug toggle: raise the threshold to skip evaluation on early epochs
        if epoch < -3:
            continue

        true_y, pred_y, pred_p = predict(model, test_data_loader)
        # each true_y entry is a label list; score against its first label
        single_true_y = []
        for t_y in true_y:
            single_true_y.append(t_y[0])

        f1score = f1_score(single_true_y, pred_y, average='macro')
        precision = precision_score(single_true_y, pred_y, average='macro')
        recall = recall_score(single_true_y, pred_y, average='macro')
        f1score_class = f1_score(single_true_y, pred_y, average=None)
        precision_class = precision_score(single_true_y, pred_y, average=None)
        recall_class = recall_score(single_true_y, pred_y, average=None)

        if f1score > max_f1_score:
            max_f1_score = f1score
            print('save the model')
            # NOTE(review): saves the whole model object to opt.load_model_path
            # (the *load* path) — confirm this round-trip is intended.
            torch.save(model, opt.load_model_path)

        '''
        all_pre, all_rec, fp_res = eval_metric(true_y, pred_y, pred_p)

        last_pre, last_rec = all_pre[-1], all_rec[-1]
        if last_pre > 0.24 and last_rec > 0.24:
            save_pr(opt.result_dir, model.model_name, epoch, all_pre, all_rec, fp_res, opt=opt.print_opt)
            print('{} Epoch {} save pr'.format(now(), epoch + 1))

        if last_pre > max_pre and last_rec > max_rec:
            print("save model")
            max_pre = last_pre
            max_rec = last_rec
            model.save(opt.print_opt)
        '''
        print(precision_class, recall_class, f1score_class)
        print(
            '{} Epoch {}/{}: train loss: {}; test precision: {}, test recall {}, f1_score {}'
            .format(now(), epoch + 1, opt.num_epochs, total_loss, precision,
                    recall, f1score))
def train(**kwargs):
    """Train a QA-pair ranking model with per-example KL-divergence loss.

    Examples are drawn one at a time through a fixed random permutation and
    accumulated into mini-batch losses; a checkpoint is saved every epoch.
    """
    opt.parse(kwargs)
    if opt.vis:
        vis = Visualizer(opt.env)

    # step 1: configure model
    model = getattr(models, opt.model)(opt)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step 2: data — dataset is indexed manually via a random permutation
    train_data = Small(opt.train_root,
                       wv_path=opt.word2vec_path,
                       stopwords_path=opt.stopwords_path,
                       idf_path=opt.idf_train_path,
                       train=True)
    data_size = len(train_data)
    # NOTE: permutation is fixed once; sample order repeats across epochs
    indices = t.randperm(data_size)

    # step 3: criterion and optimizer
    criterion = t.nn.KLDivLoss()
    lr = opt.lr
    optimizer = Adamax(model.parameters(), lr=lr,
                       weight_decay=opt.weight_decay)

    # step 4: meters
    previous_loss = float('inf')

    # train
    for epoch in range(opt.max_epoch):
        for i in tqdm(range(0, data_size, opt.batch_size)):
            batch_size = min(opt.batch_size, data_size - i)
            # train_model: accumulate the per-example losses of this batch
            loss = 0.
            for j in range(0, batch_size):
                idx = indices[i + j]
                q, a, label, shallow_features = train_data[idx]
                input_q, input_a, shallow_features = Variable(q), Variable(
                    a), Variable(shallow_features)
                target = Variable(label)
                if opt.use_gpu:
                    input_q = input_q.cuda()
                    input_a = input_a.cuda()
                    shallow_features = shallow_features.cuda()
                    target = target.cuda()
                score = model(input_q, input_a, shallow_features)
                example_loss = criterion(score, target)
                loss += example_loss

            # BUG FIX: average over the ACTUAL batch size — the final batch
            # may be smaller than opt.batch_size, and dividing by the config
            # value under-weighted it.
            loss /= batch_size
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        model.save(model.module_name + '_' + str(epoch) + '.pth')
        print('epoch:{epoch}, lr:{lr}, loss:{loss}'.format(epoch=epoch,
                                                           loss=loss.data,
                                                           lr=lr))

        # update learning rate
        if (loss.data > previous_loss).all():
            lr = lr * opt.lr_decay
            # BUG FIX: the decayed lr was computed but never handed to the
            # optimizer; write it into the param groups (keeps Adamax state).
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss.data
def main(**kwargs):
    """Train a boosted ensemble model for Zhihu multi-label text tagging.

    Loads component-model checkpoints (names/paths hard-coded below), trains
    the ensemble with a multi-label loss, periodically scores top-5
    predictions, validates every ``opt.decay_every`` iterations, and rolls
    back to the best checkpoint while decaying both learning rates.
    """
    # dynamically grown weight decay (currently only tracked, see the
    # commented-out get_optimizer call below)
    origin_weight_decay = 1e-5
    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()

    # (several alternative ensemble configurations were kept here as
    # commented-out history; removed for readability)
    opt.model_names = [
        'MultiCNNTextBNDeep',
        #'RCNN',
        'LSTMText',
        'CNNText_inception',
        'CNNText_inception-boost'
    ]
    opt.model_paths = [
        'checkpoints/MultiCNNTextBNDeep_word_0.410330780091',
        'checkpoints/LSTMText_word_0.381833388089',
        'checkpoints/CNNText_tmp_0.380390420742',
        #'checkpoints/RCNN_word_0.411511574999',
        'checkpoints/CNNText_tmp_0.376364647145'
    ]
    # opt.model_path='checkpoints/BoostModel_word_0.412524727048'

    # NOTE(review): this second assignment OVERRIDES the configuration above —
    # only the 3-model ensemble below is actually used.
    opt.model_names = [
        'MultiCNNTextBNDeep', 'LSTMText', 'MultiCNNTextBNDeep-boost'
    ]
    opt.model_paths = [
        'checkpoints/MultiCNNTextBNDeep_word_0.410330780091',
        'checkpoints/LSTMText_word_0.411994005382', None
    ]
    opt.model_path = 'checkpoints/BoostModel2_word_0.410618920827'

    model = getattr(models, opt.model)(opt).cuda()
    # if opt.model_path:
    #     model.load(opt.model_path)
    print(model)

    # re-parse with printing so the final (possibly overridden) config is logged
    opt.parse(kwargs, print_=True)
    vis.reinit(opt.env)

    pre_loss = 1.0
    lr, lr2 = opt.lr, opt.lr2  # lr2 is the (separate) embedding learning rate
    loss_function = getattr(models, opt.loss)()

    if opt.all:
        dataset = ZhihuALLData(opt.train_data_path, opt.labels_path,
                               type_=opt.type_)
    else:
        dataset = ZhihuData(opt.train_data_path, opt.labels_path,
                            type_=opt.type_)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    optimizer = model.get_optimizer(opt.lr, opt.lr2, 0)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()
        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            # title/content each arrive as a (word, char) pair of tensors
            title, content, label = (Variable(
                title[0].cuda()), Variable(title[1].cuda())), (Variable(
                    content[0].cuda()), Variable(content[1].cuda())), Variable(
                        label.cuda())
            optimizer.zero_grad()
            score = model(title, content)
            # multi-label target: opt.weight scales the 0/1 label matrix
            loss = loss_function(score, opt.weight * label.float())
            loss_meter.add(loss.data[0])
            loss.backward()
            optimizer.step()

            if ii % opt.plot_every == opt.plot_every - 1:
                # enter debug mode when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

                # top-5 predicted label ids per sample
                predict = score.data.topk(5, dim=1)[1].cpu().tolist()
                # top-5 ground-truth entries; value > 0 marks a real label
                true_target = label.data.float().topk(5, dim=1)
                true_index = true_target[1][:, :5]
                true_label = true_target[0][:, :5]
                predict_label_and_marked_label_list = []
                for jj in range(label.size(0)):
                    true_index_ = true_index[jj]
                    true_label_ = true_label[jj]
                    true = true_index_[true_label_ > 0]
                    predict_label_and_marked_label_list.append(
                        (predict[jj], true.tolist()))

                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                vis.vis.text('prec:%s,recall:%s,score:%s,a:%s' %
                             (prec_, recall_, score_, _ss),
                             win='tmp')
                vis.plot('scores', score_meter.value()[0])
                vis.plot('loss', loss_meter.value()[0])
                # pick one random sample to inspect the output distribution
                k = t.randperm(label.size(0))[0]
                output = t.nn.functional.sigmoid(score)

            if ii % opt.decay_every == opt.decay_every - 1:
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                if scores > best_score:
                    best_score = scores
                    best_path = model.save(name=str(scores), new=True)

                vis.log({
                    ' epoch:': epoch,
                    ' lr: ': lr,
                    'scores': scores,
                    'prec': prec_,
                    'recall': recall_,
                    'ss': _ss,
                    'scores_train': score_meter.value()[0],
                    'loss': loss_meter.value()[0]
                })

                # roll back to the best checkpoint when validation degraded
                if scores < best_score:
                    model.load(best_path, change_opt=False)

                # decay both learning rates by rebuilding the optimizer
                # (NOTE: unlike in-place param_group edits, moment state is lost)
                lr = lr * opt.lr_decay
                # lr2 starts at 0 (frozen embeddings); unfreeze it gently
                if lr2 == 0:
                    lr2 = 2e-4
                else:
                    lr2 = lr2 * opt.lr_decay
                optimizer = model.get_optimizer(lr, lr2, 0)
                origin_weight_decay = 5 * origin_weight_decay

                pre_loss = loss_meter.value()[0]
                loss_meter.reset()
                score_meter.reset()

        if lr < opt.min_lr:
            break
def train(**kwargs):
    """Train a DogCat classifier.

    kwargs override config defaults via ``opt.parse``; metrics are plotted
    through a Visualizer and the learning rate decays when the epoch-average
    loss stops improving.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # data
    # BUG FIX: the train set was loaded from opt.load_model_path (a checkpoint
    # path), and the second loader rebound train_dataloader from an undefined
    # `test_data`, leaving val_dataloader unset. Build both loaders properly.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers)

    # criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    # BUG FIX: Adam takes the parameter iterable plus hyper-parameters,
    # not the module object itself.
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # meters: smoothed loss plus a 2-class confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train on one batch
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()  # BUG FIX: was optimizer.stop(), which does not exist

            # update meters and visualize
            loss_meter.add(loss.item())
            # BUG FIX: removed the bogus confusion_matrix.add(loss.data[0]);
            # the meter expects (predictions, targets).
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                if os.path.exists(opt.debug_file):  # BUG FIX: was os.path.exist
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validation metrics and visualization
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            'epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}'
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # decay lr in place (keeps Adam moment state)
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a DogCat classifier (visualizer-free variant).

    kwargs override config defaults via ``opt.parse``; a checkpoint is saved
    every epoch and the learning rate decays when the epoch-average loss
    stops improving.
    """
    opt.parse(kwargs)
    # vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        # BUG FIX: tqdm's total was len(train_data) (number of SAMPLES) but the
        # loop iterates BATCHES — the progress bar never finished. Use the
        # number of batches.
        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):
            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

        model.save()

        # validate
        val_cm, val_accuracy = val(model, val_dataloader)

        # update learning rate in place (keeps Adam moment state)
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train the YOLO-guided image compression model (optionally with an
    importance map) and record curves, logs and checkpoints via PlotSaver.

    Args:
        **kwargs: config overrides forwarded to ``opt.parse``.
    """
    opt.parse(kwargs)

    # log file
    logfile_name = "Cmpr_with_YOLOv2_" + opt.exp_desc + time.strftime(
        "_%m_%d_%H:%M:%S") + ".log.txt"
    ps = PlotSaver(logfile_name)

    # step1: Model — the model name encodes whether the importance map is used.
    model = getattr(models, opt.model)(
        use_imp=opt.use_imp,
        n=opt.feat_num,
        input_4_ch=opt.input_4_ch,
        model_name="Cmpr_yolo_imp_" + opt.exp_desc + "_r={r}_gama={w}".format(
            r=opt.rate_loss_threshold, w=opt.rate_loss_weight)
        if opt.use_imp else "Cmpr_yolo_no_imp_" + opt.exp_desc)
    # pdb.set_trace()
    if opt.use_gpu:
        # NOTE(review): presumably wraps the model for multi-GPU and sets the
        # global `use_data_parallel` flag read below — confirm in its definition.
        model = multiple_gpu_process(model)

    cudnn.benchmark = True

    # step2: Data
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    train_data_transforms = transforms.Compose([
        # transforms.RandomHorizontalFlip(), TODO: try to reimplement by myself to simultaneous operate on label and data
        transforms.ToTensor(),
        normalize
    ])
    val_data_transforms = transforms.Compose(
        [transforms.ToTensor(), normalize])
    train_data = ImageCropWithBBoxMaskDataset(
        opt.train_data_list,
        train_data_transforms,
        contrastive_degree=opt.contrastive_degree,
        mse_bbox_weight=opt.input_original_bbox_weight)
    val_data = ImageCropWithBBoxMaskDataset(
        opt.val_data_list,
        val_data_transforms,
        contrastive_degree=opt.contrastive_degree,
        mse_bbox_weight=opt.input_original_bbox_weight)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers, pin_memory=True)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers, pin_memory=True)

    # step3: criterion and optimizer
    # NOTE(review): `mse_loss` is only used by commented-out code below;
    # the active loss path goes through `weighted_mse_loss`.
    mse_loss = t.nn.MSELoss(size_average=False)
    if opt.use_imp:
        # TODO: new rate loss
        rate_loss = RateLoss(opt.rate_loss_threshold, opt.rate_loss_weight)
        # rate_loss = LimuRateLoss(opt.rate_loss_threshold, opt.rate_loss_weight)

    def weighted_mse_loss(input, target, weight):
        # Sum-reduced MSE where mask pixels equal to `input_original_bbox_inner`
        # are up-weighted by `mse_bbox_weight`; everything else gets weight 1.
        # weight[weight!=opt.mse_bbox_weight] = 1
        # weight[weight==opt.mse_bbox_weight] = opt.mse_bbox_weight
        # print('max val', weight.max())
        # return mse_loss(input, target)
        # weight_clone = weight.clone()
        # weight_clone[weight_clone == opt.input_original_bbox_weight] = 0
        # return t.sum(weight_clone * (input - target) ** 2)
        weight_clone = t.ones_like(weight)
        weight_clone[weight == opt.input_original_bbox_inner] = opt.mse_bbox_weight
        return t.sum(weight_clone * (input - target)**2)

    def yolo_rate_loss(imp_map, mask_r):
        # mask_r is currently ignored; the plain rate loss is applied.
        return rate_loss(imp_map)
        # V2 contrastive_degree must be 0!
        # return YoloRateLossV2(mask_r, opt.rate_loss_threshold, opt.rate_loss_weight)(imp_map)

    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))

    start_epoch = 0
    # Compare mtimes so the same decay file does not trigger the lr decay
    # repeatedly (see the file-indicator block at the end of the epoch loop).
    decay_file_create_time = -1

    if opt.resume:
        if use_data_parallel:
            start_epoch = model.module.load(
                None if opt.finetune else optimizer, opt.resume, opt.finetune)
        else:
            start_epoch = model.load(None if opt.finetune else optimizer,
                                     opt.resume, opt.finetune)
        if opt.finetune:
            print('Finetune from model checkpoint file', opt.resume)
        else:
            print('Resume training from checkpoint file', opt.resume)
            print('Continue training at epoch %d.' % start_epoch)

    # step4: meters
    mse_loss_meter = AverageValueMeter()
    if opt.use_imp:
        rate_loss_meter = AverageValueMeter()
        rate_display_meter = AverageValueMeter()
        total_loss_meter = AverageValueMeter()

    previous_loss = 1e100
    tolerant_now = 0
    same_lr_epoch = 0

    # ps init
    ps.new_plot('train mse loss', opt.print_freq, xlabel="iteration", ylabel="train_mse_loss")
    ps.new_plot('val mse loss', 1, xlabel="epoch", ylabel="val_mse_loss")
    if opt.use_imp:
        ps.new_plot('train rate value', opt.print_freq, xlabel="iteration", ylabel="train_rate_value")
        ps.new_plot('train rate loss', opt.print_freq, xlabel="iteration", ylabel="train_rate_loss")
        ps.new_plot('train total loss', opt.print_freq, xlabel="iteration", ylabel="train_total_loss")
        ps.new_plot('val rate value', 1, xlabel="iteration", ylabel="val_rate_value")
        ps.new_plot('val rate loss', 1, xlabel="iteration", ylabel="val_rate_loss")
        ps.new_plot('val total loss', 1, xlabel="iteration", ylabel="val_total_loss")

    for epoch in range(start_epoch + 1, opt.max_epoch + 1):
        same_lr_epoch += 1
        # per epoch avg loss meter
        mse_loss_meter.reset()
        if opt.use_imp:
            rate_display_meter.reset()
            rate_loss_meter.reset()
            total_loss_meter.reset()
        else:
            # without an importance map the total loss IS the mse loss
            total_loss_meter = mse_loss_meter

        # cur_epoch_loss refresh every epoch
        # vis.refresh_plot('cur epoch train mse loss')
        ps.new_plot("cur epoch train mse loss", opt.print_freq,
                    xlabel="iteration in cur epoch", ylabel="train_mse_loss")
        # progress_bar = tqdm(enumerate(train_dataloader), total=len(train_dataloader), ascii=True)
        # progress_bar.set_description('epoch %d/%d, loss = 0.00' % (epoch, opt.max_epoch))

        # Init val: optional validation pass before any training step.
        if (epoch == start_epoch + 1) and opt.init_val:
            print('Init validation ... ')
            if opt.use_imp:
                mse_val_loss, rate_val_loss, total_val_loss, rate_val_display = val(
                    model, val_dataloader, weighted_mse_loss, yolo_rate_loss, ps)
            else:
                mse_val_loss = val(model, val_dataloader, weighted_mse_loss,
                                   None, ps)
            ps.add_point('val mse loss', mse_val_loss)
            if opt.use_imp:
                ps.add_point('val rate value', rate_val_display)
                ps.add_point('val rate loss', rate_val_loss)
                ps.add_point('val total loss', total_val_loss)
            ps.make_plot('val mse loss')
            if opt.use_imp:
                ps.make_plot('val rate value')
                ps.make_plot('val rate loss')
                ps.make_plot('val total loss')
            # log sth.
            if opt.use_imp:
                ps.log(
                    'Init Val @ Epoch:{epoch}, lr:{lr}, val_mse_loss: {val_mse_loss}, val_rate_loss: {val_rate_loss}, val_total_loss: {val_total_loss}, val_rate_display: {val_rate_display} '
                    .format(epoch=epoch,
                            lr=lr,
                            val_mse_loss=mse_val_loss,
                            val_rate_loss=rate_val_loss,
                            val_total_loss=total_val_loss,
                            val_rate_display=rate_val_display))
            else:
                ps.log(
                    'Init Val @ Epoch:{epoch}, lr:{lr}, val_mse_loss:{val_mse_loss}'
                    .format(epoch=epoch, lr=lr, val_mse_loss=mse_val_loss))
            if opt.only_init_val:
                print('Only Init Val Over!')
                return

        model.train()
        if epoch == start_epoch + 1:
            print('Start training, please inspect log file %s!' % logfile_name)

        # mask is the detection bounding box mask
        for idx, (data, mask, o_mask) in enumerate(train_dataloader):
            # pdb.set_trace()
            data = Variable(data)
            mask = Variable(mask)
            o_mask = Variable(o_mask, requires_grad=False)
            if opt.use_gpu:
                # NOTE(review): `.cuda(async=True)` and `.data[0]` below are
                # legacy (pre-0.4 PyTorch / pre-3.7 Python) idioms; `async` is a
                # reserved keyword from Python 3.7 on (-> non_blocking=True).
                data = data.cuda(async=True)
                mask = mask.cuda(async=True)
                o_mask = o_mask.cuda(async=True)
            # pdb.set_trace()

            optimizer.zero_grad()
            reconstructed, imp_mask_sigmoid = model(data, mask, o_mask)
            # print ('imp_mask_height', model.imp_mask_height)
            # print ('type recons', type(reconstructed.data))

            loss = weighted_mse_loss(reconstructed, data, o_mask)
            # loss = mse_loss(reconstructed, data)
            # Caffe-style normalization: sum-reduced loss / (2 * batch size).
            caffe_loss = loss / (2 * opt.batch_size)

            if opt.use_imp:
                rate_loss_display = imp_mask_sigmoid
                # rate_loss_ = rate_loss(rate_loss_display)
                rate_loss_ = yolo_rate_loss(rate_loss_display, mask)
                total_loss = caffe_loss + rate_loss_
            else:
                total_loss = caffe_loss

            total_loss.backward()
            optimizer.step()

            mse_loss_meter.add(caffe_loss.data[0])
            if opt.use_imp:
                rate_loss_meter.add(rate_loss_.data[0])
                rate_display_meter.add(rate_loss_display.data.mean())
                total_loss_meter.add(total_loss.data[0])

            if idx % opt.print_freq == opt.print_freq - 1:
                ps.add_point(
                    'train mse loss',
                    mse_loss_meter.value()[0]
                    if opt.print_smooth else caffe_loss.data[0])
                ps.add_point(
                    'cur epoch train mse loss',
                    mse_loss_meter.value()[0]
                    if opt.print_smooth else caffe_loss.data[0])
                if opt.use_imp:
                    ps.add_point(
                        'train rate value',
                        rate_display_meter.value()[0]
                        if opt.print_smooth else rate_loss_display.data.mean())
                    ps.add_point(
                        'train rate loss',
                        rate_loss_meter.value()[0]
                        if opt.print_smooth else rate_loss_.data[0])
                    ps.add_point(
                        'train total loss',
                        total_loss_meter.value()[0]
                        if opt.print_smooth else total_loss.data[0])
                if not opt.use_imp:
                    ps.log('Epoch %d/%d, Iter %d/%d, loss = %.2f, lr = %.8f' %
                           (epoch, opt.max_epoch, idx, len(train_dataloader),
                            total_loss_meter.value()[0], lr))
                else:
                    ps.log(
                        'Epoch %d/%d, Iter %d/%d, loss = %.2f, mse_loss = %.2f, rate_loss = %.2f, rate_display = %.2f, lr = %.8f'
                        % (epoch, opt.max_epoch, idx, len(train_dataloader),
                           total_loss_meter.value()[0],
                           mse_loss_meter.value()[0],
                           rate_loss_meter.value()[0],
                           rate_display_meter.value()[0], lr))
                # enter debug mode when the indicator file exists
                if os.path.exists(opt.debug_file):
                    pdb.set_trace()

        if epoch % opt.save_interval == 0:
            print('save checkpoint file of epoch %d.' % epoch)
            if use_data_parallel:
                model.module.save(optimizer, epoch)
            else:
                model.save(optimizer, epoch)

        ps.make_plot('train mse loss')
        ps.make_plot('cur epoch train mse loss', epoch)
        if opt.use_imp:
            ps.make_plot("train rate value")
            ps.make_plot("train rate loss")
            ps.make_plot("train total loss")

        if epoch % opt.eval_interval == 0:
            print('Validating ...')
            # val
            if opt.use_imp:
                mse_val_loss, rate_val_loss, total_val_loss, rate_val_display = val(
                    model, val_dataloader, weighted_mse_loss, yolo_rate_loss, ps)
            else:
                mse_val_loss = val(model, val_dataloader, weighted_mse_loss,
                                   None, ps)
            ps.add_point('val mse loss', mse_val_loss)
            if opt.use_imp:
                ps.add_point('val rate value', rate_val_display)
                ps.add_point('val rate loss', rate_val_loss)
                ps.add_point('val total loss', total_val_loss)
            ps.make_plot('val mse loss')
            if opt.use_imp:
                ps.make_plot('val rate value')
                ps.make_plot('val rate loss')
                ps.make_plot('val total loss')
            # log sth.
            if opt.use_imp:
                ps.log(
                    'Epoch:{epoch}, lr:{lr}, train_mse_loss: {train_mse_loss}, train_rate_loss: {train_rate_loss}, train_total_loss: {train_total_loss}, train_rate_display: {train_rate_display} \n\
 val_mse_loss: {val_mse_loss}, val_rate_loss: {val_rate_loss}, val_total_loss: {val_total_loss}, val_rate_display: {val_rate_display} '
                    .format(epoch=epoch,
                            lr=lr,
                            train_mse_loss=mse_loss_meter.value()[0],
                            train_rate_loss=rate_loss_meter.value()[0],
                            train_total_loss=total_loss_meter.value()[0],
                            train_rate_display=rate_display_meter.value()[0],
                            val_mse_loss=mse_val_loss,
                            val_rate_loss=rate_val_loss,
                            val_total_loss=total_val_loss,
                            val_rate_display=rate_val_display))
            else:
                ps.log(
                    'Epoch:{epoch}, lr:{lr}, train_mse_loss:{train_mse_loss}, val_mse_loss:{val_mse_loss}'
                    .format(epoch=epoch,
                            lr=lr,
                            train_mse_loss=mse_loss_meter.value()[0],
                            val_mse_loss=mse_val_loss))

        # Adaptive adjust lr:
        # for each lr, if val loss is higher than last time opt.tolerant_max
        # times in a row, decay early.
        # update learning rate
        # if loss_meter.value()[0] > previous_loss:
        if opt.use_early_adjust:
            if total_loss_meter.value()[0] > previous_loss:
                tolerant_now += 1
                if tolerant_now == opt.tolerant_max:
                    tolerant_now = 0
                    same_lr_epoch = 0
                    lr = lr * opt.lr_decay
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr
                    print('Due to early stop anneal lr to %.10f at epoch %d' %
                          (lr, epoch))
                    ps.log('Due to early stop anneal lr to %.10f at epoch %d' %
                           (lr, epoch))
            else:
                # NOTE(review): decrementing (rather than resetting) the
                # tolerance counter lets it go negative — confirm intent.
                tolerant_now -= 1

        # periodic decay after a fixed number of epochs
        if epoch % opt.lr_anneal_epochs == 0:
            # if same_lr_epoch and same_lr_epoch % opt.lr_anneal_epochs == 0:
            same_lr_epoch = 0
            tolerant_now = 0
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            print('Anneal lr to %.10f at epoch %d due to full epochs.' %
                  (lr, epoch))
            ps.log('Anneal lr to %.10f at epoch %d due to full epochs.' %
                   (lr, epoch))

        # decay triggered externally by touching opt.lr_decay_file; the stored
        # mtime avoids re-triggering on an unchanged file.
        if opt.use_file_decay_lr and os.path.exists(opt.lr_decay_file):
            cur_mtime = os.path.getmtime(opt.lr_decay_file)
            if cur_mtime > decay_file_create_time:
                decay_file_create_time = cur_mtime
                lr = lr * opt.lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                print(
                    'Anneal lr to %.10f at epoch %d due to decay-file indicator.'
                    % (lr, epoch))
                ps.log(
                    'Anneal lr to %.10f at epoch %d due to decay-file indicator.'
                    % (lr, epoch))

        previous_loss = total_loss_meter.value()[0]
def test(**kwargs):
    """Stub test entry point: only folds keyword overrides into the global
    config via ``opt.parse``.

    NOTE(review): no evaluation logic is implemented yet.
    """
    opt.parse(kwargs)
def train(**kwargs):
    """Fine-tune a pretrained VGG19-BN for 2-class classification on CAG data.

    Replaces the classifier head with a fresh 2-way MLP, trains with Adam and
    cross-entropy, logs loss/accuracy per epoch, decays the lr every 100
    epochs, and validates every 5 epochs.

    Args:
        **kwargs: config overrides forwarded to ``opt.parse``.
    """
    opt.parse(kwargs)

    # step1: configure model — pretrained backbone, new 2-class head
    model = Pre_models.vgg19_bn(pretrained=True)
    model.classifier = torch.nn.Sequential(
        torch.nn.Linear(512 * 7 * 7, 4096),
        torch.nn.ReLU(True),
        torch.nn.Dropout(),
        torch.nn.Linear(4096, 4096),
        torch.nn.ReLU(True),
        torch.nn.Dropout(),
        torch.nn.Linear(4096, 2),
    )
    model = model.cuda()
    model = torch.nn.DataParallel(model)

    # step2: data (CAG splits train/val via the `train` flag)
    train_data = CAG(opt.train_data_root, train=True)
    val_data = CAG(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    loss_func = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # train
    for epoch in range(opt.max_epoch):
        print('epoch {}'.format(epoch + 1))
        # FIX: the sample counter started at 1, biasing the accuracy
        # denominator; start at 0 and guard the division instead.
        train_num = 0
        train_acc = 0
        sum_loss = 0
        batch_num = 0
        for ii, (data, label) in enumerate(train_dataloader):
            # train model on one batch
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            probability = t.nn.functional.softmax(score, dim=1)
            _, result = torch.max(probability, 1)
            # NOTE(review): squeeze(0) only matters for batch size 1 — confirm
            # label tensors arrive with a leading singleton dim in that case.
            train_correct = (result == target.squeeze(0)).sum()
            train_acc += train_correct.item()
            train_num += target.size(0)

            loss = loss_func(score, target)
            sum_loss += loss.item()
            loss.backward()
            optimizer.step()
            batch_num += 1

        # per-epoch logging (guard against an empty loader)
        print("当前loss:", sum_loss / max(batch_num, 1))
        logger.scalar_summary('train_loss', sum_loss / max(batch_num, 1), epoch)
        accuracy = train_acc / max(train_num, 1)
        logger.scalar_summary('train_accurancy', accuracy, epoch)

        # step lr down every 100 epochs
        if (epoch + 1) % 100 == 0:
            lr = lr * opt.lr_decay
            print("当前学习率", lr)
            logger.scalar_summary('lr', lr, epoch)
            # optimizer manages params via param_groups; update each group's lr
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        # validate every 5 epochs
        if (epoch + 1) % 5 == 0:
            val_accuracy, val_loss = val(model, val_dataloader)
            print("验证集上准确率为:", val_accuracy)
            logger.scalar_summary('val_accurancy', val_accuracy, epoch)
            print("val_loss:", val_loss)
            logger.scalar_summary('val_loss', val_loss, epoch)
def train(**kwargs):
    """Train a bag-level relation-extraction model: a sentence encoder
    (BERT / PCNN / other) plus a bag selector, with per-epoch prediction on
    the test set and checkpointing.

    Args:
        **kwargs: config overrides forwarded to ``opt.parse``.
    """
    # kwargs.update({'model': 'CNN'})
    opt.parse(kwargs)
    if (opt.use_gpu):
        torch.cuda.set_device(opt.gpu_id)

    # Build the encoder: a pretrained BERT classifier head, or a project
    # encoder selected by name.
    if opt.encoder == 'BERT':
        encoder_model = BertForSequenceClassification.from_pretrained(
            "./downloaded_weights/downloaded_bert_base_uncased",
            num_labels=opt.rel_num)
        # print(encoder_model)
        opt.encoder_out_dimension = opt.rel_num
    else:
        encoder_model = getattr(encoder_models, opt.encoder)(opt)
        opt.encoder_out_dimension = encoder_model.out_dimension
    selector_model = getattr(selector_models, opt.selector)(opt)
    # encoder_model = torch.nn.DataParallel(encoder_model, device_ids=[3,6])
    if (opt.use_gpu):
        encoder_model = encoder_model.cuda()
        selector_model = selector_model.cuda()

    # Loading data
    DataModel = getattr(dataset, opt.data + 'Data')
    train_data = DataModel(opt.data_root,
                           train=True,
                           use_bert=opt.use_bert_tokenizer)
    train_data_loader = DataLoader(train_data,
                                   batch_size=opt.batch_size,
                                   shuffle=True,
                                   num_workers=opt.num_workers,
                                   collate_fn=collate_fn)
    print('train data: {}'.format(len(train_data)))
    test_data = DataModel(opt.data_root,
                          train=False,
                          use_bert=opt.use_bert_tokenizer)
    test_data_loader = DataLoader(test_data,
                                  batch_size=opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers,
                                  collate_fn=collate_fn)
    print('test data: {}'.format(len(test_data)))

    criterion = nn.CrossEntropyLoss()
    if opt.encoder == 'BERT':
        optimizer = AdamW(
            [{
                'params': encoder_model.parameters()
            }, {
                'params': selector_model.parameters()
            }],
            lr=opt.lr,
            correct_bias=True
        )  # To reproduce BertAdam specific behavior set correct_bias=False
    else:
        optimizer = optim.Adadelta([{
            'params': encoder_model.parameters()
        }, {
            'params': selector_model.parameters()
        }],
                                   lr=opt.lr,
                                   rho=1.0,
                                   eps=1e-6,
                                   weight_decay=opt.weight_decay)
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=2,
                                     t_total=3)  # PyTorch scheduler
    ### and used like this:
    # for batch in train_data:
    #     loss = model(batch)
    #     loss.backward()
    #     torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)  # Gradient clipping is not in AdamW anymore (so you can use amp without issue)
    #     optimizer.zero_grad()
    # if opt.encoder == "BERT" and False:
    #     optimizer = optim.SGD([
    #         {'params': selector_model.parameters()}
    #     ], lr=opt.lr)
    # else:
    # NOTE(review): this unconditional re-assignment discards the AdamW /
    # Adadelta optimizer configured above and trains everything with SGD.
    # The commented-out guard suggests it was meant to be conditional —
    # confirm intent before "fixing".
    optimizer = optim.SGD([{
        'params': encoder_model.parameters()
    }, {
        'params': selector_model.parameters()
    }], lr=opt.lr)

    max_pre = 0.0
    max_rec = 0.0
    for epoch in range(opt.num_epochs):
        # if opt.encoder == "BERT":
        encoder_model.train()
        selector_model.train()
        print("*" * 50)
        print("Epoch {}".format(epoch))
        total_loss = 0
        max_insNum = 0
        for batch_num, (data, label_set) in enumerate(train_data_loader):
            # if (batch_num>2000):
            #     break
            # label_set is the label of each bag (there may be no more than 4
            # labels, but we only wants the first)
            labels = []
            # NOTE(review): 53 is presumably the number of relation classes —
            # confirm against opt.rel_num.
            outs = torch.empty([0, 53])
            # if all labels of bags in one batch are zeros, then it's empty,
            # continue to avoid error
            empty = True
            for l in label_set:
                if (l[0] != 0):
                    labels.append(l[0])
                    empty = False
            if empty:
                continue
            # labels = [l[0] for l in label_set]
            if opt.use_gpu:
                labels = torch.LongTensor(labels).cuda()
                outs = outs.cuda()
            else:
                labels = torch.LongTensor(labels)

            optimizer.zero_grad()
            train_cor = 0
            # Each bag is encoded sentence-by-sentence (or in one BERT call),
            # then pooled by the selector into a single bag representation.
            for idx, bag in enumerate(data):
                insNum = bag[1]  # number of instances (sentences) in the bag
                # if insNum > max_insNum:
                #     max_insNum = insNum
                #     print(max_insNum)
                label = label_set[idx][0]  # Label of the current bag
                if (label_set[idx][0] == 0):
                    continue
                ins_outs = torch.empty(0)
                instances = bag[2]
                pf_list = []
                mask_list = []
                if opt.encoder != 'BERT':
                    pf_list = bag[3]  # position features
                    mask_list = bag[5]
                # pf_list = bag[3]
                ins_out = torch.empty(0)
                encoder_model.batch_size = insNum
                if opt.use_gpu:
                    instances = torch.LongTensor(instances).cuda()
                if opt.encoder == 'BERT':
                    # with torch.no_grad():
                    # print(instances.size(0))
                    # cap the number of sentences fed to BERT per bag
                    if insNum > opt.max_sentence_in_bag:
                        ins_outs = encoder_model(
                            instances[:opt.max_sentence_in_bag])[0]
                    else:
                        ins_outs = encoder_model(instances)[0]
                    # ins_outs = ins_outs[0]
                    # print(ins_outs[0].size())
                else:
                    # NOTE(review): this inner `idx` shadows the outer bag
                    # index; safe only because the outer idx is re-read from
                    # enumerate on the next iteration.
                    for idx, instance in enumerate(instances):
                        if opt.use_gpu:
                            pfs = torch.LongTensor(pf_list[idx]).cuda()
                            masks = torch.LongTensor(mask_list[idx]).cuda()
                        else:
                            pfs = torch.LongTensor(pf_list[idx])
                            masks = torch.LongTensor(mask_list[idx])
                        if opt.encoder == 'PCNN':
                            ins_out = encoder_model(instance, pfs, masks)
                        else:
                            ins_out = encoder_model(instance, pfs)
                        if (opt.use_gpu):
                            ins_out = ins_out.cuda()
                            ins_outs = ins_outs.cuda()
                        ins_outs = torch.cat((ins_outs, ins_out), 0)
                        del instance, ins_out
                        if idx >= opt.max_sentence_in_bag:
                            break
                bag_feature = selector_model(ins_outs)
                if opt.use_gpu:
                    bag_feature = bag_feature.cuda()
                if (torch.max(bag_feature.squeeze(), 0)[1] == label):
                    train_cor += 1
                outs = torch.cat((outs, bag_feature), 0)
                del ins_outs, bag_feature

            # outs = outs.squeeze()
            # print("outs.size(): ", outs.size(), '\n', "labels.size(): ", labels.size())
            # print(outs,labels)
            loss = criterion(outs, labels)
            total_loss += loss.item()
            avg_loss = total_loss / (batch_num + 1)
            sys.stdout.write(
                "\rbatch number: {:6d}\tloss: {:7.4f}\ttrain_acc: {:7.2f}\t".
                format(batch_num, avg_loss, train_cor / len(labels)))
            sys.stdout.flush()
            # sys.stdout.write('\033')
            loss.backward()
            if opt.encoder == 'BERT':
                # NOTE(review): scheduler.step() before optimizer.step() is
                # the pre-1.1 PyTorch ordering; PyTorch >= 1.1 expects the
                # scheduler to step after the optimizer.
                scheduler.step()
            optimizer.step()
            del outs, labels

        # per-epoch evaluation and checkpointing
        if (opt.skip_predict != True):
            with torch.no_grad():
                predict(encoder_model, selector_model, test_data_loader)
        t = time.strftime('%m_%d_%H_%M.pth')
        torch.save(encoder_model.state_dict(),
                   'checkpoints/{}_{}'.format(opt.encoder, t))
        torch.save(selector_model.state_dict(),
                   'checkpoints/{}_{}'.format(opt.selector, t))
path=args.path4AffGraph, img_name=img_name, path4data=args.path4data) else: return None def __len__(self): return len(self.train_file) if __name__ == "__main__": # test the corecness of the dataset args.parse(hid_unit=40, max_epoch=250, drop_rate=.3, path4train_images=args.path4train_aug_images, path4AffGraph=os.path.join("..", "psa", "AFF_MAT_normalize"), path4partial_label_label=os.path.join( "..", "psa", "RES38_PARTIAL_PSEUDO_LABEL_DN"), path4node_feat=os.path.join("..", "psa", "AFF_FEATURE_res38")) if getpass.getuser() == "u7577591": args.path4node_feat = os.path.join("/work/u7577591/", "irn/AFF_FEATURE_res50_W") args.path4partial_label_label = "data/partial_pseudo_label/" + "label/" + "RES_CAM_TRAIN_AUG_PARTIAL_PSEUDO_LABEL" + "@PIL_near@confident_ratio_" + "0.3_cam_DN_johnney" args.path4AffGraph = os.path.join("/work/u7577591/irn", "AFF_MAT_normalize_IRNet") dataset = graph_voc() import time from utils import show_timing t_start = time.time() for i, item in enumerate(dataset, start=1):
def main(**kwargs):
    '''
    Training entry point: train a Zhihu multi-label text classifier on
    (title, content) pairs, plotting scores periodically and decaying the
    learning rate when the validation score regresses.
    '''
    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()

    model = getattr(models, opt.model)(opt).cuda()
    if opt.model_path:
        model.load(opt.model_path)
    print(model)

    opt.parse(kwargs, print_=True)
    vis.reinit(opt.env)
    pre_loss = 1.0
    lr, lr2 = opt.lr, opt.lr2  # lr2: separate lr (presumably for embeddings)
    loss_function = getattr(models, opt.loss)()
    dataset = ZhihuData(opt.train_data_path,
                        opt.labels_path,
                        type_=opt.type_,
                        augument=opt.augument)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)
    optimizer = model.get_optimizer(lr, opt.lr2, opt.weight_decay)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()
        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            # train: update parameters
            title, content, label = Variable(title.cuda()), Variable(
                content.cuda()), Variable(label.cuda())
            optimizer.zero_grad()
            score = model(title, content)
            # opt.weight rescales the positive targets for the loss
            loss = loss_function(score, opt.weight * label.float())
            loss_meter.add(loss.data[0])
            loss.backward()
            optimizer.step()

            if ii % opt.plot_every == opt.plot_every - 1:
                ### visualization
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
                # top-5 predictions vs. the marked true labels per sample
                predict = score.data.topk(5, dim=1)[1].cpu().tolist()
                true_target = label.data.float().topk(5, dim=1)
                true_index = true_target[1][:, :5]
                true_label = true_target[0][:, :5]
                predict_label_and_marked_label_list = []
                for jj in range(label.size(0)):
                    true_index_ = true_index[jj]
                    true_label_ = true_label[jj]
                    true = true_index_[true_label_ > 0]
                    predict_label_and_marked_label_list.append(
                        (predict[jj], true.tolist()))
                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                vis.vis.text('prec:%s,recall:%s,score:%s,a:%s' %
                             (prec_, recall_, score_, _ss),
                             win='tmp')
                vis.plot('scores', score_meter.value()[0])
                #eval()
                vis.plot('loss', loss_meter.value()[0])
                # NOTE(review): `k` and `output` are computed but never used.
                k = t.randperm(label.size(0))[0]
                output = t.nn.functional.sigmoid(score)

            if ii % opt.decay_every == opt.decay_every - 1:
                # compute the validation score and adjust the learning rate
                # accordingly
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                vis.log({
                    ' epoch:': epoch,
                    ' lr: ': lr,
                    'scores': scores,
                    'prec': prec_,
                    'recall': recall_,
                    'ss': _ss,
                    'scores_train': score_meter.value()[0],
                    'loss': loss_meter.value()[0]
                })
                if scores > best_score:
                    best_score = scores
                    best_path = model.save(name=str(scores), new=True)
                if scores < best_score:
                    # regression: roll back to the best checkpoint and decay lr
                    model.load(best_path, change_opt=False)
                    lr = lr * opt.lr_decay
                    lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
                    optimizer = model.get_optimizer(lr, lr2, 0)

                pre_loss = loss_meter.value()[0]
                loss_meter.reset()
                score_meter.reset()
def main(**kwargs):
    """Training entry point for the multi-model (ensemble) Zhihu classifier.

    Hard-codes the sub-model names/checkpoints to ensemble, trains the
    combining model on (title, content) word+char inputs, plots scores
    periodically, and rolls back + decays the learning rates when the
    validation score regresses.  (Original note: dynamic weight decay.)

    Args:
        **kwargs: config overrides forwarded to ``opt.parse``.
    """
    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()

    # Sub-models and their checkpoints to be ensembled by opt.model.
    opt.model_names = [
        'MultiCNNTextBNDeep', 'FastText3', 'LSTMText', 'CNNText_inception'
    ]
    opt.model_paths = [
        'checkpoints/MultiCNNTextBNDeep_word_0.41124002492',
        'checkpoints/FastText3_word_0.40810787337',
        'checkpoints/LSTMText_word_0.413681107036',
        'checkpoints/CNNText_tmp_char_0.402429167301'
    ]

    model = getattr(models, opt.model)(opt).cuda()
    if opt.model_path:
        model.load(opt.model_path)
    print(model)

    opt.parse(kwargs, print_=True)
    vis.reinit(opt.env)
    pre_loss = 1.0
    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()

    if opt.all:
        dataset = ZhihuALLData(opt.train_data_path,
                               opt.labels_path,
                               type_=opt.type_,
                               augument=opt.augument)
    else:
        # FIX: previously `dataset` was silently left undefined here (the
        # alternative branch was commented out), causing a NameError below.
        raise ValueError('this ensemble entry point requires opt.all=True '
                         '(ZhihuALLData); no fallback dataset is configured')
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    optimizer = model.get_optimizer(opt.lr, opt.lr2)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()
        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            # each field arrives as a (word, char) pair of tensors
            title, content, label = (Variable(
                title[0].cuda()), Variable(title[1].cuda())), (Variable(
                    content[0].cuda()), Variable(content[1].cuda())), Variable(
                        label.cuda())
            optimizer.zero_grad()
            score = model(title, content)
            loss = loss_function(score, label.float())
            loss_meter.add(loss.data[0])
            loss.backward()
            optimizer.step()

            if ii % opt.plot_every == opt.plot_every - 1:
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
                # top-5 predictions vs. the marked true labels per sample
                predict = score.data.topk(5, dim=1)[1].cpu().tolist()
                true_target = label.data.float().topk(5, dim=1)
                true_index = true_target[1][:, :5]
                true_label = true_target[0][:, :5]
                predict_label_and_marked_label_list = []
                for jj in range(label.size(0)):
                    true_index_ = true_index[jj]
                    true_label_ = true_label[jj]
                    true = true_index_[true_label_ > 0]
                    predict_label_and_marked_label_list.append(
                        (predict[jj], true.tolist()))
                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                vis.vis.text('prec:%s,recall:%s,score:%s,a:%s' %
                             (prec_, recall_, score_, _ss),
                             win='tmp')
                vis.plot('scores', score_meter.value()[0])
                vis.plot('loss', loss_meter.value()[0])

            if ii % opt.decay_every == opt.decay_every - 1:
                # validate and adjust the learning rates accordingly
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                vis.log({
                    ' epoch:': epoch,
                    ' lr: ': lr,
                    'scores': scores,
                    'prec': prec_,
                    'recall': recall_,
                    'ss': _ss,
                    'scores_train': score_meter.value()[0],
                    'loss': loss_meter.value()[0]
                })
                if scores > best_score:
                    best_score = scores
                    best_path = model.save(name=str(scores), new=True)
                if scores < best_score:
                    # regression: roll back to the best checkpoint and decay
                    model.load(best_path, change_opt=False)
                    lr = lr * opt.lr_decay
                    if lr2 == 0:
                        lr2 = 1e-4
                    else:
                        lr2 = lr2 * 0.5
                    optimizer = model.get_optimizer(lr, lr2, 0)

                pre_loss = loss_meter.value()[0]
                loss_meter.reset()
                score_meter.reset()
def __init__(self, **kwargs):
    """Set up the layer-decomposition search: load the pretrained CIFAR model,
    build data loaders, measure per-layer/whole-model predicted and real
    (on-device via RPC) runtimes, and derive per-layer budgets.

    Args:
        **kwargs: config overrides forwarded to ``opt.parse``
            (``load_model_path`` is required).
    """
    opt.parse(kwargs)
    tl.set_backend('pytorch')
    # search state and bookkeeping
    self.dataset = "cifar"
    self.decomposed_layer_info = {'key': -1, 'image_size': -1, 'kernel_size': -1, 'stride': -1, 'padding': -1}
    self.layer_budget = {}            # per-layer runtime budget
    self.origin_layer_runtime = {}    # predicted per-layer runtime of original model
    self.origin_model_runtime = 0.0
    self.VBMF_layer_rank = {}         # analytically estimated ranks per layer
    self.constrain = opt.constrain
    self.conv_target_rate = 0.0
    self.fc_target_rate = 0.0
    self.user_budget = 1
    self.real_model_runtime = 0.0     # measured on target device (ms)
    self.remain_budget = 0.0
    self.origin_model_constrain = 0.0
    self.search_runtime = {}
    self.bayesian_iter = {}

    # Configure Logger
    self.logger = logging.getLogger()
    log_file = logging.FileHandler('result/log/test.log')
    self.logger.addHandler(log_file)
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    log_file.setFormatter(formatter)
    self.logger.setLevel(logging.DEBUG)

    # Load Perf Model (runtime estimator)
    self.perf_model = Estimator()

    # Load Pre-trained Model (a serialized module, or a dict with key 'net')
    if(opt.load_model_path is None):
        import sys
        print('set the model path')
        sys.exit(-1)
    else:
        checkpoint = torch.load(opt.load_model_path)
        if(type(checkpoint) is dict):
            checkpoint = checkpoint['net']
        self.model = checkpoint.cuda()
    print(self.model)

    # Preparing data (standard CIFAR-10 augmentation/normalization)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform_train)
    self.trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform_test)
    self.testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=4)

    # '\033[33m' / '\033[0m' are ANSI color escape codes for console output
    print('\n{}Model Info'.format('\033[33m'))
    print('↓↓↓↓↓↓↓↓↓↓↓↓↓↓{}\n'.format('\033[0m'))

    # Set Criterion
    self.criterion = torch.nn.CrossEntropyLoss()

    # Calculate Image_size for each layer by walking the feature extractor
    # and applying the standard conv/pool output-size formula.
    # NOTE(review): assumes a torchvision-style model with a `.features`
    # Sequential of Conv2d/MaxPool2d layers — confirm for other checkpoints.
    self.model_image_size = {}
    if(self.dataset == 'cifar'):
        in_image_size = 32
        for i, key in enumerate(self.model.features._modules.keys()):
            if isinstance(self.model.features._modules[key], torch.nn.modules.conv.Conv2d):
                conv_layer = self.model.features._modules[key]
                after_image_size = ((in_image_size - conv_layer.kernel_size[0] + 2*conv_layer.padding[0]) // conv_layer.stride[0] )+ 1
                self.model_image_size[key] = [in_image_size, after_image_size]
                in_image_size = after_image_size
            elif isinstance(self.model.features._modules[key], torch.nn.modules.MaxPool2d):
                maxpool_layer = self.model.features._modules[key]
                after_image_size = ((in_image_size - maxpool_layer.kernel_size) // maxpool_layer.stride )+ 1
                self.model_image_size[key] = [in_image_size, after_image_size]
                in_image_size = after_image_size
    print('{}Image_Size{}: {}'.format('\033[36m', '\033[0m', self.model_image_size))

    # Get Origin MAC and Weight and runtime
    self.origin_mac, self.origin_weight = self.get_model_mac_weight(self.model)
    self.origin_model_runtime, self.origin_layer_runtime = self.get_model_predict_runtime(self.model)
    self.origin_model_constrain, _ = self.get_model_predict_runtime_without_small(self.model)
    #print('self.origin_model_runtime: {}, self.get_model_predict_runtime: {}'.format(self.origin_model_runtime, self.get_model_predict_runtime(self.model)))

    # deploy to target: export to ONNX, measure real runtime over RPC (s -> ms)
    save_model_name = export_onnx_model(self.model)
    decomp_runtime = deploy_by_rpc(save_model_name)
    self.real_model_runtime = decomp_runtime * 1000
    os.remove(save_model_name)

    print('{}Origin_MAC{}: {}, {}Origin_Weight{}: {}'.format('\033[36m', '\033[0m', self.origin_mac, '\033[36m', '\033[0m', self.origin_weight))
    #print('{}Origin_Weight{}: {}'.format('\033[36m', '\033[0m', self.origin_weight))
    print('{}Pred_Origin_Runtime{}: {}, {}Real_Origin_Runtime{}: {}'.format('\033[36m', '\033[0m', self.origin_model_runtime, '\033[36m', '\033[0m', self.real_model_runtime))
    #print('{}Real_Origin_Runtime{}: {}'.format('\033[36m', '\033[0m', self.real_model_runtime))
    print('{}Origin_Layer_Runtime{}: {}'.format('\033[36m', '\033[0m', self.origin_layer_runtime))
    print('{}Origin_Model_Constrain{}: {}'.format('\033[36m', '\033[0m', self.origin_model_constrain))

    self.VBMF_layer_rank = self.get_VBMF_layer_rank()
    if(self.constrain > 0):
        # Calculate importance for each layer
        self.layer_importance = self.get_layer_importance()
        print('{}Layer Importance{}: {}'.format('\033[36m', '\033[0m', self.layer_importance))
        # Get Layer Budget
        self.layer_budget = self.get_layer_budget()
def train(**kwargs):
    """Train the image-compression model (optionally with an importance map).

    Flow: parse CLI overrides into ``opt`` -> build model -> build ImageNet
    dataloaders -> optimize MSE reconstruction loss (plus a rate loss when
    ``opt.use_imp``) with Adam -> validate each epoch -> adaptively anneal lr.
    All progress curves and text logs go through a ``PlotSaver`` instance.
    """
    opt.parse(kwargs)
    # vis = Visualizer(opt.env)
    # log file
    ps = PlotSaver("Train_ImageNet12_With_ImpMap_" + time.strftime("%m_%d_%H:%M:%S") + ".log.txt")

    # step1: Model — constructed by name from `models`; when the importance
    # map is enabled the checkpoint name encodes the rate threshold/weight.
    model = getattr(models, opt.model)(
        use_imp=opt.use_imp,
        model_name="CWCNN_limu_ImageNet_imp_r={r}_γ={w}".format(
            r=opt.rate_loss_threshold,
            w=opt.rate_loss_weight) if opt.use_imp else None)
    # if opt.use_imp else "test_pytorch")
    if opt.use_gpu:
        # model = multiple_gpu_process(model)
        model.cuda()
    # pdb.set_trace()
    cudnn.benchmark = True

    # step2: Data — standard ImageNet normalization; random crop/flip for
    # train, deterministic center crop for validation.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_data_transforms = transforms.Compose([
        transforms.Resize(256),  # transforms.Scale(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    val_data_transforms = transforms.Compose([
        transforms.Resize(256),  # transforms.Scale(256),
        transforms.CenterCrop(224),
        # transforms.TenCrop(224),
        # transforms.Lambda(lambda crops: t.stack(([normalize(transforms.ToTensor()(crop)) for crop in crops]))),
        transforms.ToTensor(),
        normalize
    ])
    # train_data = ImageNet_200k(opt.train_data_root, train=True, transforms=data_transforms)
    # val_data = ImageNet_200k(opt.val_data_root, train = False, transforms=data_transforms)
    train_data = datasets.ImageFolder(opt.train_data_root, train_data_transforms)
    val_data = datasets.ImageFolder(opt.val_data_root, val_data_transforms)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers, pin_memory=True)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers, pin_memory=True)

    # step3: criterion and optimizer.  Summed (not averaged) MSE; the
    # division by 2*batch_size below reproduces Caffe's loss convention.
    mse_loss = t.nn.MSELoss(size_average=False)
    if opt.use_imp:
        # rate_loss = RateLoss(opt.rate_loss_threshold, opt.rate_loss_weight)
        rate_loss = LimuRateLoss(opt.rate_loss_threshold, opt.rate_loss_weight)
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))

    start_epoch = 0
    if opt.resume:
        # When wrapped in DataParallel the real model lives in `.module`.
        # In finetune mode the optimizer state is NOT restored (None passed).
        if hasattr(model, 'module'):
            start_epoch = model.module.load(None if opt.finetune else optimizer, opt.resume, opt.finetune)
        else:
            start_epoch = model.load(None if opt.finetune else optimizer, opt.resume, opt.finetune)
        if opt.finetune:
            print('Finetune from model checkpoint file', opt.resume)
        else:
            print('Resume training from checkpoint file', opt.resume)
            print('Continue training at epoch %d.' % start_epoch)

    # step4: meters — running averages of the individual loss terms.
    mse_loss_meter = AverageValueMeter()
    if opt.use_imp:
        rate_loss_meter = AverageValueMeter()
        rate_display_meter = AverageValueMeter()
        total_loss_meter = AverageValueMeter()

    previous_loss = 1e100   # sentinel: first epoch always counts as improvement
    tolerant_now = 0        # consecutive-worse-epoch counter for early lr anneal
    same_lr_epoch = 0       # epochs elapsed at the current learning rate

    # ps init — register every curve before training starts.
    ps.new_plot('train mse loss', opt.print_freq, xlabel="iteration", ylabel="train_mse_loss")
    ps.new_plot('val mse loss', 1, xlabel="epoch", ylabel="val_mse_loss")
    if opt.use_imp:
        ps.new_plot('train rate value', opt.print_freq, xlabel="iteration", ylabel="train_rate_value")
        ps.new_plot('train rate loss', opt.print_freq, xlabel="iteration", ylabel="train_rate_loss")
        ps.new_plot('train total loss', opt.print_freq, xlabel="iteration", ylabel="train_total_loss")
        ps.new_plot('val rate value', 1, xlabel="iteration", ylabel="val_rate_value")
        ps.new_plot('val rate loss', 1, xlabel="iteration", ylabel="val_rate_loss")
        ps.new_plot('val total loss', 1, xlabel="iteration", ylabel="val_total_loss")

    for epoch in range(start_epoch + 1, opt.max_epoch + 1):
        same_lr_epoch += 1
        # per epoch avg loss meter
        mse_loss_meter.reset()
        if opt.use_imp:
            rate_display_meter.reset()
            rate_loss_meter.reset()
            total_loss_meter.reset()
        else:
            # Without the rate term, total loss IS the mse loss; alias the meter.
            total_loss_meter = mse_loss_meter

        # cur_epoch_loss refresh every epoch
        ps.new_plot("cur epoch train mse loss", opt.print_freq,
                    xlabel="iteration in cur epoch", ylabel="train_mse_loss")
        model.train()

        # `_` is the corresponding label; compression doesn't use it.
        for idx, (data, _) in enumerate(train_dataloader):
            ipt = Variable(data)
            if opt.use_gpu:
                ipt = ipt.cuda()
            optimizer.zero_grad()  # clear accumulated grads every iteration
            reconstructed = model(ipt)
            # print ('reconstructed tensor size :', reconstructed.size())
            loss = mse_loss(reconstructed, ipt)
            # Caffe-style normalization of the summed MSE.
            caffe_loss = loss / (2 * opt.batch_size)
            if opt.use_imp:
                # print ('use data_parallel?',use_data_parallel)
                # pdb.set_trace()
                # NOTE(review): `use_data_parallel` is not defined inside this
                # function — presumably a module-level flag; confirm.
                rate_loss_display = (model.module if use_data_parallel else model).imp_mask_sigmoid
                rate_loss_ = rate_loss(rate_loss_display)
                total_loss = caffe_loss + rate_loss_
            else:
                total_loss = caffe_loss
            total_loss.backward()
            optimizer.step()

            # Update meters (legacy `.data[0]` access — pre-0.4 PyTorch style).
            mse_loss_meter.add(caffe_loss.data[0])
            if opt.use_imp:
                rate_loss_meter.add(rate_loss_.data[0])
                rate_display_meter.add(rate_loss_display.data.mean())
                total_loss_meter.add(total_loss.data[0])

            if idx % opt.print_freq == opt.print_freq - 1:
                # `print_smooth` plots the running mean instead of the raw value.
                ps.add_point(
                    'train mse loss',
                    mse_loss_meter.value()[0] if opt.print_smooth else caffe_loss.data[0])
                ps.add_point(
                    'cur epoch train mse loss',
                    mse_loss_meter.value()[0] if opt.print_smooth else caffe_loss.data[0])
                if opt.use_imp:
                    ps.add_point(
                        'train rate value',
                        rate_display_meter.value()[0] if opt.print_smooth else rate_loss_display.data.mean())
                    ps.add_point(
                        'train rate loss',
                        rate_loss_meter.value()[0] if opt.print_smooth else rate_loss_.data[0])
                    ps.add_point(
                        'train total loss',
                        total_loss_meter.value()[0] if opt.print_smooth else total_loss.data[0])
                if not opt.use_imp:
                    ps.log('Epoch %d/%d, Iter %d/%d, loss = %.2f, lr = %.8f' %
                           (epoch, opt.max_epoch, idx, len(train_dataloader),
                            total_loss_meter.value()[0], lr))
                else:
                    ps.log(
                        'Epoch %d/%d, Iter %d/%d, loss = %.2f, mse_loss = %.2f, rate_loss = %.2f, rate_display = %.2f, lr = %.8f' %
                        (epoch, opt.max_epoch, idx, len(train_dataloader),
                         total_loss_meter.value()[0], mse_loss_meter.value()[0],
                         rate_loss_meter.value()[0], rate_display_meter.value()[0], lr))
                # Enter debug mode when the debug flag file exists.
                if os.path.exists(opt.debug_file):
                    pdb.set_trace()

        # data parallel
        # if hasattr(model, 'module'):
        if use_data_parallel:
            model.module.save(optimizer, epoch)
        else:
            model.save(optimizer, epoch)

        # Plot before validation.
        ps.make_plot('train mse loss')  # all epochs share one img, so epoch defaults to ""
        ps.make_plot('cur epoch train mse loss', epoch)
        if opt.use_imp:
            ps.make_plot("train rate value")
            ps.make_plot("train rate loss")
            ps.make_plot("train total loss")

        # val — returns 4 values when the rate loss is in play, 1 otherwise.
        if opt.use_imp:
            mse_val_loss, rate_val_loss, total_val_loss, rate_val_display = val(
                model, val_dataloader, mse_loss, rate_loss, ps)
        else:
            mse_val_loss = val(model, val_dataloader, mse_loss, None, ps)

        ps.add_point('val mse loss', mse_val_loss)
        if opt.use_imp:
            ps.add_point('val rate value', rate_val_display)
            ps.add_point('val rate loss', rate_val_loss)
            ps.add_point('val total loss', total_val_loss)

        ps.make_plot('val mse loss')
        if opt.use_imp:
            ps.make_plot('val rate value')
            ps.make_plot('val rate loss')
            ps.make_plot('val total loss')

        # Per-epoch text summary.
        if opt.use_imp:
            ps.log(
                'Epoch:{epoch}, lr:{lr}, train_mse_loss: {train_mse_loss}, train_rate_loss: {train_rate_loss}, train_total_loss: {train_total_loss}, train_rate_display: {train_rate_display} \n\
val_mse_loss: {val_mse_loss}, val_rate_loss: {val_rate_loss}, val_total_loss: {val_total_loss}, val_rate_display: {val_rate_display} '
                .format(epoch=epoch,
                        lr=lr,
                        train_mse_loss=mse_loss_meter.value()[0],
                        train_rate_loss=rate_loss_meter.value()[0],
                        train_total_loss=total_loss_meter.value()[0],
                        train_rate_display=rate_display_meter.value()[0],
                        val_mse_loss=mse_val_loss,
                        val_rate_loss=rate_val_loss,
                        val_total_loss=total_val_loss,
                        val_rate_display=rate_val_display))
        else:
            ps.log(
                'Epoch:{epoch}, lr:{lr}, train_mse_loss:{train_mse_loss}, val_mse_loss:{val_mse_loss}'
                .format(epoch=epoch,
                        lr=lr,
                        train_mse_loss=mse_loss_meter.value()[0],
                        val_mse_loss=mse_val_loss))

        # Adaptive lr adjustment.
        # Early anneal: at each lr, if the train loss is higher than last
        # epoch's opt.tolerant_max times in a row, decay the lr.
        if opt.use_early_adjust:
            if total_loss_meter.value()[0] > previous_loss:
                tolerant_now += 1
                if tolerant_now == opt.tolerant_max:
                    tolerant_now = 0
                    same_lr_epoch = 0
                    lr = lr * opt.lr_decay
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr
                    print('Anneal lr to', lr, 'at epoch', epoch, 'due to early stop.')
                    ps.log('Anneal lr to %.10f at epoch %d due to early stop.' % (lr, epoch))
            else:
                # Improvement partially forgives earlier bad epochs.
                tolerant_now -= 1

        # Scheduled anneal: decay every opt.lr_anneal_epochs epochs at one lr.
        if same_lr_epoch and same_lr_epoch % opt.lr_anneal_epochs == 0:
            same_lr_epoch = 0
            tolerant_now = 0
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            print('Anneal lr to', lr, 'at epoch', epoch, 'due to full epochs.')
            ps.log('Anneal lr to %.10f at epoch %d due to full epochs.' % (lr, epoch))

        previous_loss = total_loss_meter.value()[0]
def train(**kwargs):
    """Train the cross-modal hashing networks (image / text / label / hash).

    Flow: load and split data -> build the four sub-networks -> alternately
    optimize each network per epoch against the shared hash buffers
    (F/G/B/Label_hash) using a negative-log-likelihood similarity term plus
    quantization and label-regression penalties -> optionally validate MAP
    and checkpoint the best models -> linearly decay the learning rate ->
    write the loss/MAP history via ``write_result``.
    """
    opt.parse(kwargs)
    # BUG FIX: the original literal was [0.2,0.5,0.8,1.0,1.3.1.5.1.8.2.0,2.5]
    # — dots instead of commas made this a SyntaxError.  (The list itself is
    # not referenced below; kept as a documented candidate sweep of weights.)
    alpha = [0.2, 0.5, 0.8, 1.0, 1.3, 1.5, 1.8, 2.0, 2.5]

    images, tags, labels = load_data(opt.data_path)
    pretrain_model = load_pretrain_model(opt.pretrain_model_path)
    y_dim = tags.shape[1]        # text (tag) feature dimension
    label_num = labels.shape[1]  # number of label categories

    X, Y, L = split_data(images, tags, labels)
    print('...loading and splitting data finish')

    img_model = ImgModule(opt.bit, pretrain_model)
    txt_model = TxtModule(y_dim, opt.bit)
    hash_model = HashModule(opt.bit)
    label_model = LabModule(label_num)
    if opt.use_gpu:
        img_model = img_model.cuda()
        txt_model = txt_model.cuda()
        hash_model = hash_model.cuda()
        label_model = label_model.cuda()

    train_L = torch.from_numpy(L['train'])
    train_x = torch.from_numpy(X['train'])
    train_y = torch.from_numpy(Y['train'])
    query_L = torch.from_numpy(L['query'])
    query_x = torch.from_numpy(X['query'])
    query_y = torch.from_numpy(Y['query'])
    retrieval_L = torch.from_numpy(L['retrieval'])
    retrieval_x = torch.from_numpy(X['retrieval'])
    retrieval_y = torch.from_numpy(Y['retrieval'])

    num_train = train_x.shape[0]
    # Shared buffers holding the latest per-sample outputs of each network.
    F_buffer = torch.randn(num_train, opt.bit)           # image hash outputs
    G_buffer = torch.randn(num_train, opt.bit)           # text hash outputs
    X_fea_buffer = torch.randn(num_train, opt.X_fea_nums)
    Y_fea_buffer = torch.randn(num_train, opt.Y_fea_nums)
    X_label_buffer = torch.randn(num_train, label_num)
    Y_label_buffer = torch.randn(num_train, label_num)
    Label_buffer = torch.randn(num_train, label_num)
    Label_hash_buffer = torch.randn(num_train, opt.bit)
    Label_label_buffer = torch.randn(num_train, label_num)
    if opt.use_gpu:
        train_L = train_L.cuda()
        F_buffer = F_buffer.cuda()
        G_buffer = G_buffer.cuda()
        X_fea_buffer = X_fea_buffer.cuda()
        Y_fea_buffer = Y_fea_buffer.cuda()
        Label_buffer = Label_buffer.cuda()
        X_label_buffer = X_label_buffer.cuda()
        Y_label_buffer = Y_label_buffer.cuda()
        Label_hash_buffer = Label_hash_buffer.cuda()
        Label_label_buffer = Label_label_buffer.cuda()

    Sim = calc_neighbor(train_L, train_L)
    B = torch.sign(F_buffer + G_buffer)
    B_buffer = torch.sign(F_buffer + G_buffer)

    batch_size = opt.batch_size
    lr = opt.lr
    optimizer_img = SGD(img_model.parameters(), lr=lr)
    optimizer_txt = SGD(txt_model.parameters(), lr=lr)
    optimizer_hash = SGD(hash_model.parameters(), lr=lr)
    optimizer_label = SGD(label_model.parameters(), lr=lr)
    # Linear lr schedule from opt.lr down to 1e-6 across all epochs.
    learning_rate = np.linspace(opt.lr, np.power(10, -6.), opt.max_epoch + 1)

    result = {
        'loss': [],
        'hash_loss': [],
        'total_loss': []
    }

    ones = torch.ones(batch_size, 1)
    ones_ = torch.ones(num_train - batch_size, 1)
    unupdated_size = num_train - batch_size

    max_mapi2t = max_mapt2i = 0.

    for epoch in range(opt.max_epoch):
        # ---- train label net ----
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            # Labels reshaped to NCHW-like (batch, 1, label_num, 1) for the net.
            label = Variable(train_L[ind, :].unsqueeze(1).unsqueeze(-1).type(torch.float))
            if opt.use_gpu:
                label = label.cuda()
                sample_L = sample_L.cuda()
            # similarity matrix, size: (batch_size, num_train)
            S = calc_neighbor(sample_L, train_L)
            label_hash, label_label = label_model(label)
            # Label_hash_buffer[ind, :] = label_hash.data
            Label_label_buffer[ind, :] = label_label.data
            Label = Variable(train_L)
            Label_B = torch.sign(label_hash)
            Label_H = Variable(Label_hash_buffer)
            theta_l = 1.0 / 2 * torch.matmul(label_hash, Label_H.t())
            logloss_l = -torch.sum(S * theta_l - torch.log(1.0 + torch.exp(theta_l)))
            quantization_l = torch.sum(torch.pow(Label_hash_buffer[ind, :] - Label_B, 2))
            labelloss_l = torch.sum(torch.pow(Label[ind, :].float() - label_label, 2))
            loss_label = logloss_l + opt.beta * quantization_l + opt.alpha * labelloss_l  # + logloss_x_fea
            loss_label /= (batch_size * num_train)
            optimizer_label.zero_grad()
            loss_label.backward()
            optimizer_label.step()

        # ---- train image net (first pass) ----
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            image = Variable(train_x[ind].type(torch.float))
            if opt.use_gpu:
                image = image.cuda()
                sample_L = sample_L.cuda()
            # S: (batch_size, num_train)
            S = calc_neighbor(sample_L, train_L)
            image_fea, cur_f, image_label = img_model(image)  # cur_f: (batch_size, bit)
            X_fea_buffer[ind, :] = image_fea.data
            F_buffer[ind, :] = cur_f.data
            X_label_buffer[ind, :] = image_label.data
            G = Variable(G_buffer)
            H_l = Variable(Label_hash_buffer)
            B_x = torch.sign(F_buffer)
            theta_x = 1.0 / 2 * torch.matmul(cur_f, H_l.t())
            logloss_x = -torch.sum(S * theta_x - torch.log(1.0 + torch.exp(theta_x)))
            quantization_xh = torch.sum(torch.pow(B_buffer[ind, :] - cur_f, 2))
            quantization_xb = torch.sum(torch.pow(B_x[ind, :] - cur_f, 2))
            labelloss_x = torch.sum(torch.pow(train_L[ind, :].float() - image_label, 2))
            loss_x = logloss_x + opt.beta * quantization_xh + opt.alpha * labelloss_x + opt.gamma * quantization_xb  # + logloss_x_fea
            loss_x /= (batch_size * num_train)
            optimizer_img.zero_grad()
            loss_x.backward()
            optimizer_img.step()

        # ---- train txt net (first pass) ----
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            text = train_y[ind, :].unsqueeze(1).unsqueeze(-1).type(torch.float)
            text = Variable(text)
            if opt.use_gpu:
                text = text.cuda()
                sample_L = sample_L.cuda()
            # S: (batch_size, num_train)
            S = calc_neighbor(sample_L, train_L)
            txt_fea, cur_g, txt_label = txt_model(text)  # cur_g: (batch_size, bit)
            Y_fea_buffer[ind, :] = txt_fea.data
            G_buffer[ind, :] = cur_g.data
            Y_label_buffer[ind, :] = txt_label.data
            F = Variable(F_buffer)
            H_l = Variable(Label_hash_buffer)
            B_y = torch.sign(F)
            # theta_y: (batch_size, num_train)
            theta_y = 1.0 / 2 * torch.matmul(cur_g, H_l.t())
            logloss_y = -torch.sum(S * theta_y - torch.log(1.0 + torch.exp(theta_y)))
            quantization_yh = torch.sum(torch.pow(B_buffer[ind, :] - cur_g, 2))
            quantization_yb = torch.sum(torch.pow(B_y[ind, :] - cur_g, 2))
            labelloss_y = torch.sum(torch.pow(train_L[ind, :].float() - txt_label, 2))
            loss_y = logloss_y + opt.beta * quantization_yh + opt.alpha * labelloss_y + opt.gamma * quantization_yb  # + logloss_y_fea
            loss_y /= (num_train * batch_size)
            optimizer_txt.zero_grad()
            loss_y.backward()
            optimizer_txt.step()

        # ---- train hash net ----
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            # W = norm(X_fea_buffer[ind, :], Y_fea_buffer[ind, :])
            # fea = 1.0 / 2 * (torch.matmul(W, X_fea_buffer[ind, :]) + torch.matmul(W, Y_fea_buffer[ind, :]))
            # Fuse image and text features by concatenation.
            fea = torch.cat([X_fea_buffer[ind, :], Y_fea_buffer[ind, :]], dim=1)
            fea = Variable(fea)
            if opt.use_gpu:
                fea = fea.cuda()
                sample_L = sample_L.cuda()
            S = calc_neighbor(sample_L, train_L)
            A = caculateAdj(sample_L, sample_L)
            cur_B, label_hash = hash_model(fea, A)
            B_buffer[ind, :] = cur_B.data
            # calculate loss
            B = Variable(torch.sign(B_buffer))
            theta_hash = 1.0 / 2 * torch.matmul(cur_B, B_buffer.t())
            logloss_hash = -torch.sum(S * theta_hash - torch.log(1.0 + torch.exp(theta_hash)))
            label_loss = torch.sum(torch.pow(train_L[ind, :].float() - label_hash, 2))
            hashloss = torch.sum(torch.pow(B[ind, :] - cur_B, 2))
            loss_hash = logloss_hash + opt.alpha * label_loss + opt.beta * hashloss
            optimizer_hash.zero_grad()
            loss_hash.backward()
            optimizer_hash.step()

        # ---- train image net (second pass, after hash net update) ----
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            image = Variable(train_x[ind].type(torch.float))
            if opt.use_gpu:
                image = image.cuda()
                sample_L = sample_L.cuda()
            S = calc_neighbor(sample_L, train_L)
            image_fea, cur_f, image_label = img_model(image)
            X_fea_buffer[ind, :] = image_fea.data
            F_buffer[ind, :] = cur_f.data
            X_label_buffer[ind, :] = image_label.data
            G = Variable(G_buffer)
            H_l = Variable(Label_hash_buffer)
            B_x = torch.sign(F_buffer)
            theta_x = 1.0 / 2 * torch.matmul(cur_f, H_l.t())
            logloss_x = -torch.sum(S * theta_x - torch.log(1.0 + torch.exp(theta_x)))
            quantization_xh = torch.sum(torch.pow(B_buffer[ind, :] - cur_f, 2))
            quantization_xb = torch.sum(torch.pow(B_x[ind, :] - cur_f, 2))
            labelloss_x = torch.sum(torch.pow(train_L[ind, :].float() - image_label, 2))
            # NOTE(review): this pass weights quantization_xh by gamma and
            # quantization_xb by beta — the OPPOSITE of the first image pass.
            # Kept as-is; confirm whether the swap is intentional.
            loss_x = logloss_x + opt.gamma * quantization_xh + opt.alpha * labelloss_x + opt.beta * quantization_xb  # + logloss_x_fea
            loss_x /= (batch_size * num_train)
            optimizer_img.zero_grad()
            loss_x.backward()
            optimizer_img.step()

        # ---- train txt net (second pass, after hash net update) ----
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            text = train_y[ind, :].unsqueeze(1).unsqueeze(-1).type(torch.float)
            text = Variable(text)
            if opt.use_gpu:
                text = text.cuda()
                sample_L = sample_L.cuda()
            S = calc_neighbor(sample_L, train_L)
            txt_fea, cur_g, txt_label = txt_model(text)
            Y_fea_buffer[ind, :] = txt_fea.data
            G_buffer[ind, :] = cur_g.data
            Y_label_buffer[ind, :] = txt_label.data
            F = Variable(F_buffer)
            H_l = Variable(Label_hash_buffer)
            B_y = torch.sign(F)
            theta_y = 1.0 / 2 * torch.matmul(cur_g, H_l.t())
            logloss_y = -torch.sum(S * theta_y - torch.log(1.0 + torch.exp(theta_y)))
            quantization_yh = torch.sum(torch.pow(B_buffer[ind, :] - cur_g, 2))
            quantization_yb = torch.sum(torch.pow(B_y[ind, :] - cur_g, 2))
            labelloss_y = torch.sum(torch.pow(train_L[ind, :].float() - txt_label, 2))
            # NOTE(review): same beta/gamma swap as the second image pass.
            loss_y = logloss_y + opt.gamma * quantization_yh + opt.alpha * labelloss_y + opt.beta * quantization_yb  # + logloss_y_fea
            loss_y /= (num_train * batch_size)
            optimizer_txt.zero_grad()
            loss_y.backward()
            optimizer_txt.step()

        # ---- epoch summary: total losses and optional validation ----
        loss, hash_loss, total_loss = calc_loss(B, F, G, Variable(Sim), opt.alpha, opt.beta,
                                                Label_buffer, train_L, X_label_buffer, Y_label_buffer)
        print('...epoch: %3d, loss: %3.3f, lr: %f' % (epoch + 1, loss.data, lr))
        print('...epoch: %3d, hash_loss: %3.3f, lr: %f' % (epoch + 1, hash_loss.data, lr))
        print('...epoch: %3d, total_loss: %3.3f, lr: %f' % (epoch + 1, total_loss.data, lr))
        result['loss'].append(float(loss.data))
        result['hash_loss'].append(float(hash_loss.data))
        result['total_loss'].append(float(total_loss.data))

        if opt.valid:
            mapi2t, mapt2i = valid(img_model, txt_model, query_x, retrieval_x, query_y,
                                   retrieval_y, query_L, retrieval_L)
            print('...epoch: %3d, valid MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' %
                  (epoch + 1, mapi2t, mapt2i))
            # Checkpoint only when BOTH directions improve.
            if mapt2i >= max_mapt2i and mapi2t >= max_mapi2t:
                max_mapi2t = mapi2t
                max_mapt2i = mapt2i
                img_model.save(img_model.module_name + '.pth')
                txt_model.save(txt_model.module_name + '.pth')
                hash_model.save(hash_model.module_name + '.pth')

        lr = learning_rate[epoch + 1]
        # set learning rate
        for param in optimizer_img.param_groups:
            param['lr'] = lr
        for param in optimizer_txt.param_groups:
            param['lr'] = lr

    print('...training procedure finish')
    if opt.valid:
        print('   max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (max_mapi2t, max_mapt2i))
        result['mapi2t'] = max_mapi2t
        result['mapt2i'] = max_mapt2i
    else:
        mapi2t, mapt2i = valid(img_model, txt_model, query_x, retrieval_x, query_y,
                               retrieval_y, query_L, retrieval_L)
        print('   max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (mapi2t, mapt2i))
        result['mapi2t'] = mapi2t
        result['mapt2i'] = mapt2i

    write_result(result)
def main(**kwargs):
    """Train a multi-label text classifier and save its weights to ``cnn.pt``.

    Overrides from ``kwargs`` are parsed into ``opt``.  Training tracks a
    running macro-F1 over windows of ``opt.plot_every`` batches; when F1
    fails to improve for more than 3 consecutive windows, both learning
    rates are decayed and a fresh optimizer is built.  Training stops once
    ``lr`` has reached ``opt.min_lr``.
    """
    opt.parse(kwargs, print_=True)
    model = getattr(models, opt.model)(opt).cuda()
    print(model)
    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()

    dataset = My_dataset(opt.seq_len, augment=opt.augument)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=True,
                                 num_workers=4,
                                 pin_memory=True)

    optimizer = model.get_optimizer(lr, lr2)
    batch_count = 0
    not_increase_count = 0   # consecutive windows with no F1 improvement
    pre_f1 = 0.              # F1 of the previous window
    f1 = 0.                  # running F1 accumulator for the current window
    for epoch in range(opt.max_epoch):
        for ii, (content, label, sen_id) in enumerate(dataloader):
            content, label = content.cuda(), label.cuda()
            optimizer.zero_grad()
            score = model(content)

            # Multi-label decision: a class is predicted when its raw score
            # exceeds 0.  Vectorized replacement for the original per-row
            # Python loop (which also hard-coded 10 classes); this adapts to
            # the score width automatically.
            predict_ind = (score.detach().cpu().numpy() > 0.).astype(np.int32)

            loss = loss_function(score, label)
            loss.backward()
            optimizer.step()

            f1 += f1_score(label.cpu().numpy(), predict_ind, average='macro')
            if batch_count % opt.plot_every == opt.plot_every - 1:
                # Average macro-F1 over the window just completed.
                f1 = f1 / opt.plot_every
                print('average f1: %f' % f1)
                if f1 < pre_f1:
                    not_increase_count += 1
                else:
                    not_increase_count = 0
                if not_increase_count > 3:
                    # F1 stalled: stop if lr bottomed out, else anneal both
                    # learning rates and rebuild the optimizer.
                    if lr <= opt.min_lr:
                        break
                    lr *= opt.lr_decay
                    lr2 *= 0.8
                    optimizer = model.get_optimizer(lr, lr2)
                pre_f1 = f1
                f1 = 0.
            batch_count += 1
        if lr <= opt.min_lr:
            break
    torch.save(model.cpu().state_dict(), 'cnn.pt')