def train(**kwargs):
    """Train a WaveNet forecaster on windowed series data.

    kwargs override the defaults in the global config object `opt`.
    Side effects: plots loss to visdom, dumps the fitted scaler to
    'scaler.pkl', and checkpoints the model every epoch.
    """
    # Override config defaults with CLI kwargs.
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)
    model = models.WaveNet(opt.input_size, opt.out_size, opt.residual_size,
                           opt.skip_size, opt.dilation_cycles, opt.dilation_depth)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    device = torch.device('cuda') if opt.use_gpu else torch.device('cpu')
    model.to(device)
    # Data utility builds windows of WINDOW_SIZE and owns the normalization scaler.
    data_utility = Data_utility(opt.train_data_root, opt.WINDOW_SIZE)
    scaler = data_utility.get_scaler()
    joblib.dump(scaler, 'scaler.pkl')  # persist scaler for inference-time denormalization
    X, Y = data_utility.get_data()
    criterion = nn.MSELoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)
    loss_meter = meter.AverageValueMeter()
    previous_loss = 1e10
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        for i, (data, label) in tqdm(
                enumerate(data_utility.get_batches(X, Y, opt.batch_size))):
            inputs = data.to(device)
            targets = label.to(device)
            optimizer.zero_grad()
            preds = model(inputs)
            preds = preds.squeeze(2)  # drop the trailing singleton dim to match targets
            loss = criterion(preds, targets)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())
            if (i + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])  # value() -> (mean, std)
        # Checkpoint once per epoch.
        save_name = 'models/checkpoints/' + opt.model + str(epoch) + '.pth'
        model.save(save_name)
        # Decay lr when the epoch-average loss stopped improving; editing
        # param_groups in place keeps the optimizer's momentum state.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train():
    """Train the Unet spectrogram-separation model for 5 epochs.

    Fixes over the previous revision:
      * `opt.lr_decay` referenced a global `opt` that this function never
        defines/uses — replaced with the local `lr_decay`.
      * `loss_meter.value().item()` — AverageValueMeter.value() returns a
        (mean, std) tuple, so `.item()` raised AttributeError; index [0].
      * typo `scroe` -> `score`.
    """
    vis = Visualizer(env='svs')
    model = getattr(models, 'Unet')()
    model.train().cuda()
    train_data = Spg('F:/crop_test', train=True)
    val_data = Spg('F:/crop_test', train=False)
    train_dataloader = DataLoader(train_data, batch_size=4, drop_last=True)
    val_dataloader = DataLoader(val_data, batch_size=1, drop_last=True)
    loss_meter = meter.AverageValueMeter()
    lr = 0.001
    lr_decay = 0.05
    # NOTE(review): lr_decay doubles as Adam's weight_decay here — confirm intended.
    optimizer = t.optim.Adam(model.parameters(), lr=lr, weight_decay=lr_decay)
    previous_loss = 1e100
    for epoch in range(5):
        loss_meter.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input1 = Variable(data).cuda()
            target = Variable(label).cuda()
            optimizer.zero_grad()
            score = model(input1)  # fixed typo: was `scroe`
            loss = MyLoss()(input1, score, target).cuda()
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.data.item())
            if ii % 20 == 19:
                # BUG FIX: value() returns (mean, std); take the mean, not .item()
                vis.plot('loss', loss_meter.value()[0])
        # Checkpoint the raw state_dict with a timestamped name.
        prefix = 'G:/Unet_svs/check/'
        name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')
        t.save(model.state_dict(), name)
        # Decay lr when loss plateaus; in-place edit keeps Adam moment state.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * lr_decay  # BUG FIX: was opt.lr_decay, but `opt` is undefined here
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a 7-class image-sentiment classifier with SGD + NLL loss.

    kwargs override defaults in the global config `opt`. Plots loss and
    validation accuracy to visdom; saves the model every epoch.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)
    # step1: config model
    model = getattr(Nets, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model.to(device)
    # step2: data
    train_data = imageSentiment(opt.train_path, train=True)   # training split
    val_data = imageSentiment(opt.train_path, train=False)    # validation split
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers)
    # step3: loss function and optimizer
    # criterion = nn.CrossEntropyLoss()  # if used, the net must NOT apply softmax itself
    lr = opt.lr  # NOTE(review): unused — the SGD call below hard-codes lr=0.001; confirm intended
    # optimizer = Optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay)
    optimizer = Optim.SGD(model.parameters(), lr=0.001, momentum=0.9, nesterov=True)
    # step4: metrics (running-average loss and a 7-class confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(7)
    previous_loss = 1e100  # NOTE(review): assigned but never used below
    # train
    for i in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        total_loss = 0.
        # NOTE: this loader yields (label, data) in that order
        for ii, (label, data) in tqdm(enumerate(train_dataloader), total=len(train_dataloader)):
            if opt.use_gpu:
                label, data = label.to(device), data.to(device)
            optimizer.zero_grad()
            score = model(data)
            # ps: nll_loss / cross-entropy take index targets directly — no one-hot needed
            loss = F.nll_loss(score, label)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            # update metrics and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, label.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
        vis.plot('mach avgloss', total_loss / len(train_dataloader))
        model.save()
        # validation-set metrics
        val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
def train(**kwargs):
    """Train a dog-vs-cat classifier.

    kwargs override defaults in `opt`. Plots loss/accuracy to visdom,
    checkpoints the raw state_dict every epoch, and decays the learning
    rate when the epoch-average loss stops improving.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        train_loss = 0.
        train_acc = 0.
        i = 0  # NOTE(review): duplicates `ii` from enumerate below
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train model
            input = data.to(opt.device)
            target = label.to(opt.device)
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            train_loss += loss.item()
            pred = t.max(score, 1)[1]  # argmax over class logits
            train_correct = (pred == target).sum()
            train_acc += train_correct.item()
            print('epoch ', epoch, ' batch ', i)
            i += 1
            print('Train Loss: %f, Acc: %f' % (loss.item(), train_correct.item() / float(len(data))))
            loss.backward()
            optimizer.step()
            # meters update and visualize
            loss_meter.add(loss.item())
            # detach keeps the meter inputs off the autograd graph
            confusion_matrix.add(score.detach(), target.detach())
            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])
                # drop into the debugger when the flag file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        print('Train Loss: {:.6f}, Acc: {:.6f}'.format(train_loss / (len(train_data)),
                                                       train_acc / (len(train_data))))
        # model.save()
        prefix = 'checkpoints/' + opt.model + '_a' + str(epoch) + '.pth'
        t.save(model.state_dict(), prefix)

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader, criterion, val_data)
        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
            train_cm=str(confusion_matrix.value()), lr=lr))

        # update learning rate; editing param_groups in place keeps
        # the optimizer's momentum state (no re-construction needed)
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a DogCat classifier with run-state persisted through `save_dict`.

    Fixes over the previous revision:
      * `previous_loss` was unbound when `opt.pars_path` did not exist,
        causing a NameError at the lr-decay check — now initialized to 1e100.
      * `ipdb.set_trave()` typo (AttributeError when the debug file exists)
        -> `ipdb.set_trace()`.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if os.path.exists(opt.load_model_path):
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # Resume bookkeeping dict (loss history, checkpoints, metrics) if present.
    if os.path.exists(opt.pars_path):
        dic = load_dict(opt.pars_path)
        previous_loss = dic['loss'][-1] if 'loss' in dic.keys() else 1e100
    else:
        dic = {}
        previous_loss = 1e100  # BUG FIX: was unbound on this branch

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)

    # train (resumes at epoch 5 by construction of this script)
    for epoch in range(5, opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader), total=len(train_dataloader)):
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()
            # meters update and visualize
            loss_meter.add(loss.data.item())
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                dic = save_dict(opt.pars_path, dic, loss_data=loss_meter.value()[0])
                vis.plot('loss', dic['loss_data'])
                name = model.save()
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()  # BUG FIX: was ipdb.set_trave()
        name = model.save()
        # reduce learning rate when loss no longer decreases;
        # in-place param_groups edit keeps Adam moment state
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
        dic = save_dict(opt.pars_path, dic, name=name, epoch=epoch, lr=lr,
                        loss=loss_meter.value()[0], train_cm=confusion_matrix.value())
        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        dic = save_dict(opt.pars_path, dic, val_accuracy=val_accuracy, val_cm=val_cm.value())
        vis.log(dic)
def train(**kwargs):
    """Train a 2D CWRU bearing-fault classifier.

    :param kwargs: config overrides, merged into the global `opt`
    :return: None; trains the model, checkpoints it, and plots metrics to visdom
    """
    # Merge CLI overrides into the config.
    opt.parse(kwargs)
    # visdom plotting helper
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: build the model named by opt.model
    model = getattr(models, opt.model)()
    # optionally resume from saved weights
    if opt.load_model_path:
        model.load(opt.load_model_path)
    # pin to a specific GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    model.to(opt.device)

    # step2: data — the "test" split doubles as the validation set
    # (these samples are never used for training)
    train_data = CWRUDataset2D(opt.train_data_root, train=True)
    test_data = CWRUDataset2D(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True)
    test_dataloader = DataLoader(test_data, opt.batch_size, shuffle=False)

    # step3: objective and optimizer
    criterion = torch.nn.CrossEntropyLoss()  # cross-entropy classification loss
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step4: meters — smoothed loss (mean/std) and confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(opt.category)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input = data.to(opt.device)
            target = label.to(opt.device)
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()
            # update meters / visualize
            loss_meter.add(loss.item())
            # detach keeps meter inputs off the autograd graph
            confusion_matrix.add(score.detach(), target.detach())
            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])
                # drop into the debugger when the flag file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        # checkpoint every epoch
        model.save()
        # test-set metrics and visualization
        val_cm, val_accuracy = val(model, test_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
            train_cm=str(confusion_matrix.value()), lr=lr))
        # decay lr when loss stops falling; param_groups edit keeps optimizer state
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a DogCat classifier with per-epoch validation and visdom plots.

    Fix over the previous revision: the per-epoch progress message printed
    "trian epoch" — corrected to "train epoch".
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        print("train epoch: ", epoch)  # BUG FIX: message said "trian epoch: "
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input = data.to(opt.device)
            target = label.to(opt.device)
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()
            # meters update and visualize; detach keeps meters off the autograd graph
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), target.detach())
            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])
        model.save()

        # validate and visualize
        print("start eval:")
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch, loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()), lr=lr))

        # decay lr when the epoch loss stopped decreasing; editing
        # param_groups in place keeps the optimizer's momentum state
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            vis.plot('lr', lr)
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a scene classifier; kwargs override defaults in `opt`.

    Fix over the previous revision: the tqdm progress bar used
    `total=len(train_data)` (number of samples) while the loop iterates
    batches — the bar never completed. Corrected to len(train_dataloader).
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)
    cudnn.enabled = True
    cudnn.benchmark = True  # autotune conv kernels for fixed input sizes

    # step1: build and (optionally) load the model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data (training and held-out validation)
    train_data = SceneData(opt.train_data_root, opt.labels, train=True)
    val_data = SceneData(opt.train_data_root, opt.labels, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=True,
                                num_workers=opt.num_workers)

    # step3: objective and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
    # Finetune variant (freeze all but the final fully-connected layer):
    #   for para in list(model.parameters())[:-1]:
    #       para.requires_grad = False
    #   optimizer = t.optim.Adam(params=[model.fc.weight, model.fc.bias],
    #                            lr=opt.lr, weight_decay=opt.weight_decay)

    # step4: meters — smoothed loss and confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(opt.num_labels)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        # BUG FIX: total counts batches, not samples
        for step, (data, label) in tqdm.tqdm(enumerate(train_dataloader),
                                             total=len(train_dataloader)):
            train_input = data.to(opt.device)
            label_input = label.to(opt.device)
            optimizer.zero_grad()
            score = model(train_input)
            loss = criterion(score, label_input)
            loss.backward()
            optimizer.step()
            # update meters / visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), label_input.detach())
            if step % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
        model.save()

        # validation metrics and visualization
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch, loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()), lr=lr))

        # decay lr when loss plateaus; in-place edit keeps optimizer state
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train the HRnet-based demoireing model.

    Uses an L1-Charbonnier pixel loss blended with an L1-Sobel edge loss
    (blend weight `opt.loss_alpha` is ramped by epoch), gradient
    accumulation, PSNR tracking, periodic visdom plotting, loss logging to
    a text file, and timestamped checkpointing with resume support.
    """
    # init: override config attributes from CLI kwargs
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)
    if opt.vis:
        vis = Visualizer(opt.env)
        vis_val = Visualizer('valdemoire')

    # dataset transforms (FiveCrop variant defined but the datasets below
    # are constructed without an explicit transform — NOTE(review): confirm
    # MoireData applies its own default)
    FiveCrop_transforms = transforms.Compose([
        transforms.FiveCrop(256),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops]))
    ])
    data_transforms = transforms.Compose([
        # transforms.RandomCrop(256),
        transforms.ToTensor()
    ])
    train_data = MoireData(opt.train_path)
    test_data = MoireData(opt.test_path, is_val=True)
    train_dataloader = DataLoader(train_data,
                                  batch_size=opt.train_batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers,
                                  drop_last=True)
    test_dataloader = DataLoader(test_data,
                                 batch_size=opt.val_batch_size,
                                 shuffle=True,
                                 num_workers=opt.num_workers,
                                 drop_last=True)

    last_epoch = 0
    # model_init
    cfg.merge_from_file("config/cfg.yaml")
    model = get_pose_net(cfg, pretrained=opt.model_path)  # initweight
    model = model.to(opt.device)

    # baseline validation before any training
    if opt.vis:
        val_loss, val_psnr = val(model, test_dataloader, vis_val)
        print(val_loss, val_psnr)
    else:
        val_loss, val_psnr = val(model, test_dataloader)
        print(val_loss, val_psnr)

    criterion_c = L1_Charbonnier_loss()
    criterion_s = L1_Sobel_Loss()
    lr = opt.lr
    optimizer = torch.optim.Adam(
        params=model.parameters(),
        lr=lr,
        weight_decay=0.01  # 0.005
    )

    # resume optimizer state / lr / epoch counter from the checkpoint
    if opt.model_path:
        map_location = lambda storage, loc: storage  # load to CPU first
        checkpoint = torch.load(opt.model_path, map_location=map_location)
        last_epoch = checkpoint["epoch"]
        optimizer_state = checkpoint["optimizer"]
        optimizer.load_state_dict(optimizer_state)
        lr = checkpoint["lr"]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    loss_meter = meter.AverageValueMeter()
    psnr_meter = meter.AverageValueMeter()
    previous_loss = 1e100
    accumulation_steps = opt.accumulation_steps

    for epoch in range(opt.max_epoch):
        if epoch < last_epoch:
            continue  # skip epochs already completed before the resume point
        loss_meter.reset()
        psnr_meter.reset()
        torch.cuda.empty_cache()
        loss_list = []
        for ii, (moires, clear_list) in tqdm(enumerate(train_dataloader)):
            moires = moires.to(opt.device)
            clears = clear_list[0].to(opt.device)
            output_list, edge_output_list = model(moires)
            outputs, edge_X = output_list[0], edge_output_list[0]
            # ramp the Charbonnier/Sobel blend weight by training phase
            if epoch < 20:
                pass
            elif epoch >= 20 and epoch < 40:
                opt.loss_alpha = 0.9
            else:
                opt.loss_alpha = 1.0
            c_loss = criterion_c(outputs, clears)
            s_loss = criterion_s(edge_X, clears)
            loss = opt.loss_alpha * c_loss + (1 - opt.loss_alpha) * s_loss
            # gradient accumulation: scale down, step every `accumulation_steps`
            loss = loss / accumulation_steps
            loss.backward()
            if (ii + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
            loss_meter.add(loss.item() * accumulation_steps)  # undo the scaling for logging
            moires = tensor2im(moires)
            outputs = tensor2im(outputs)
            clears = tensor2im(clears)
            psnr = colour.utilities.metric_psnr(outputs, clears)
            psnr_meter.add(psnr)
            if opt.vis and (ii + 1) % opt.plot_every == 0:  # plot once every `plot_every` batches
                vis.images(moires, win='moire_image')
                vis.images(outputs, win='output_image')
                vis.text(
                    "current outputs_size:{outputs_size},<br/> outputs:{outputs}<br/>"
                    .format(outputs_size=outputs.shape, outputs=outputs),
                    win="size")
                vis.images(clears, win='clear_image')
                # record the train loss to txt
                vis.plot('train_loss', loss_meter.value()[0])  # meter.value() returns (mean, std)
                vis.log(
                    "epoch:{epoch}, lr:{lr}, train_loss:{loss}, train_psnr:{train_psnr}"
                    .format(epoch=epoch + 1, loss=loss_meter.value()[0],
                            lr=lr, train_psnr=psnr_meter.value()[0]))
            loss_list.append(str(loss_meter.value()[0]))
        torch.cuda.empty_cache()
        if opt.vis:
            val_loss, val_psnr = val(model, test_dataloader, vis_val)
            vis.plot('val_loss', val_loss)
            vis.log(
                "epoch:{epoch}, average val_loss:{val_loss}, average val_psnr:{val_psnr}"
                .format(epoch=epoch + 1, val_loss=val_loss, val_psnr=val_psnr))
        else:
            val_loss, val_psnr = val(model, test_dataloader)
        # append this epoch's loss trace to a text file
        with open(opt.save_prefix + "loss_list.txt", 'a') as f:
            f.write("\nepoch_{}\n".format(epoch + 1))
            f.write('\n'.join(loss_list))
        # checkpoint every `save_every` epochs (and after the first epoch)
        if (epoch + 1) % opt.save_every == 0 or epoch == 0:
            prefix = opt.save_prefix + 'HRnet_epoch{}_'.format(epoch + 1)
            file_name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')
            checkpoint = {
                'epoch': epoch + 1,
                "optimizer": optimizer.state_dict(),
                "model": model.state_dict(),
                "lr": lr
            }
            torch.save(checkpoint, file_name)
        # decay lr when loss plateaus, and unconditionally every 10 epochs
        if (loss_meter.value()[0] > previous_loss) or ((epoch + 1) % 10) == 0:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

    # final checkpoint after the last epoch
    prefix = opt.save_prefix + 'HRnet_final_'
    file_name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')
    checkpoint = {
        'epoch': epoch + 1,
        "optimizer": optimizer.state_dict(),
        "model": model.state_dict(),
        "lr": lr
    }
    torch.save(checkpoint, file_name)
def train(self):
    """Train twin photo/sketch embedding networks.

    Each network is optimized with a per-domain category cross-entropy
    loss plus a shared triplet loss over the paired features. Optionally
    runs instance-recall tests and checkpoints every `self.test_f` epochs.
    """
    # Build twin backbones wrapped in DataParallel (multi-GPU).
    if self.net == 'vgg16':
        photo_net = DataParallel(self._get_vgg16()).cuda()
        sketch_net = DataParallel(self._get_vgg16()).cuda()
    elif self.net == 'resnet34':
        photo_net = DataParallel(self._get_resnet34()).cuda()
        sketch_net = DataParallel(self._get_resnet34()).cuda()
    elif self.net == 'resnet50':
        photo_net = DataParallel(self._get_resnet50()).cuda()
        sketch_net = DataParallel(self._get_resnet50()).cuda()
    # NOTE(review): any other self.net value leaves photo_net/sketch_net
    # unbound and the code below raises NameError — confirm callers validate.
    if self.fine_tune:
        # sketch checkpoint sits beside the photo one; path differs only by name
        photo_net_root = self.model_root
        sketch_net_root = self.model_root.replace('photo', 'sketch')
        photo_net.load_state_dict(
            t.load(photo_net_root, map_location=t.device('cpu')))
        sketch_net.load_state_dict(
            t.load(sketch_net_root, map_location=t.device('cpu')))
    print('net')
    print(photo_net)
    # triplet_loss = nn.TripletMarginLoss(margin=self.margin, p=self.p).cuda()
    photo_cat_loss = nn.CrossEntropyLoss().cuda()
    sketch_cat_loss = nn.CrossEntropyLoss().cuda()
    my_triplet_loss = TripletLoss().cuda()
    # optimizer — one per network
    photo_optimizer = t.optim.Adam(photo_net.parameters(), lr=self.lr)
    sketch_optimizer = t.optim.Adam(sketch_net.parameters(), lr=self.lr)
    if self.vis:
        vis = Visualizer(self.env)
    # NOTE(review): `vis` is also used in the test branch below even when
    # self.vis is False — unbound in that configuration; verify.
    triplet_loss_meter = AverageValueMeter()
    sketch_cat_loss_meter = AverageValueMeter()
    photo_cat_loss_meter = AverageValueMeter()
    data_loader = TripleDataLoader(self.dataloader_opt)
    dataset = data_loader.load_data()
    for epoch in range(self.epochs):
        print('---------------{0}---------------'.format(epoch))
        # periodic instance-recall evaluation and checkpointing
        if self.test and epoch % self.test_f == 0:
            tester_config = Config()
            tester_config.test_bs = 128
            tester_config.photo_net = photo_net
            tester_config.sketch_net = sketch_net
            tester_config.photo_test = self.photo_test
            tester_config.sketch_test = self.sketch_test
            tester = Tester(tester_config)
            test_result = tester.test_instance_recall()
            result_key = list(test_result.keys())
            vis.plot('recall',
                     np.array([
                         test_result[result_key[0]],
                         test_result[result_key[1]]
                     ]),
                     legend=[result_key[0], result_key[1]])
            if self.save_model:
                t.save(
                    photo_net.state_dict(), self.save_dir +
                    '/photo' + '/photo_' + self.net + '_%s.pth' % epoch)
                t.save(
                    sketch_net.state_dict(), self.save_dir +
                    '/sketch' + '/sketch_' + self.net + '_%s.pth' % epoch)
        photo_net.train()
        sketch_net.train()
        for ii, data in enumerate(dataset):
            photo_optimizer.zero_grad()
            sketch_optimizer.zero_grad()
            photo = data['P'].cuda()
            sketch = data['S'].cuda()
            label = data['L'].cuda()
            p_cat, p_feature = photo_net(photo)
            s_cat, s_feature = sketch_net(sketch)
            # category loss
            p_cat_loss = photo_cat_loss(p_cat, label)
            s_cat_loss = sketch_cat_loss(s_cat, label)
            photo_cat_loss_meter.add(p_cat_loss.item())
            sketch_cat_loss_meter.add(s_cat_loss.item())
            # triplet loss
            loss = p_cat_loss + s_cat_loss
            # tri_record = 0.
            '''
            for i in range(self.batch_size): # negative negative_feature = t.cat([p_feature[0:i, :], p_feature[i + 1:, :]], dim=0) # print('negative_feature.size :', negative_feature.size()) # photo_feature anchor_feature = s_feature[i, :] anchor_feature = anchor_feature.expand_as(negative_feature) # print('anchor_feature.size :', anchor_feature.size()) # positive positive_feature = p_feature[i, :] positive_feature = positive_feature.expand_as(negative_feature) # print('positive_feature.size :', positive_feature.size()) tri_loss = triplet_loss(anchor_feature, positive_feature, negative_feature) tri_record = tri_record + tri_loss # print('tri_loss :', tri_loss) loss = loss + tri_loss
            '''
            # print('tri_record : ', tri_record)
            # vectorized replacement for the commented-out per-sample triplet loop above
            my_tri_loss = my_triplet_loss(
                s_feature, p_feature) / (self.batch_size - 1)
            triplet_loss_meter.add(my_tri_loss.item())
            # print('my_tri_loss : ', my_tri_loss)
            # print(tri_record - my_tri_loss)
            loss = loss + my_tri_loss
            # print('loss :', loss)
            # loss = loss / opt.batch_size
            loss.backward()
            photo_optimizer.step()
            sketch_optimizer.step()
            if self.vis:
                vis.plot('triplet_loss',
                         np.array([
                             triplet_loss_meter.value()[0],
                             photo_cat_loss_meter.value()[0],
                             sketch_cat_loss_meter.value()[0]
                         ]),
                         legend=[
                             'triplet_loss', 'photo_cat_loss',
                             'sketch_cat_loss'
                         ])
            # meters are per-batch: reset after each plot
            triplet_loss_meter.reset()
            photo_cat_loss_meter.reset()
            sketch_cat_loss_meter.reset()
def train():
    """Train the Kesci binary sequence classifier for 500 epochs.

    Fixes over the previous revision:
      * `loss_meter.add(loss.data[0])` — indexing a 0-dim tensor raises
        IndexError on PyTorch >= 0.4; replaced with `loss.item()`.
      * local `property` shadowed the builtin; renamed to `prop`.
    """
    vis = Visualizer("Kesci")
    train_data = AppData("data/data_16d_target/train.json", iflabel=True)
    val_data = AppData("data/data_16d_target/val.json", iflabel=True)
    train_dataloader = DataLoader(train_data, 32, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_data, 256, shuffle=False, num_workers=2)
    test_data = AppData("data/data_16d_target/test.json", iflabel=True)
    test_dataloader = DataLoader(test_data, 256, shuffle=False, num_workers=2)

    criterion = t.nn.CrossEntropyLoss().cuda()
    learning_rate = 0.003
    weight_decay = 0.0002
    model = Sequence(15, 128, 1).cuda()
    optimizer = t.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(500):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, prop, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data).cuda()
            input2 = Variable(prop).cuda()
            target = Variable(label).cuda().view(-1)
            output = model(input, input2)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # BUG FIX: was loss.data[0]; .item() extracts the scalar safely
            loss_meter.add(loss.item())
            confusion_matrix.add(output.data, target.data)
            if ii % 100 == 99:
                vis.plot('loss', loss_meter.value()[0])

        # every 3rd epoch: F1 on train and validation splits
        if epoch % 3 == 2:
            train_cm, train_f1 = val(model, train_dataloader)
            vis.plot('train_f1', train_f1)
            val_cm, val_f1 = val(model, val_dataloader)
            vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate})

        # decay lr when loss plateaus; param_groups edit keeps Adam moment state
        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * 0.95
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
        previous_loss = loss_meter.value()[0]

        # every 10th epoch: checkpoint and log precision/recall on all splits
        if epoch % 10 == 9:
            model.save()
            test_cm, test_f1 = val(model, test_dataloader)
            vis.plot('test_f1', test_f1)
            vis.log(
                "model:{model} | {train_f1}, {train_pre}, {train_rec} | {val_f1}, {val_pre}, {val_rec} | {test_f1}, {test_pre}, {test_rec}"
                .format(train_f1=train_f1,
                        val_f1=val_f1,
                        test_f1=test_f1,
                        model=time.strftime('%m%d %H:%M:%S'),
                        train_pre=str(train_cm.value()[0][0] / train_cm.value()[:, 0].sum()),
                        train_rec=str(train_cm.value()[0][0] / train_cm.value()[0].sum()),
                        val_pre=str(val_cm.value()[0][0] / val_cm.value()[:, 0].sum()),
                        val_rec=str(val_cm.value()[0][0] / val_cm.value()[0].sum()),
                        test_pre=str(test_cm.value()[0][0] / test_cm.value()[:, 0].sum()),
                        test_rec=str(test_cm.value()[0][0] / test_cm.value()[0].sum())))
class Trainer(BaseTrainer):
    """
    Trainer class

    Note:
        Inherited from BaseTrainer.
        self.optimizer is by default handled by BaseTrainer based on config.
    """

    def __init__(self, model, loss, metrics, resume, config, data_loader,
                 toolbox: Toolbox, valid_data_loader=None, train_logger=None):
        super(Trainer, self).__init__(model, loss, metrics, resume, config,
                                      train_logger)
        self.config = config
        self.batch_size = data_loader.batch_size
        self.data_loader = data_loader
        self.valid_data_loader = valid_data_loader
        # validation is enabled only when a loader is supplied
        self.valid = True if self.valid_data_loader is not None else False
        # log roughly sqrt(batch_size) batches apart
        self.log_step = int(np.sqrt(self.batch_size))
        self.toolbox = toolbox
        self.visdom = Visualizer(env='FOTS')

    def _to_tensor(self, *tensors):
        # Move each given tensor to the trainer's device; returns them as a list.
        t = []
        for __tensors in tensors:
            t.append(__tensors.to(self.device))
        return t

    def _eval_metrics(self, output, target, mask):
        # Accumulate every configured metric over argmax'd class predictions.
        # NOTE(review): `mask` is accepted but not used — confirm intended.
        acc_metrics = np.zeros(len(self.metrics))
        output = output.cpu().data.numpy()
        target = target.cpu().data.numpy()
        output = np.argmax(output, axis=1)
        for i, metric in enumerate(self.metrics):
            acc_metrics[i] += metric(output, target)
        return acc_metrics

    def _train_epoch(self, epoch):
        """
        Training logic for an epoch

        :param epoch: Current training epoch.
        :return: A log that contains all information you want to save.

        Note:
            If you have additional information to record, for example:
                > additional_log = {"x": x, "y": y}
            merge it with log before return. i.e.
                > log = {**log, **additional_log}
                > return log
            The metrics in log must have the key 'metrics'.
        """
        self.model.train()
        total_loss = 0
        total_metrics = np.zeros(len(self.metrics))
        for batch_idx, gt in enumerate(self.data_loader):
            img, score_map, geo_map, training_mask, transcript = gt
            img, score_map, geo_map, training_mask = self._to_tensor(
                img, score_map, geo_map, training_mask)
            recog_map = None  # recognition-branch ground truth not wired up yet
            self.optimizer.zero_grad()
            pred_score_map, pred_geo_map, pred_recog_map = self.model(img)
            loss = self.loss(score_map, pred_score_map, geo_map, pred_geo_map,
                             pred_recog_map, recog_map, training_mask)
            loss.backward()
            self.optimizer.step()
            total_loss += loss.item()
            #total_metrics += self._eval_metrics(output, target)
            total_metrics += 0  # metrics computation disabled for now
            if self.verbosity >= 2 and batch_idx % self.log_step == 0:
                self.logger.info(
                    'Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                        epoch, batch_idx * self.data_loader.batch_size,
                        len(self.data_loader) * self.data_loader.batch_size,
                        100.0 * batch_idx / len(self.data_loader),
                        loss.item()))
        # plot the epoch-average training loss
        self.visdom.plot('train_loss', total_loss / len(self.data_loader))
        log = {
            'loss': total_loss / len(self.data_loader),
            'metrics': (total_metrics / len(self.data_loader)).tolist()
        }
        if self.valid:
            val_log = self._valid_epoch()
            log = {**log, **val_log}
        return log

    def _valid_epoch(self):
        """
        Validate after training an epoch

        :return: A log that contains information about validation

        Note:
            The validation metrics in log must have the key 'val_metrics'.
        """
        self.model.eval()
        total_val_loss = 0
        total_val_metrics = np.zeros(len(self.metrics))
        with torch.no_grad():  # no gradients during validation
            for batch_idx, gt in enumerate(self.valid_data_loader):
                img, score_map, geo_map, training_mask, transcript = gt
                img, score_map, geo_map, training_mask = self._to_tensor(
                    img, score_map, geo_map, training_mask)
                recog_map = None
                pred_score_map, pred_geo_map, pred_recog_map = self.model(img)
                loss = self.loss(score_map, pred_score_map, geo_map,
                                 pred_geo_map, pred_recog_map, recog_map,
                                 training_mask)
                total_val_loss += loss.item()
                output = (pred_score_map, pred_geo_map, pred_recog_map)
                target = (score_map, geo_map, recog_map)
                #total_val_metrics += self._eval_metrics(output, target, training_mask)
                #TODO: should add AP metric
        self.visdom.plot('val_loss',
                         total_val_loss / len(self.valid_data_loader))
        return {
            'val_loss': total_val_loss / len(self.valid_data_loader),
            'val_metrics':
            (total_val_metrics / len(self.valid_data_loader)).tolist()
        }
def train():
    """Train a single-channel binary-segmentation UNet on the nodule dataset.

    Loads an optional multi-GPU checkpoint, trains with BCE (+ optional dice)
    loss, validates with the dice score each epoch, and every 10 epochs
    evaluates 2-D recall on the test split. Progress goes to visdom.

    Fixes vs. previous revision:
      * `true_masks` was only bound on the GPU path (NameError on CPU)
      * epoch mean / validation dice divided by the last loop *index*
        (off-by-one; ZeroDivisionError when there is a single batch)
      * validation / test inference now run under `t.no_grad()`
    """
    t.cuda.set_device(1)
    # n_channels=1: medical images here are single-channel grayscale
    # n_classes=1: binary segmentation
    net = UNet(n_channels=1, n_classes=1)
    optimizer = t.optim.SGD(net.parameters(),
                            lr=opt.learning_rate,
                            momentum=0.9,
                            weight_decay=0.0005)
    # binary cross-entropy (suits masks that cover a large image area)
    criterion = t.nn.BCELoss()
    start_epoch = 0
    if opt.load_model_path:
        checkpoint = t.load(opt.load_model_path)
        # checkpoint was saved from a DataParallel model: strip the
        # `module.` prefix so the weights load into a plain model
        state_dict = checkpoint['net']
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']

    # decay lr by 10x at each milestone epoch
    if start_epoch == 0:
        scheduler = t.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=opt.milestones,
                                                     gamma=0.1,
                                                     last_epoch=-1)
        print('从头训练 ,学习率为{}'.format(optimizer.param_groups[0]['lr']))
    else:
        scheduler = t.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=opt.milestones,
                                                     gamma=0.1,
                                                     last_epoch=start_epoch)
        print('加载预训练模型{}并从{}轮开始训练,学习率为{}'.format(
            opt.load_model_path, start_epoch,
            optimizer.param_groups[0]['lr']))

    if opt.use_gpu:
        net = t.nn.DataParallel(net, device_ids=opt.device_ids)
        net.cuda()
        cudnn.benchmark = True

    vis = Visualizer(opt.env)

    train_data = NodeDataSet(train=True)
    val_data = NodeDataSet(val=True)
    test_data = NodeDataSet(test=True)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=True, num_workers=opt.num_workers)
    test_dataloader = DataLoader(test_data, opt.test_batch_size,
                                 shuffle=False, num_workers=opt.num_workers)

    for epoch in range(opt.max_epoch - start_epoch):
        print('开始 epoch {}/{}.'.format(start_epoch + epoch + 1, opt.max_epoch))
        epoch_loss = 0
        # kept at the top of the epoch to preserve the original schedule
        scheduler.step()

        for ii, (img, mask) in enumerate(train_dataloader):
            true_masks = mask  # FIX: was only assigned on the GPU branch
            if opt.use_gpu:
                img = img.cuda()
                true_masks = true_masks.cuda()
            masks_pred = net(img)
            masks_probs = t.sigmoid(masks_pred)
            # loss = BCE (+ optional dice term)
            loss = criterion(masks_probs.view(-1), true_masks.view(-1))
            if opt.use_dice_loss:
                loss += dice_loss(masks_probs, true_masks)
            epoch_loss += loss.item()
            if ii % 2 == 0:
                vis.plot('训练集loss', loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        vis.log("epoch:{epoch},lr:{lr},loss:{loss}".format(
            epoch=epoch, loss=loss.item(),
            lr=optimizer.param_groups[0]['lr']))
        # FIX: divide by the batch count, not the last index
        vis.plot('每轮epoch的loss均值', epoch_loss / (ii + 1))

        state = {
            'net': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch
        }
        t.save(state, opt.checkpoint_root + '{}_unet.pth'.format(epoch))

        # ============ validation ============
        net.eval()
        tot = 0
        with t.no_grad():  # inference only: no graph needed
            for jj, (img_val, mask_val) in enumerate(val_dataloader):
                true_mask_val = mask_val
                if opt.use_gpu:
                    img_val = img_val.cuda()
                    true_mask_val = true_mask_val.cuda()
                mask_pred = net(img_val)
                mask_pred = (t.sigmoid(mask_pred) > 0.5).float()  # threshold 0.5
                # dice coefficient measures overlap of the two masks
                tot += dice_loss(mask_pred, true_mask_val).item()
        val_dice = tot / (jj + 1)  # FIX: was tot / jj (off-by-one, /0 risk)
        vis.plot('验证集 Dice损失', val_dice)

        # ============ test-set recall every 10 epochs ============
        if epoch % 10 == 0:
            result_test = []
            with t.no_grad():
                for kk, (img_test, mask_test) in enumerate(test_dataloader):
                    # ground-truth mask deliberately unused here
                    if opt.use_gpu:
                        img_test = img_test.cuda()
                    mask_pred_test = net(img_test)  # [1,1,512,512]
                    probs = t.sigmoid(mask_pred_test).squeeze().squeeze().cpu(
                    ).detach().numpy()  # [512,512]
                    mask = probs > opt.out_threshold
                    result_test.append(mask)
            vis.plot('测试集二维召回率', getRecall(result_test).getResult())
        net.train()
def train(**kwargs): opt.parse(kwargs) vis = Visualizer(opt.env) model = getattr(models, opt.model)() if opt.load_model_path: model.load(opt.load_model_path) if opt.use_gpu: model.cuda() # 数据设定 户籍科 010 82640433 train_data = DogCat(opt.load_model_path, train=True) val_data = DogCat(opt.train_data_root, train=False) train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True, num_workers=opt.num_workers) train_dataloader = DataLoader(test_data, opt.batch_size, shuffle=False, num_workers=opt.num_workers) # 目标函数和优化器 criterion = t.nn.CrossEntropyLoss() lr = opt.lr optimizer = t.optim.Adam(model) # 统计指标,平滑处理之后的损失 loss_meter = meter.AverageValueMeter() confusion_matrix = meter.ConfusionMeter(2) previous_loss = 1e100 for epoch in range(opt.max_epoch): loss_meter.reset() confusion_matrix.reset() for ii, (data, label) in tqdm( enumerate(train_dataloader)): # ii num ,(data,label) enumerate # 训练模型参数 input = Variable(data) target = Variable(label) if opt.use_gpu: input = input.cuda() target = target.cuda() optimizer.zero_grad() score = model(input) loss = criterion(score, target) loss.backward() optimizer.stop() # 更新统计指标及可视化 loss_meter.add(loss.data[0]) confusion_matrix.add(loss.data[0]) confusion_matrix.add(score.data, target.data) if ii % opt.print_freq == opt.print_freq - 1: vis.plot('loss', loss_meter.value()[0]) if os.path.exist(opt.debug_file): import ipdb ipdb.set_trace() model.save() # 计算验证集上的指标及其可视化 val_cm, val_accuracy = val(model, val_dataloader) vis.plot('val_accuracy', val_accuracy) vis.log( 'epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}' .format(epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()), lr=lr)) if loss_meter.value()[0] > previous_loss: lr = lr * opt.lr_decay for param_group in optimizer.param_groups: param_group['lr'] = lr previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a document-pair classifier and monitor it through visdom.

    Builds the model named by ``opt.model``, reads comma-separated document
    pairs, optimizes with Adam/cross-entropy, and decays the learning rate
    whenever the smoothed epoch loss stops improving.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)(opt)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data — both splits parse each record as a comma-separated line
    def _split_line(line):
        return line.strip().split(',')

    train_data = DocumentPair(opt.train_data_root,
                              doc_type='train',
                              suffix='txt',
                              load=_split_line)
    train_data.initialize(vocab_size=opt.vocab_size)
    val_data = DocumentPair(opt.validate_data_root,
                            doc_type='validate',
                            suffix='txt',
                            load=_split_line,
                            vocab=train_data.vocab)
    val_data.initialize()
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=False, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for batch_idx, batch in enumerate(train_dataloader):
            left_arr, right_arr, label_arr, num_pos = load_data(
                batch, opt, train_data.vocab)

            # wrap numpy batches as torch Variables
            left_input = Variable(t.from_numpy(left_arr))
            right_input = Variable(t.from_numpy(right_arr))
            target = Variable(t.from_numpy(label_arr))
            if opt.use_gpu:
                left_input = left_input.cuda()
                right_input = right_input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            scores, predictions = model((left_input, right_input))
            gold = target.max(1)[1]  # one-hot target -> class indices
            loss = criterion(scores, gold)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.data[0])
            confusion_matrix.add(predictions.data, gold.data)
            if batch_idx % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                # drop into the debugger when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # update learning rate; mutating param_groups keeps optimizer moments
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train_model(model, criterion, optimizer, dataloaders, model_path,
                start_epoch, iter_num, logger, device):
    """Run the train/val loop for a YOLO-style detector.

    Each epoch does a training pass (with burn-in lr warm-up and manual step
    decay) and a validation pass; the latest model/optimizer state is saved
    every epoch and the best-mAP weights are kept as ``best.pth``.

    Fix vs. previous revision: the resume checkpoint stored the iteration
    counter under the malformed key ``'iter_num: '`` (trailing colon+space),
    which broke ``checkpoint['iter_num']`` lookups on resume.
    """
    since = time.time()
    best_loss = np.inf
    best_map = 0
    # pull hyper-parameters out of the global `args` once
    trial_log = args.trial_log
    num_epochs = args.num_epochs
    test_interval = args.test_interval
    burn_in = args.burn_in
    lr = args.learning_rate
    lr_steps = args.lr_steps
    size_grid_cell = args.size_grid_cell
    num_boxes = args.num_boxes
    num_classes = args.num_classes
    conf_thresh = args.conf_thresh
    iou_thresh = args.iou_thresh
    nms_thresh = args.nms_thresh
    port = args.port
    vis = Visualizer(env=trial_log, port=port)

    for epoch in range(start_epoch, num_epochs):
        logger.info('Epoch {} / {}'.format(epoch+1, num_epochs))
        logger.info('-' * 64)

        # set learning rate manually: 10x decay at each configured step
        if epoch in lr_steps:
            lr *= 0.1
            adjust_learning_rate(optimizer, lr)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()   # Set model to training mode
            else:
                model.eval()    # Set model to evaluate mode

            total_loss = 0.0
            # Iterate over data.
            for i, (inputs, targets) in enumerate(dataloaders[phase]):
                # warm-up of the learning rate during the burn-in iterations
                if phase == 'train':
                    if iter_num < args.burn_in:
                        burn_lr = get_learning_rate(iter_num, lr, burn_in)
                        adjust_learning_rate(optimizer, burn_lr)
                        iter_num += 1
                    else:
                        adjust_learning_rate(optimizer, lr)

                inputs = inputs.to(device)
                targets = targets.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss, obj_coord_loss, obj_conf_loss, noobj_conf_loss, obj_class_loss = criterion(outputs, targets)

                    # backward + optimize only in the training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                total_loss += loss.item()
                if phase == 'train':
                    cur_lr = optimizer.state_dict()['param_groups'][0]['lr']
                    vis.plot('cur_lr', cur_lr)
                    logger.info('Epoch [{}/{}], iter [{}/{}], lr: {:g}, loss: {:.4f}, average_loss: {:.4f}'.format(
                        epoch+1, args.num_epochs, i+1, len(dataloaders[phase]), cur_lr,
                        loss.item(), total_loss/(i+1)))
                    logger.debug(' obj_coord_loss: {:.4f}, obj_conf_loss: {:.4f}, noobj_conf_loss: {:.4f}, obj_class_loss: {:.4f}'.format(
                        obj_coord_loss, obj_conf_loss, noobj_conf_loss, obj_class_loss))
                    vis.plot('train_loss', total_loss/(i+1))

            # save model for inference and for resuming the training process
            if phase == 'train':
                torch.save(model.state_dict(), osp.join(model_path, 'latest.pth'))
                torch.save({
                    'iter_num': iter_num,   # FIX: key was 'iter_num: '
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, osp.join(model_path, 'latest.tar'))

            # evaluate latest model
            if phase == 'val':
                current_loss = total_loss / (i+1)
                if best_loss > current_loss:
                    best_loss = current_loss
                logger.info('current val loss: {:.4f}, best val Loss: {:.4f}'.format(current_loss, best_loss))
                vis.plot('val_loss', total_loss/(i+1))

                # mAP is expensive: every epoch early on, then every test_interval
                if epoch < 10 or (epoch+1) % test_interval == 0:
                    current_map = calc_map(logger, dataloaders[phase].dataset, model_path,
                                           size_grid_cell, num_boxes, num_classes,
                                           conf_thresh, iou_thresh, nms_thresh)
                    # save the best model so far
                    if best_map < current_map:
                        best_map = current_map
                        torch.save(model.state_dict(), osp.join(model_path, 'best.pth'))
                    logger.info('current val map: {:.4f}, best val map: {:.4f}'.format(current_map, best_map))
                    vis.plot('val_map', current_map)

    time_elapsed = time.time() - since
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    logger.info('Optimization Done.')
def train(**kwargs):
    """Fine-tune a pretrained ResNet-34 as a 2-class dog-vs-cat classifier.

    Fixes vs. previous revision:
      * Python-2 ``print`` statements replaced with version-safe calls
        (the rest of the block already uses modern APIs such as
        ``loss.item()`` / ``score.detach()``)
      * checkpoint filename used ':' which is invalid on Windows
    """
    # update configuration from command-line kwargs
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # (1) step1: model — pretrained backbone with a fresh 2-way head
    #model = getattr(models,opt.model)()
    model = models.resnet34(pretrained=True)
    model.fc = nn.Linear(512, 2)
    #if opt.load_model_path:
    #    model.load(opt.load_model_path)
    if opt.use_gpu:  # GPU
        model.cuda()

    # (2) step2: data (train/val split both come from train_data_root)
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # (3) step3: cross-entropy loss and SGD optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.SGD(model.parameters(), lr=opt.lr,
                            weight_decay=opt.weight_decay)

    # (4) step4: meters — smoothed loss and confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # (5) training loop
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in enumerate(train_dataloader):
            print("ii: {}".format(ii))
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # update meters and visualize
            loss_meter.add(loss.item())
            #print score.shape,target.shape
            confusion_matrix.add(score.detach(), target.detach())
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        #model.save()
        # FIX: '_' instead of ':' so the checkpoint name is valid on Windows
        name = time.strftime('model' + '%m%d_%H_%M_%S.pth')
        t.save(model.state_dict(), 'checkpoints/' + name)

        # validation metrics and visualization
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch, loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()), lr=lr))
        print("epoch: {} loss: {} accuracy: {}".format(
            epoch, loss_meter.value()[0], val_accuracy))

        # if the loss stops decreasing, decay the learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs): opt._parse(kwargs) vis = Visualizer(opt.env,port = opt.vis_port) # step1: configure model model = getattr(models, opt.model)() if opt.load_model_path: model.load(opt.load_model_path) model.to(opt.device) # step2: data train_data = DogCat(opt.train_data_root,train=True) val_data = DogCat(opt.train_data_root,train=False) train_dataloader = DataLoader(train_data,opt.batch_size, shuffle=True,num_workers=opt.num_workers) val_dataloader = DataLoader(val_data,opt.batch_size, shuffle=False,num_workers=opt.num_workers) # step3: criterion and optimizer criterion = t.nn.CrossEntropyLoss() lr = opt.lr optimizer = model.get_optimizer(lr, opt.weight_decay) # step4: meters loss_meter = meter.AverageValueMeter() confusion_matrix = meter.ConfusionMeter(2) previous_loss = 1e10 # train for epoch in range(opt.max_epoch): loss_meter.reset() confusion_matrix.reset() for ii,(data,label) in tqdm(enumerate(train_dataloader)): # train model input = data.to(opt.device) target = label.to(opt.device) optimizer.zero_grad() score = model(input) loss = criterion(score,target) loss.backward() optimizer.step() # meters update and visualize loss_meter.add(loss.item()) # detach 一下更安全保险 confusion_matrix.add(score.detach(), target.detach()) if (ii + 1)%opt.print_freq == 0: vis.plot('loss', loss_meter.value()[0]) # 进入debug模式 if os.path.exists(opt.debug_file): import ipdb; ipdb.set_trace() model.save() # validate and visualize val_cm,val_accuracy = val(model,val_dataloader) vis.plot('val_accuracy',val_accuracy) vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format( epoch = epoch,loss = loss_meter.value()[0],val_cm = str(val_cm.value()),train_cm=str(confusion_matrix.value()),lr=lr)) # update learning rate if loss_meter.value()[0] > previous_loss: lr = lr * opt.lr_decay # 第二种降低学习率的方法:不会有moment等信息的丢失 for param_group in optimizer.param_groups: param_group['lr'] = lr previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """DCGAN training loop: alternate discriminator/generator updates.

    Fix vs. previous revision: ``fix_fake_imgs`` was assigned only inside the
    ``opt.vis`` plotting branch, so the periodic checkpoint/save branch raised
    NameError whenever visualization was disabled (or plot_every never fired
    before decay_every). The save branch now regenerates the fixed-noise
    samples itself.
    """
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)
    if opt.vis:
        from utils.visualize import Visualizer
        vis = Visualizer(opt.env)

    transforms = tv.transforms.Compose([
        tv.transforms.Scale(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    dataset = tv.datasets.ImageFolder(opt.data_path, transform=transforms)
    dataloader = t.utils.data.DataLoader(dataset,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=opt.num_workers,
                                         drop_last=True)

    # define the two networks
    netg, netd = NetGenerator(opt), NetD(opt)
    map_location = lambda storage, loc: storage  # load onto CPU first
    if opt.netd_path:
        netd.load_state_dict(t.load(opt.netd_path, map_location=map_location))
    if opt.netg_path:
        netg.load_state_dict(t.load(opt.netg_path, map_location=map_location))

    # optimizers and loss
    optimizer_g = t.optim.Adam(netg.parameters(), opt.G_lr, betas=(opt.beta1, 0.999))
    optimizer_d = t.optim.Adam(netd.parameters(), opt.D_lr, betas=(opt.beta1, 0.999))
    criterion = t.nn.BCELoss()

    # real images are labeled 1, fakes 0; `noises` feeds the generator
    true_labels = Variable(t.ones(opt.batch_size))
    fake_labels = Variable(t.zeros(opt.batch_size))
    fix_noises = Variable(t.randn(opt.batch_size, opt.nz, 1, 1))
    noises = Variable(t.randn(opt.batch_size, opt.nz, 1, 1))

    errord_meter = AverageValueMeter()
    errorg_meter = AverageValueMeter()

    if opt.use_gpu:
        netd.cuda()
        netg.cuda()
        criterion.cuda()
        true_labels, fake_labels = true_labels.cuda(), fake_labels.cuda()
        fix_noises, noises = fix_noises.cuda(), noises.cuda()

    epochs = range(opt.max_epoch)
    for epoch in iter(epochs):
        for ii, (img, _) in tqdm.tqdm(enumerate(dataloader)):
            real_img = Variable(img)
            if opt.use_gpu:
                real_img = real_img.cuda()

            if ii % opt.d_every == 0:
                # train the discriminator
                optimizer_d.zero_grad()
                ## push real images toward the "real" label
                output = netd(real_img)
                error_d_real = criterion(output, true_labels)
                error_d_real.backward()
                ## push generated images toward the "fake" label
                noises.data.copy_(t.randn(opt.batch_size, opt.nz, 1, 1))
                fake_img = netg(noises).detach()  # generate fakes from noise
                output = netd(fake_img)
                error_d_fake = criterion(output, fake_labels)
                error_d_fake.backward()
                optimizer_d.step()
                error_d = error_d_fake + error_d_real
                errord_meter.add(error_d.data[0])

            if ii % opt.g_every == 0:
                # train the generator to fool the discriminator
                optimizer_g.zero_grad()
                noises.data.copy_(t.randn(opt.batch_size, opt.nz, 1, 1))
                fake_img = netg(noises)
                output = netd(fake_img)
                error_g = criterion(output, true_labels)
                error_g.backward()
                optimizer_g.step()
                errorg_meter.add(error_g.data[0])

            if opt.vis and ii % opt.plot_every == opt.plot_every - 1:
                ## visualization
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                fix_fake_imgs = netg(fix_noises)
                vis.images(fix_fake_imgs.data.cpu().numpy()[:64] * 0.5 + 0.5,
                           win='fixfake')
                vis.plot('error_d', errord_meter.value()[0])
                vis.images(real_img.data.cpu().numpy()[:64] * 0.5 + 0.5,
                           win='real')
                vis.plot('error_g', errorg_meter.value()[0])

        if epoch % opt.decay_every == 0:
            # save models and sample images
            # FIX: regenerate fixed-noise samples here instead of relying on
            # the visualization branch having run (NameError when opt.vis off)
            fix_fake_imgs = netg(fix_noises)
            tv.utils.save_image(fix_fake_imgs.data[:64],
                                '%s/%s.png' % (opt.save_path, (epoch + opt.startpoint)),
                                normalize=True,
                                range=(-1, 1))
            t.save(netd.state_dict(),
                   'checkpoints/netd_%s.pth' % (epoch + opt.startpoint))
            t.save(netg.state_dict(),
                   'checkpoints/netg_%s.pth' % (epoch + opt.startpoint))
            errord_meter.reset()
            errorg_meter.reset()
            # recreate the optimizers to reset their internal state
            optimizer_g = t.optim.Adam(netg.parameters(), opt.G_lr,
                                       betas=(opt.beta1, 0.999))
            optimizer_d = t.optim.Adam(netd.parameters(), opt.D_lr,
                                       betas=(opt.beta1, 0.999))
# NOTE(review): this block is reproduced verbatim — only these header comments
# are added. The validate-before-train ordering means loss_meter/confusion_matrix
# are read at epoch 0 before any batch was added (AverageValueMeter then yields
# nan) — presumably intentional for plotting baselines, but verify. Several of
# the savingData / save_training_data / lr-update lines appear commented out and
# the exact comment boundaries cannot be recovered from this view, so the code
# is left byte-identical rather than reformatted.
# Translation of the embedded non-English comments:
#   "第二种降低学习率的方法:不会有moment等信息的丢失" =
#       "second way to lower the learning rate: no loss of momentum state etc."
#   "进入debug模式" = "enter debug mode".
def train(**kwargs): # opt.parse(kwargs) vis = Visualizer(opt.env) savingData = [] # # step1: configure model model = getattr(models, opt.model)() if opt.load_model_path: model.load(opt.load_model_path) if opt.use_gpu: model.cuda() # step2: data train_data = DogCat(opt.train_data_root, train=True) val_data = DogCat(opt.train_data_root, train=False) test_data = DogCat(opt.test_data_root, test=True) train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True, num_workers=opt.num_workers) val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False, num_workers=opt.num_workers) test_dataloader = DataLoader(test_data, opt.batch_size, shuffle=False, num_workers=opt.num_workers) # step3: criterion and optimizer criterion = t.nn.CrossEntropyLoss() lr = opt.lr optimizer = t.optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay) # step4: meters loss_meter = meter.AverageValueMeter() confusion_matrix = meter.ConfusionMeter(2) previous_loss = 1e100 # train for epoch in range(opt.max_epoch + 1): # validate and visualize val_cm, val_accuracy = val(model, val_dataloader) test_cm, test_accuracy = val(model, test_dataloader) vis.plot('test_accuracy', test_accuracy) vis.plot('lr', lr) vis.plot('val_accuracy', val_accuracy) vis.log( "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm},test_cm:{test_cm}" .format(epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()), test_cm=str(test_cm.value()), lr=lr)) print("epoch = ", epoch, " loss = ", loss_meter.value()[0], " lr = ", lr) batch_results = [(epoch, loss_meter.value()[0], lr, str(val_cm.value()), str(confusion_matrix.value()), str(test_cm.value()), val_accuracy, test_accuracy) ] # savingData += batch_results # save_training_data(savingData, opt.traingData_file) # # update learning rate # if loss_meter.value()[0] > previous_loss: lr = lr * opt.lr_decay # # 第二种降低学习率的方法:不会有moment等信息的丢失 # for param_group in optimizer.param_groups: #
param_group['lr'] = lr if epoch == opt.max_epoch: return previous_loss = loss_meter.value()[0] loss_meter.reset() confusion_matrix.reset() for ii, (data, label) in tqdm(enumerate(train_dataloader), total=len(train_data) / opt.batch_size): # train model input = data target = label if opt.use_gpu: input = input.cuda() target = target.cuda() optimizer.zero_grad() score = model(input) loss = criterion(score, target) loss.backward() optimizer.step() # meters update and visualize loss_meter.add(loss.item()) confusion_matrix.add(score.data, target.data) if ii % opt.print_freq == opt.print_freq - 1: vis.plot('loss', loss_meter.value()[0]) # 进入debug模式 if os.path.exists(opt.debug_file): import ipdb ipdb.set_trace() prefix = 'checkpoints/' name = time.strftime(prefix + '%m%d_%H:%M:%S_' + str(epoch + 1) + '.pth') if epoch == 0: model.save(name) if np.mod(epoch + 1, 10) == 0: model.save(name)
def train(**kwargs):
    """Train a sentence classifier (CNN or recurrent) on PIO(N) text data.

    kwargs override fields of the global ``opt``; metrics are pushed to
    visdom through ``vis``. The label space has 4 classes (P/I/O/N —
    presumably; confirm against the ConfusionMeter(4) below).
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)
    # step1: prepare data — 70/30 train/validation split with a fixed seed
    dh = DataHelper(opt.train_data_root, train=True)
    x_text, cuis, sentences_origin, y, vocabulary, vocabulary_inv = dh.load_data(
    )
    x_train, x_val, y_train, y_val = train_test_split(x_text,
                                                      y,
                                                      test_size=0.3,
                                                      random_state=1,
                                                      shuffle=True)
    x_train = torch.from_numpy(x_train).long()
    y_train = torch.from_numpy(y_train).long()
    y_train = y_train.view(-1)  # flatten labels to a 1-D class-index vector
    train_data = TensorDataset(x_train, y_train)
    x_val = torch.from_numpy(x_val).long()
    y_val = torch.from_numpy(y_val).long()
    y_val = y_val.view(-1)
    val_data = TensorDataset(x_val, y_val)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    # step2: model — embedding matrix chosen by the mixing_train switch
    if opt.mixing_train:
        pretrained_embeddings = emb_utils.load_mixing_embedding()
    else:
        pretrained_embeddings = emb_utils.load_words_embedding()
    model = getattr(models, opt.model)(vocab_size=len(vocabulary),
                                       pretrained_embeddings=pretrained_embeddings)
    # if opt.load_model_path:
    #     model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()
    # step3: loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    lr = opt.lr
    # fix the emb parameters: only optimize params with requires_grad=True
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=lr,
                                 weight_decay=opt.weight_decay)
    # step4: meters (4-class confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(4)
    previous_loss = 1e100
    print("train start...")
    # step5: train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train model parameters on this batch
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                torch.cuda.set_device(opt.device)
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            # recurrent models take the transposed batch; note t_() transposes
            # `input` IN PLACE — presumably (batch, seq) -> (seq, batch); confirm
            if "LSTM" in opt.model or "RNN" in opt.model:
                score, _ = model(input.t_())
            else:
                score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()
            # update meters and visualize
            loss_meter.add(loss.data[0])
            m = torch.max(score, 1)[1]  # predicted class indices
            confusion_matrix.add(m.view(target.size()).data, target.data)
            if (ii + 1) % opt.print_freq == 0:
                # visualize training-set metrics; two reporting modes:
                # per-class P/I/O/N numbers or one aggregated A/P/R/F1 set
                cm_value = confusion_matrix.value()
                if not opt.together_calculate:
                    result_p, result_i, result_o, result_n = vis.calculate_and_show(
                        cm_value, together_calculate=False)
                    data = [result_p, result_i, result_o, result_n]
                    vis.plot_lprf_dependent(data, env="train")
                    vis.plot('train_loss', loss_meter.value()[0])
                else:
                    train_accuracy, train_precision, train_recall, train_f1 = vis.calculate_and_show(
                        cm_value)
                    data = [
                        train_accuracy, train_precision, train_recall,
                        train_f1
                    ]
                    vis.plot_laprf(data, env="train")
                    vis.plot('train_loss', loss_meter.value()[0])
        model.save(epoch=epoch)
        # compute validation metrics and visualize
        vocabulary_inv = {index: word for word, index in vocabulary.items()}
        val(model, val_dataloader, loss_meter, vis, epoch, vocabulary_inv)
        # if the loss stops decreasing, decay the learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs): opt.parse(kwargs) vis = Visualizer(opt.env) Model = getattr(models, opt.model) model = Model(40) if opt.load_model_path: model.load(opt.load_model_path) if opt.use_gpu: model.cuda() train_data = CGHData(opt.train_data_root, train=True) val_data = CGHData(opt.train_data_root, train=False) train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True, num_workers=opt.num_workers) val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False, num_workers=opt.num_workers) criterion = nn.MSELoss() lr = opt.lr optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay) loss_meter = meter.AverageValueMeter() # confusion_matrix=meter.ConfusionMeter(2) previous_loss = 1e100 for epoch in range(opt.max_epoch): loss_meter.reset() # confusion_matrix.reset() for k, (data, label) in enumerate(train_dataloader): # print(k) if opt.use_gpu: input = input.cuda() target = target.cuda() optimizer.zero_grad() score = model(input) loss = criterion(score, target) loss.backward() optimizer.step() loss_meter.add(loss.data[0]) # confusion_matrix.add(score.data, target.data) if k % opt.print_freq == opt.print_freq - 1: vis.plot('loss', loss_meter.value()[0]) model.save() vak_cm, val_accuracy = val(model, val_dataloader) vis.plot('val_accuracy', val_accuracy) vis.log("epoch:{epoch},lr:{lr},loss:{loss}".format( epoch=epoch, loss=loss_meter.value()[0], lr=lr)) if loss_meter.value()[0] > previous_loss: lr = lr * opt.lr_decay for param_group in optimizer.param_groups: param_group['lr'] = lr previous_loss = loss_meter.value()[0]
def train(args, config):
    """Train an MNIST classifier with per-epoch validation and early stopping.

    Fixes vs. previous revision:
      * early-stop counter was incremented even immediately after an
        improvement (it was reset to 0 and then unconditionally += 1),
        making the patience one epoch shorter than intended
      * time.clock() was removed in Python 3.8 — use time.perf_counter()
    """
    vis = Visualizer()
    train_set = MNIST(data_path=config.train_data_path,
                      label_path=config.train_label_path,
                      config=config,
                      mode='train')
    valid_set = MNIST(data_path=config.train_data_path,
                      label_path=config.train_label_path,
                      config=config,
                      mode='valid')
    train_dataloader = DataLoader(train_set, config.batch_size,
                                  shuffle=True, num_workers=config.num_workers)
    valid_dataloader = DataLoader(valid_set, config.batch_size,
                                  shuffle=False, num_workers=config.num_workers)

    model = getattr(network, args.model)().eval()
    if args.load_model_path:
        model.load(args.load_model_path)
    if args.use_gpu:
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.lr)

    train_loss_meter, valid_loss_meter = meter.AverageValueMeter(
    ), meter.AverageValueMeter()
    train_confusion_matrix, valid_confusion_matrix = meter.ConfusionMeter(
        10), meter.ConfusionMeter(10)

    best_valid_loss = 1e5
    best_epoch = 0
    dist_to_best = 0  # epochs since the last validation-loss improvement

    time_begin = time.perf_counter()
    for epoch in range(config.epoch):
        # ---- train ----
        model.train()
        train_loss_meter.reset()
        train_confusion_matrix.reset()
        for _iter, (train_data, train_target) in enumerate(train_dataloader):
            if args.use_gpu:
                train_data = train_data.cuda()
                train_target = train_target.cuda()
            optimizer.zero_grad()
            train_logits, train_output = model(train_data)
            train_loss = criterion(train_logits, train_target)
            train_loss.backward()
            optimizer.step()
            train_loss_meter.add(train_loss.item())
            train_confusion_matrix.add(train_logits.data, train_target.data)
            if _iter % config.print_freq == 0:
                vis.plot('train_loss', train_loss_meter.value()[0])
        model.save(path=os.path.join(args.ckpts_dir,
                                     'model_{0}.pth'.format(str(epoch))))

        # ---- validate ----
        model.eval()
        valid_loss_meter.reset()
        valid_confusion_matrix.reset()
        for _iter, (valid_data, valid_target) in enumerate(valid_dataloader):
            if args.use_gpu:
                valid_data = valid_data.cuda()
                valid_target = valid_target.cuda()
            valid_logits, valid_output = model(valid_data)
            valid_loss = criterion(valid_logits, valid_target)
            valid_loss_meter.add(valid_loss.item())
            valid_confusion_matrix.add(valid_logits.detach().squeeze(),
                                       valid_target.type(t.LongTensor))

        valid_cm = valid_confusion_matrix.value()
        # overall accuracy = trace / total, in percent
        valid_accuracy = 100. * (valid_cm.diagonal().sum()) / (valid_cm.sum())
        vis.plot('valid_accuracy', valid_accuracy)
        vis.log(
            "epoch:{epoch}, train_loss:{train_loss}, train_cm:{train_cm}, valid_loss:{valid_loss}, valid_cm:{valid_cm}, valid_accuracy:{valid_accuracy}"
            .format(epoch=epoch,
                    train_loss=train_loss_meter.value()[0],
                    train_cm=str(train_confusion_matrix.value()),
                    valid_loss=valid_loss_meter.value()[0],
                    valid_cm=str(valid_cm),
                    valid_accuracy=valid_accuracy))
        print(
            "epoch:{epoch}, train_loss:{train_loss}, valid_loss:{valid_loss}, valid_accuracy:{valid_accuracy}"
            .format(epoch=epoch,
                    train_loss=train_loss_meter.value()[0],
                    valid_loss=valid_loss_meter.value()[0],
                    valid_accuracy=valid_accuracy))
        print("train_cm:\n{train_cm}\n\nvalid_cm:\n{valid_cm}".format(
            train_cm=str(train_confusion_matrix.value()),
            valid_cm=str(valid_cm),
        ))

        # ---- early stop ----
        # FIX: only count epochs that did NOT improve the validation loss
        if valid_loss_meter.value()[0] < best_valid_loss:
            best_epoch = epoch
            best_valid_loss = valid_loss_meter.value()[0]
            dist_to_best = 0
        else:
            dist_to_best += 1
        if dist_to_best > 4:
            break

    model.save(path=os.path.join(args.ckpts_dir, 'model.pth'))
    vis.save()
    print("save model successfully")
    print("best epoch: ", best_epoch)
    print("best valid loss: ", best_valid_loss)
    time_end = time.perf_counter()
    print('time cost: %.2f' % (time_end - time_begin))
def train():
    """Train the DoubleSequence model on the Kesci app data.

    Hard-coded run: 400 epochs, Adam(lr=0.002, weight_decay=3e-4),
    BCEWithLogitsLoss, train/val/test F1 plotted to visdom every 3 epochs.
    Requires CUDA (everything is moved to the GPU unconditionally).
    """
    vis = Visualizer("Kesci" + time.strftime('%m%d%H%M'))

    train_data = AppData("../kesci/data/data_v3_23d/train_ab.json", iflabel=True)
    val_data = AppData("../kesci/data/data_v3_23d/val_ab.json", iflabel=True)
    train_dataloader = DataLoader(train_data, 256, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_data, 512, shuffle=False, num_workers=2)
    test_data = AppData("../kesci/data/data_v3_23d/test_ab.json", iflabel=True)
    test_dataloader = DataLoader(test_data, 512, shuffle=False, num_workers=2)

    criterion = t.nn.BCEWithLogitsLoss().cuda()
    learning_rate = 0.002
    weight_decay = 0.0003
    model = DoubleSequence(31, 128, 1).cuda()
    optimizer = t.optim.Adam(model.parameters(), lr=learning_rate,
                             weight_decay=weight_decay)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(400):
        loss_meter.reset()
        confusion_matrix.reset()
        # NOTE: local names renamed from `input`/`property`, which shadowed
        # the Python builtins of the same name.
        for ii, (data, prop, target) in tqdm(enumerate(train_dataloader)):
            # FIX: torch.autograd.Variable is a deprecated no-op since
            # PyTorch 0.4 — tensors carry autograd state themselves.
            seq_batch = data.cuda()
            prop_batch = prop.cuda()
            target = target.cuda()

            output = model(seq_batch, prop_batch)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # FIX: loss.data[0] fails on 0-dim tensors in PyTorch >= 0.4;
            # .item() is the supported scalar extraction.
            loss_meter.add(loss.item())
            if ii % 100 == 99:
                vis.plot('loss', loss_meter.value()[0])

        if epoch % 3 == 2:
            train_cm, train_f1 = val(model, train_dataloader)
            vis.plot('train_f1', train_f1)
            val_cm, val_f1 = val(model, val_dataloader)
            vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate})

        # Decay the LR whenever the epoch loss stopped improving; mutating
        # param_groups in place keeps optimizer momentum statistics.
        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * 0.9
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
        previous_loss = loss_meter.value()[0]

        if epoch % 3 == 2:
            # train_cm/val_cm/... are always bound here: this guard matches
            # the evaluation guard above within the same epoch.
            model.save()
            test_cm, test_f1 = val(model, test_dataloader)
            vis.plot('test_f1', test_f1)
            vis.log(
                "训练集:{train_f1:%}, {train_pre:%}, {train_rec:%} | 验证集:{val_f1:%}, {val_pre:%}, {val_rec:%} | "
                "测试集:{test_f1:%}, {test_pre:%}, {test_rec:%} | {train_true_num:%}, "
                "{val_true_num:%}, {test_true_num:%}"
                .format(
                    train_f1=train_f1, val_f1=val_f1, test_f1=test_f1,
                    train_true_num=train_cm.value()[:, 0].sum() / len(train_data),
                    val_true_num=val_cm.value()[:, 0].sum() / len(val_data),
                    test_true_num=test_cm.value()[:, 0].sum() / len(test_data),
                    train_pre=train_cm.value()[0][0] / train_cm.value()[0].sum(),
                    train_rec=train_cm.value()[0][0] / train_cm.value()[:, 0].sum(),
                    val_pre=val_cm.value()[0][0] / val_cm.value()[0].sum(),
                    val_rec=val_cm.value()[0][0] / val_cm.value()[:, 0].sum(),
                    test_pre=test_cm.value()[0][0] / test_cm.value()[0].sum(),
                    test_rec=test_cm.value()[0][0] / test_cm.value()[:, 0].sum()))
def train(opt):
    """Train a 120-class (dog breed) classifier configured by ``opt``.

    Only ``model.classifier`` parameters are optimized at first; at epoch 20
    ``set_requires_grad()`` is called (presumably to unfreeze the backbone —
    TODO confirm against the model class). Saves a checkpoint per epoch and
    plots loss / validation accuracy to visdom.
    """
    model_train = getattr(model, opt.model)()
    vis = Visualizer(opt.env)
    if opt.load_model_path:
        model_train.load(opt.load_model_path)
    if opt.use_gpu:
        model_train.cuda()

    train_dataloader = dataloader(opt.train_data_root, train=True,
                                  batch_size=opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = dataloader(opt.valid_data_root, train=False,
                                batch_size=opt.batch_size, shuffle=True,
                                num_workers=opt.num_workers)

    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    # Only the classifier head is trained initially.
    optimizer = torch.optim.Adam(model_train.classifier.parameters(),
                                 lr=lr, weight_decay=opt.weight_decay)

    # meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(120)
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        if epoch == 20:
            model_train.set_requires_grad()
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            if opt.use_gpu:
                data = data.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            score = model_train(data)
            loss = criterion(score, label)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, label.data)
            # FIX: the original `if ii % opt.print_freq:` was inverted — it
            # plotted on every iteration EXCEPT multiples of print_freq.
            # Match the other trainers in this file: plot once per
            # print_freq iterations.
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
        model_train.save(opt.save_model_path + opt.model + '_' + str(epoch))

        # validate and visualize
        val_cm, val_accuracy = val(model_train, val_dataloader, opt)
        vis.plot('val_accuracy', val_accuracy)

        # update learning rate when the epoch loss stopped improving
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param in optimizer.param_groups:
                param['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a binary (ictal/non-ictal) EEG classifier.

    Keyword args override fields of the global ``opt`` config. Plots loss
    and validation accuracy to visdom, saves a checkpoint per epoch, and
    prints total wall time at the end.
    """
    # Update configuration from command-line keyword arguments.
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # Step 1: build the network (pretrained weights would be loaded by the
    # model class itself if configured).
    model = getattr(models, opt.model)()

    # Step 2: data — the same root is split into train/validation subsets.
    train_data = Ictal(opt.train_data_root, opt.model, train=True)
    val_data = Ictal(opt.train_data_root, opt.model, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # Step 3: loss and optimizer.
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.SGD(model.parameters(), lr=lr,
                            weight_decay=opt.weight_decay)

    # Step 4: meters — smoothed loss and a 2-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    start = time.time()

    # Step 5: training loop.
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in enumerate(train_dataloader):
            # NOTE: Variable() wrappers removed — they have been deprecated
            # no-ops since PyTorch 0.4; tensors carry autograd state.
            input_batch = data
            if opt.model == 'CNN_1d':
                # 1-D CNN expects (batch, channels, length).
                input_batch = input_batch.permute(0, 2, 1)
            target = label

            optimizer.zero_grad()
            score = model(input_batch)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # Update meters and visualization.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), target.detach())
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                # Drop into the debugger when the sentinel file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save(epoch)

        # Compute and visualize validation metrics.
        val_cm, val_accuracy = val(model, val_dataloader, opt.model)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))
        tra_cm, tra_accuracy = val(model, train_dataloader, opt.model)
        print("epoch:", epoch, "loss:", loss_meter.value()[0],
              "val_accuracy:", val_accuracy, "tra_accuracy:", tra_accuracy)

        # Decay the learning rate when the loss stopped improving; mutating
        # param_groups preserves optimizer state (e.g. momentum).
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr
        previous_loss = loss_meter.value()[0]

    end = time.time()
    print(end - start)
def train(**kwargs):
    """Train a binary text classifier driven by the global ``opt`` config.

    ``kwargs`` override ``opt`` fields. Loads weights from
    ``opt.load_model_path`` when given, otherwise applies ``weight_init``.
    Plots loss/accuracy to visdom and checkpoints every other epoch.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # --- model ---
    model = getattr(models, opt.model)()
    if not opt.load_model_path:
        print('Initialize the model!')
        model.apply(weight_init)
    else:
        model.load_new(opt.load_model_path)
    model.to(opt.device)

    # --- data ---
    train_set = TextData(opt.data_root, opt.train_txt_path)
    val_set = TextData(opt.data_root, opt.val_txt_path)
    train_loader = DataLoader(train_set, opt.batch_size,
                              shuffle=True, num_workers=opt.num_workers)
    val_loader = DataLoader(val_set, opt.batch_size,
                            shuffle=False, num_workers=opt.num_workers)

    # --- loss, optimizer, meters ---
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for step, (batch_x, batch_y) in tqdm(enumerate(train_loader)):
            inputs = batch_x.to(opt.device)
            targets = batch_y.to(opt.device)

            optimizer.zero_grad()
            scores = model(inputs)
            batch_loss = criterion(scores, targets)
            batch_loss.backward()
            optimizer.step()

            # bookkeeping + visdom
            loss_meter.add(batch_loss.item())
            confusion_matrix.add(scores.data, targets.data)
            if step % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])
                # debugger hook: touch opt.debug_file to break here
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
            if step % (opt.print_freq * 10) == 0:
                vis.images(inputs.cpu().numpy(),
                           opts=dict(title='Label', caption='Label'),
                           win=1)
                print('Epoch: {} Iter: {} Loss: {}'.format(epoch, step, batch_loss))

        # checkpoint every second epoch
        if epoch % 2 == 0:
            model.save('./checkpoints/' + opt.env + '_' + str(epoch) + '.pth')

        # validation metrics
        val_cm, val_accuracy = val(model, val_loader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # training accuracy from the 2x2 confusion matrix diagonal
        train_cm = confusion_matrix.value()
        t_accuracy = 100. * (train_cm[0][0] + train_cm[1][1]) / (train_cm.sum())
        vis.plot('train_accuracy', t_accuracy)

        # anneal lr when the epoch loss regressed
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a 17-class flower classifier configured by the global ``opt``.

    ``kwargs`` override ``opt`` fields. Dense* models use SGD with Nesterov
    momentum, everything else Adam. Tracks the best validation accuracy and
    snapshots that model to ``./checkpoints/best_<model>.pth``.
    """
    # load kwargs
    opt.parse(kwargs)
    print(kwargs)

    # visdom + log the effective configuration
    vis = Visualizer(opt.env)
    vis.log('user config:')
    for k, v in opt.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log('{} {}'.format(k, getattr(opt, k)))

    # configure model
    model = getattr(models, opt.model)()
    if opt.use_pretrained_model:
        model = load_pretrained()
    if opt.load_model_path:
        # load an existing checkpoint
        model.load(opt.load_model_path)
    elif opt.use_weight_init:
        # NOTE(review): weight init is commented out in the original —
        # this branch is intentionally a no-op.
        # model.apply(weight_init)
        pass
    if opt.use_gpu:
        model.cuda()

    # data
    train_data = Flower(train=True)
    val_data = Flower(train=False)
    test_data = Flower(test=True)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=True,
                                num_workers=opt.num_workers)
    test_dataloader = DataLoader(test_data, opt.batch_size, shuffle=False,
                                 num_workers=opt.num_workers)

    # criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    if 'Dense' in opt.model:
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                                    nesterov=True,
                                    weight_decay=opt.weight_decay)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                     weight_decay=opt.weight_decay)

    # meters: smoothed loss + 17-class confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(17)
    previous_loss = 1e100
    # FIX: this initialization was commented out in the original, so the
    # first `val_accuracy > best_accuracy` comparison raised NameError.
    best_accuracy = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for batch_index, (data, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # FIX: loss.data[0] fails on 0-dim tensors in PyTorch >= 0.4;
            # .item() is the supported scalar extraction.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            # plot the smoothed cross-entropy once per print_freq batches
            if batch_index % opt.print_freq == opt.print_freq - 1:
                print('loss ', loss_meter.value()[0])
                vis.plot('loss', loss_meter.value()[0])

        # save a periodic checkpoint (only when training from scratch)
        if opt.use_pretrained_model is False and epoch % opt.save_freq == 0:
            model.save()

        # validate / test
        val_cm, val_accuracy = val(model, val_dataloader)
        test_cm, test_accuracy = val(model, test_dataloader)
        print('Epoch {}/{}: val_accuracy {}'.format(epoch, opt.max_epoch,
                                                    val_accuracy))
        vis.plot('val_accuracy', val_accuracy)
        vis.plot('test_accuracy', test_accuracy)
        vis.log('epoch:{epoch}, lr:{lr}, loss:{loss}'.format(
            epoch=epoch, loss=loss_meter.value()[0], lr=lr))

        # keep the best-on-validation snapshot
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            torch.save(model.state_dict(),
                       './checkpoints/best_{}.pth'.format(opt.model))
            if opt.use_pretrained_model is False:
                model.save('./checkpoints/best_{}.pth'.format(
                    model.model_name))

        # decay lr when the epoch loss stopped improving
        if float(loss_meter.value()[0]) > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

    print('Best model validation accuracy {}'.format(best_accuracy))
def train(**kwargs):
    """Train a cat-vs-dog binary classifier configured by the global ``opt``.

    ``kwargs`` override ``opt`` fields. Optionally resumes from
    ``opt.load_model_path`` when ``opt.retrain`` is set. Plots loss and
    validation accuracy to visdom.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.retrain:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data — same root split into train/val by the `train` flag
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        # FIX: tqdm's `total` is the number of ITERATIONS the iterable
        # yields; enumerate(train_dataloader) yields batches, so the total
        # is len(train_dataloader), not len(train_data) (sample count).
        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):
            # NOTE: Variable() wrappers removed — deprecated no-ops since
            # PyTorch 0.4.
            input = data
            target = label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                # enter debug mode when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # NOTE(review): this overwrites the checkpoint at the LOAD path
        # every epoch — looks intentional (rolling checkpoint) but confirm.
        model.save(opt.load_model_path)

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
            train_cm=str(confusion_matrix.value()), lr=lr))

        # update learning rate in place: mutating param_groups preserves
        # optimizer state such as Adam's moment estimates
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
#--update_netd-- Update D network: Ladv = |f(real) - f(fake)|_2 #self.pred_real, self.feat_real = self.netd(self.input) #self.pred_fake, self.feat_fake = self.netd(self.fake.detach()) netd.zero_grad() fake, latent_i, latent_o = netg(img_st) out_d_real, feat_true = netd(img_st) out_d_fake, feat_fake = netd(fake.detach()) err_d = .5 * criterion_BCE( out_d_real, y_real_) + .5 * criterion_BCE( out_d_fake, y_fake_) #+ criterion_L2(feat_real, feat_fake) err_d.backward(retain_graph=True) optimizer_d.step() optimizer_f.step() errord_meter.add(err_d.data.cpu().numpy()) vis.plot('errord', errord_meter.value()[0]) # If D loss is zero, then re-initialize netD if err_d.item() < 1e-5: netd.apply(weights_init) #--update_netg-- Update G network: log(D(G(x))) + ||G(x) - x|| netg.zero_grad() #out_g, _ = netd(fake) err_g_bce = criterion_L2(feat_true, feat_fake) # l_adv err_g_l1l = criterion_L1(fake, img_st) # l_con err_g_enc = criterion_L2(latent_i, latent_o) # l_enc err_g = err_g_bce * config.w_bce + err_g_l1l * config.w_rec + err_g_enc * config.w_enc err_g.backward() optimizer_g.step() optimizer_f.step() errorg_meter.add(err_g.data.cpu().numpy())
def train(**kwargs):
    """Train a DogCat classifier (``opt.num_class`` classes) per ``opt``.

    ``kwargs`` override ``opt`` fields. Loss/accuracy are plotted through
    visdom; a checkpoint is saved after every epoch.
    """
    # NOTE(review): sibling trainers in this file call opt.parse(kwargs)
    # (dict argument); this one unpacks. Confirm this opt.parse signature.
    opt.parse(**kwargs)

    # step1: configure model
    model = getattr(models, opt.model)(opt.num_class)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data — same root split into train/val by the flags
    train_data = DogCat(opt.train_data_path, transform=opt.train_transform,
                        train=True)
    val_data = DogCat(opt.train_data_path, transform=opt.test_val_transform,
                      train=False, test=False)
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size,
                                  shuffle=opt.shuffle,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=opt.batch_size,
                                shuffle=opt.shuffle,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters — per-epoch mean loss and confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(opt.num_class)
    previous_loss = 1e6

    # step5: train
    vis = Visualizer(opt.env)
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # NOTE: Variable() wrappers removed — deprecated no-ops since
            # PyTorch 0.4.
            input = data
            target = label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # FIX: the original fed the raw loss tensor to the meter;
            # .item() extracts the Python float AverageValueMeter expects.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot(win='loss', y=loss_meter.value()[0])

        model.save()

        # step6: validate and visualize
        val_confusion_matrix, val_accuracy = val(model, val_dataloader)
        vis.plot(win='val_accuracy', y=val_accuracy)
        vis.log(win='log_text',
                info='epoch:{epoch}, lr:{lr}, loss:{loss}, train_cm:{train_cm}, val_cm:{val_cm}'.format(
                    epoch=epoch, lr=lr, loss=loss_meter.value()[0],
                    train_cm=str(confusion_matrix.value()),
                    val_cm=str(val_confusion_matrix)))

        # step7: decay learning_rate when the epoch loss stopped improving
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]