import os

import numpy as np
import torch
import torch.nn.functional as F
import visdom
from torch.autograd import Variable
from torch.utils import data
from tqdm import tqdm

# Repo-local imports assumed by the training variants below (not shown here):
# get_loader, get_data_path, get_model, runningScore,
# Compose, RandomRotate, RandomHorizontallyFlip,
# log_loss, berhu, berhu_log, v_loss, l2, region_log, l1_r,
# get_instance_masks, and the valid-depth bounds alpha / beta.


def train(args):
    scale = 2
    torch.backends.cudnn.benchmark = True

    # Setup Augmentations
    data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()])
    loss_rec = []
    best_error = 2

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train',
                           img_size=(args.img_rows, args.img_cols), task='region')
    v_loader = data_loader(data_path, is_transform=True, split='test',
                           img_size=(args.img_rows, args.img_cols), task='region')
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=4, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=4, shuffle=False)

    # Setup Metrics
    running_metrics = runningScore(n_classes)

    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env='nyu2_coarse')
        depth_window = vis.image(np.random.rand(480 // scale, 640 // scale),
                                 opts=dict(title='depth!', caption='depth.'))
        accurate_window = vis.image(np.random.rand(480 // scale, 640 // scale),
                                    opts=dict(title='accurate!', caption='accurate.'))
        ground_window = vis.image(np.random.rand(480 // scale, 640 // scale),
                                  opts=dict(title='ground!', caption='ground.'))
        image_window = vis.image(np.random.rand(480 // scale, 640 // scale),
                                 opts=dict(title='img!', caption='img.'))
        loss_window = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                               opts=dict(xlabel='minibatches', ylabel='Loss',
                                         title='Training Loss', legend=['Loss']))
        lin_window = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                              opts=dict(xlabel='minibatches', ylabel='error',
                                        title='linear Loss', legend=['linear error']))
        error_window = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='error',
                                          title='error', legend=['Error']))

    # Setup Model
    model = get_model(args.arch)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))
    model.cuda()

    # Check if model has a custom optimizer / loss
    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        # optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate,
        #                              betas=(0.9, 0.999), amsgrad=True)
        optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate,
                                    momentum=0.90)
    if hasattr(model.module, 'loss'):
        print('Using custom loss')
        loss_fn = model.module.loss
    else:
        loss_fn = log_loss

    trained = 0
    test = 0  # index of the current validation pass (drives the visdom x-axis)
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume,
                                                             checkpoint['epoch']))
            trained = checkpoint['epoch']
            best_error = checkpoint['error']
            print(best_error)
            print(trained)
            # 199 minibatches per epoch on this split
            loss_rec = list(np.load('/home/lidong/Documents/RSCFN/loss.npy'))
            loss_rec = loss_rec[:199 * trained]
            trained = 0  # restart the epoch counter after resuming
        else:
            best_error = 100
            best_error_r = 100
            trained = 0
            print('random initialize')
            print("No checkpoint found at '{}'".format(args.resume))
            print('Initialize from rsn!')
            rsn = torch.load(
                '/home/lidong/Documents/RSCFN/rsn_cluster_nyu2_124_1.103912coarse_best_model.pkl',
                map_location='cpu')
            model_dict = model.state_dict()
            # keep only the pretrained tensors whose names and shapes match
            pre_dict = {k: v for k, v in rsn['model_state'].items()
                        if k in model_dict and v.shape == model_dict[k].shape}
            model_dict.update(pre_dict)
            model.load_state_dict(model_dict)
            trained = rsn['epoch']
            best_error = rsn['error']
            print('load success!')
            print(best_error)
            best_error += 1

    for epoch in range(trained, args.n_epoch):
        print('training!')
        model.train()
        for i, (images, labels, regions, segments, image) in enumerate(trainloader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            segments = Variable(segments.cuda())
            regions = Variable(regions.cuda())
            optimizer.zero_grad()

            depth, accurate = model(images, regions, 1, 'eval')
            print('depth', torch.mean(depth).item())
            print('accurate', torch.mean(accurate).item())
            print('ground', torch.mean(labels).item())
            loss_d = log_loss(depth, labels)
            loss_a = berhu_log(accurate, labels)
            loss = loss_d
            # linear RMSE of the refined prediction, for logging only
            lin = torch.sqrt(torch.mean(torch.pow(accurate - labels, 2)))
            if loss.item() > 10:
                loss = loss / 10
            loss.backward()
            optimizer.step()

            if args.visdom:
                with torch.no_grad():
                    vis.line(X=torch.ones(1).cpu() * i +
                             torch.ones(1).cpu() * (epoch - trained) * 199,
                             Y=loss.item() * torch.ones(1).cpu(),
                             win=loss_window, update='append')
                    vis.line(X=torch.ones(1).cpu() * i +
                             torch.ones(1).cpu() * (epoch - trained) * 199,
                             Y=lin.item() * torch.ones(1).cpu(),
                             win=lin_window, update='append')
                    ground = labels.data.cpu().numpy().astype('float32')
                    ground = ground[0, :, :]
                    ground = np.reshape(ground, [480 // scale, 640 // scale]) / (np.max(ground) + 0.001)
                    vis.image(ground, opts=dict(title='ground!', caption='ground.'),
                              win=ground_window)
                    accurate = accurate.data.cpu().numpy().astype('float32')
                    accurate = accurate[0, ...]
                    # display the absolute error of the refined prediction
                    accurate = np.abs(np.reshape(accurate, [480 // scale, 640 // scale]) /
                                      (np.max(accurate) + 0.001) - ground)
                    vis.image(accurate, opts=dict(title='accurate!', caption='accurate.'),
                              win=accurate_window)
                    depth = depth.data.cpu().numpy().astype('float32')
                    depth = depth[0, :, :, :]
                    depth = np.reshape(depth, [480 // scale, 640 // scale]) / (np.max(depth) + 0.001)
                    vis.image(depth, opts=dict(title='depth!', caption='depth.'),
                              win=depth_window)
                    image = image.data.cpu().numpy().astype('float32')
                    image = image[0, ...]
                    image = np.reshape(image, [3, 480 // scale, 640 // scale])
                    vis.image(image, opts=dict(title='image!', caption='image.'),
                              win=image_window)

            loss_rec.append([i + epoch * 199,
                             torch.Tensor([loss.item()]).unsqueeze(0).cpu()])
            print("data [%d/199/%d/%d] Loss: %.4f d: %.4f loss_d:%.4f loss_a:%.4f" %
                  (i, epoch, args.n_epoch, loss.item(), lin.item(),
                   loss_d.item(), loss_a.item()))

        # validate more frequently as training progresses
        if epoch > 50:
            check = 3
        else:
            check = 5
        if epoch > 70:
            check = 2
        if epoch > 90:
            check = 1

        if epoch % check == 0:
            print('testing!')
            model.train()  # evaluation intentionally keeps batch-norm in train mode
            loss_ave = []
            for i_val, (images_val, labels_val, regions, segments, image) in tqdm(enumerate(valloader)):
                images_val = Variable(images_val.cuda(), requires_grad=False)
                labels_val = Variable(labels_val.cuda(), requires_grad=False)
                segments_val = Variable(segments.cuda(), requires_grad=False)
                regions_val = Variable(regions.cuda(), requires_grad=False)
                with torch.no_grad():
                    depth, accurate = model(images_val, regions_val, 1, 'eval')
                    lin = torch.sqrt(torch.mean(torch.pow(accurate - labels_val, 2)))
                    loss_ave.append(lin.data.cpu().numpy())
                    print("error=%.4f" % lin.item())
                    if args.visdom:
                        # 163 validation minibatches per pass
                        vis.line(X=torch.ones(1).cpu() * i_val +
                                 torch.ones(1).cpu() * test * 163,
                                 Y=lin.item() * torch.ones(1).cpu(),
                                 win=error_window, update='append')
                        ground = labels_val.data.cpu().numpy().astype('float32')
                        ground = ground[0, :, :]
                        ground = np.reshape(ground, [480 // scale, 640 // scale]) / (np.max(ground) + 0.001)
                        vis.image(ground, opts=dict(title='ground!', caption='ground.'),
                                  win=ground_window)
                        accurate = accurate.data.cpu().numpy().astype('float32')
                        accurate = accurate[0, ...]
                        accurate = np.abs(np.reshape(accurate, [480 // scale, 640 // scale]) - ground)
                        accurate = accurate / (np.max(accurate) + 0.001)
                        vis.image(accurate, opts=dict(title='accurate!', caption='accurate.'),
                                  win=accurate_window)
                        depth = depth.data.cpu().numpy().astype('float32')
                        depth = depth[0, :, :, :]
                        depth = np.reshape(depth, [480 // scale, 640 // scale]) / (np.max(depth) + 0.001)
                        vis.image(depth, opts=dict(title='depth!', caption='depth.'),
                                  win=depth_window)
                        image = image.data.cpu().numpy().astype('float32')
                        image = image[0, ...]
                        image = np.reshape(image, [3, 480 // scale, 640 // scale])
                        vis.image(image, opts=dict(title='image!', caption='image.'),
                                  win=image_window)

            error = np.mean(loss_ave)
            print("error_r=%.4f" % error)
            test += 1
            if error <= best_error:
                best_error = error
                state = {
                    'epoch': epoch + 1,
                    'model_state': model.state_dict(),
                    'optimizer_state': optimizer.state_dict(),
                    'error': error,
                }
                torch.save(state, "{}_{}_{}_{}coarse_best_model.pkl".format(
                    args.arch, args.dataset, str(epoch), str(error)))
                print('save success')
                np.save('/home/lidong/Documents/RSCFN/loss.npy', loss_rec)
            if epoch % 10 == 0:
                state = {
                    'epoch': epoch + 1,
                    'model_state': model.state_dict(),
                    'optimizer_state': optimizer.state_dict(),
                    'error': error,
                }
                torch.save(state, "{}_{}_{}_coarse_model.pkl".format(
                    args.arch, args.dataset, str(epoch)))
                print('save success')
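
# ---------------------------------------------------------------------------
# The losses used above (log_loss, berhu_log, berhu) come from repo-local
# modules that are not part of this file. For reference, here is a minimal
# sketch of what they are assumed to compute: an L2 loss in log-depth space
# and a berHu (reverse Huber) loss. These are illustrative reconstructions,
# not the repository's exact definitions.
def log_loss_sketch(pred, target, eps=1e-6):
    # mean squared error between log depths
    return torch.mean(torch.pow(torch.log(pred + eps) - torch.log(target + eps), 2))


def berhu_sketch(pred, target, mask=None):
    # reverse Huber: L1 below the threshold c, scaled L2 above it
    diff = torch.abs(pred - target)
    if mask is not None:
        diff = diff * mask
    c = 0.2 * torch.max(diff).detach()
    l2_branch = (diff ** 2 + c ** 2) / (2 * c + 1e-6)
    return torch.mean(torch.where(diff <= c, diff, l2_branch))
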
def train(args):
    scale = 2
    cuda_id = 0
    torch.backends.cudnn.benchmark = True

    # Setup Augmentations
    data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()])
    loss_rec = []
    best_error = 2

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train',
                           img_size=(args.img_rows, args.img_cols), task='region')
    v_loader = data_loader(data_path, is_transform=True, split='test',
                           img_size=(args.img_rows, args.img_cols), task='region')
    train_len = t_loader.length // args.batch_size
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=args.batch_size, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=args.batch_size, shuffle=False)

    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env='nyu_memory_retrain')
        memory_retrain_window = vis.image(np.random.rand(228, 304),
                                          opts=dict(title='depth!', caption='depth.'))
        accurate_window = vis.image(np.random.rand(228, 304),
                                    opts=dict(title='accurate!', caption='accurate.'))
        ground_window = vis.image(np.random.rand(228, 304),
                                  opts=dict(title='ground!', caption='ground.'))
        image_window = vis.image(np.random.rand(228, 304),
                                 opts=dict(title='img!', caption='img.'))
        loss_window = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                               opts=dict(xlabel='minibatches', ylabel='Loss',
                                         title='Training Loss', legend=['Loss']))
        lin_window = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                              opts=dict(xlabel='minibatches', ylabel='error',
                                        title='linear Loss', legend=['linear error']))
        error_window = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='error',
                                          title='error', legend=['Error']))

    # Setup Model (pinned to GPU 1)
    model = get_model(args.arch)
    model = torch.nn.DataParallel(model, device_ids=[1])
    model.cuda(1)

    # Check if model has a custom optimizer / loss
    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate,
                                     betas=(0.9, 0.999), amsgrad=True)
        # optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate, momentum=0.90)
    if hasattr(model.module, 'loss'):
        print('Using custom loss')
        loss_fn = model.module.loss
    else:
        loss_fn = log_loss

    trained = 0
    test = 0          # index of the current validation pass
    mean_loss = 10.0  # running mean of the per-batch RMSE; drives the replay logic
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            model.load_state_dict(checkpoint['model_state'])
            # optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume,
                                                             checkpoint['epoch']))
            trained = checkpoint['epoch']
            best_error = checkpoint['error'] + 0.1
            mean_loss = checkpoint['mean_loss']
            print(best_error)
            print(trained)
            print(mean_loss)
        else:
            best_error = 100
            best_error_r = 100
            trained = 0
            mean_loss = 10.0
            print('random initialize')
            print("No checkpoint found at '{}'".format(args.resume))
            print('Initialize from rsn!')
            rsn = torch.load(
                '/home/lidong/Documents/RSCFN/memory_retrain_rsn_cluster_nyu_4_0.5681759_coarse_best_model.pkl',
                map_location='cpu')
            model_dict = model.state_dict()
            # keep only the pretrained tensors whose names and shapes match
            pre_dict = {k: v for k, v in rsn['model_state'].items()
                        if k in model_dict and v.shape == model_dict[k].shape}
            model_dict.update(pre_dict)
            model.load_state_dict(model_dict)
            trained = rsn['epoch']
            best_error = rsn['error'] + 0.5
            mean_loss = best_error / 2
            print('load success!')
            print(best_error)
            trained = 0

    zero = torch.zeros(1).cuda(1)
    one = torch.ones(1).cuda(1)

    for epoch in range(trained, args.n_epoch):
        print('training!')
        model.train()
        loss_error = 0
        loss_error_d = 0
        mean_loss_ave = []
        for i, (images, labels, regions, segments, image) in enumerate(trainloader):
            images = Variable(images.cuda(1))
            labels = Variable(labels.cuda(1))
            segments = Variable(segments.cuda(1))
            regions = Variable(regions.cuda(1))
            iterative_count = 0

            # replay hard batches: keep optimizing the same batch until its
            # RMSE drops below the running mean (or 8 retries are used up)
            while True:
                optimizer.zero_grad()
                depth, accurate, loss_var, loss_dis, loss_reg = model(
                    images, regions, labels, 0, 'train')
                labels = labels.view_as(depth)
                segments = segments.view_as(depth)
                regions = regions.view_as(depth)
                # valid-depth mask within the (alpha, beta) range
                mask = (labels > alpha) & (labels < beta)
                mask = mask.float().detach()

                loss_d = berhu(depth, labels, mask)
                loss_a = berhu(accurate, labels, mask)
                loss_v = v_loss(accurate, depth, labels, mask)
                print('mean_variance:%.4f,max_variance:%.4f' %
                      ((torch.sum(torch.abs(accurate - depth)) / torch.sum(mask)).item(),
                       torch.max(torch.abs(accurate - depth)).item()))
                loss = loss_a + 0.3 * loss_d + 0.3 * loss_v

                # clamp predictions and labels into the valid range before
                # computing the logged metrics
                accurate = torch.where(accurate > beta, beta * one, accurate)
                accurate = torch.where(accurate < alpha, alpha * one, accurate)
                labels = torch.where(labels > beta, beta * one, labels)
                labels = torch.where(labels < alpha, alpha * one, labels)
                depth = torch.where(depth > beta, beta * one, depth)
                depth = torch.where(depth < alpha, alpha * one, depth)

                # per-image masked RMSE, averaged over the batch
                lin = torch.mean(torch.sqrt(
                    torch.sum(torch.where(mask > 0, torch.pow(accurate - labels, 2),
                                          mask).view(labels.shape[0], -1), dim=-1) /
                    (torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1)))
                lin_d = torch.mean(torch.sqrt(
                    torch.sum(torch.where(mask > 0, torch.pow(depth - labels, 2),
                                          mask).view(labels.shape[0], -1), dim=-1) /
                    (torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1)))
                lin = lin.detach()
                # masked mean absolute log10 error
                log_d = torch.mean(
                    torch.sum(torch.where(mask > 0,
                                          torch.abs(torch.log10(accurate) -
                                                    torch.log10(labels)),
                                          mask).view(labels.shape[0], -1), dim=-1) /
                    (torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1))

                # warm-up: always take a single step for the first epochs
                if epoch <= trained + 2:
                    loss.backward()
                    mean_loss_ave.append(lin.item())
                    optimizer.step()
                    break
                if lin <= mean_loss:
                    # easy batch: down-weight its gradient and move on
                    print('no back')
                    loss = 0.1 * loss
                    loss.backward()
                    mean_loss_ave.append(lin.item())
                    optimizer.step()
                    break
                else:
                    print(torch.pow(10, torch.min(one, (lin - mean_loss) / mean_loss)).item())
                    print('back')
                    loss.backward()
                    optimizer.step()
                if lin <= mean_loss or iterative_count > 8:
                    mean_loss_ave.append(lin.item())
                    break
                else:
                    iterative_count += 1
                    print("repeat data [%d/%d/%d/%d] Loss: %.4f lin: %.4f " %
                          (i, train_len, epoch, args.n_epoch, loss.item(), lin.item()))

            if args.visdom:
                with torch.no_grad():
                    vis.line(X=torch.ones(1).cpu() * i +
                             torch.ones(1).cpu() * (epoch - trained) * train_len,
                             Y=loss.item() * torch.ones(1).cpu(),
                             win=loss_window, update='append')
                    vis.line(X=torch.ones(1).cpu() * i +
                             torch.ones(1).cpu() * (epoch - trained) * train_len,
                             Y=lin.item() * torch.ones(1).cpu(),
                             win=lin_window, update='append')
                    ground = labels.data.cpu().numpy().astype('float32')
                    ground = ground[0, :, :]
                    ground = np.reshape(ground, [228, 304]) / (np.max(ground) + 0.001)
                    vis.image(ground, opts=dict(title='ground!', caption='ground.'),
                              win=ground_window)
                    # error map of the refined prediction on valid pixels
                    depth = accurate.data.cpu().numpy().astype('float32')
                    depth = depth[0, :, :]
                    depth = np.where(ground > 0,
                                     np.abs(np.reshape(depth, [228, 304]) /
                                            (np.max(depth) + 0.001) - ground), 0)
                    depth = depth / (np.max(depth) + 0.001)
                    vis.image(depth, opts=dict(title='depth!', caption='depth.'),
                              win=memory_retrain_window)
                    accurate = accurate.data.cpu().numpy().astype('float32')
                    accurate = accurate[0, ...]
                    accurate = np.reshape(accurate, [228, 304]) / (np.max(accurate) + 0.001)
                    vis.image(accurate, opts=dict(title='accurate!', caption='accurate.'),
                              win=accurate_window)
                    image = image.data.cpu().numpy().astype('float32')
                    image = image[0, ...]
                    image = np.reshape(image, [3, 228, 304])
                    vis.image(image, opts=dict(title='image!', caption='image.'),
                              win=image_window)

            loss_rec.append([i + epoch * train_len,
                             torch.Tensor([loss.item()]).unsqueeze(0).cpu()])
            loss_error += loss.item()
            loss_error_d += log_d.item()
            print("data [%d/%d/%d/%d] Loss: %.4f lin: %.4f lin_d:%.4f loss_d:%.4f "
                  "loss_a:%.4f loss_var:%.4f loss_dis:%.4f loss_reg: %.4f" %
                  (i, train_len, epoch, args.n_epoch, loss.item(), lin.item(),
                   lin_d.item(), loss_d.item(), loss_a.item(),
                   torch.sum(0.3 * loss_v).item(),
                   torch.sum(0.3 * (loss_a - loss_d)).item(),
                   0.001 * torch.sum(loss_reg).item()))

            # mid-epoch validation every 1000 minibatches
            if (i + 1) % 1000 == 0:
                mean_loss = np.mean(mean_loss_ave)
                mean_loss_ave = []
                print("mean_loss:%.4f" % mean_loss)
                print('testing!')
                model.eval()
                loss_ave = []
                loss_d_ave = []
                loss_log_ave = []
                for i_val, (images_val, labels_val, regions, segments,
                            images) in tqdm(enumerate(valloader)):
                    images_val = Variable(images_val.cuda(1), requires_grad=False)
                    labels_val = Variable(labels_val.cuda(1), requires_grad=False)
                    segments_val = Variable(segments.cuda(1), requires_grad=False)
                    regions_val = Variable(regions.cuda(1), requires_grad=False)
                    with torch.no_grad():
                        depth, accurate, loss_var, loss_dis, loss_reg = model(
                            images_val, regions_val, labels_val, 0, 'eval')
                        accurate = torch.where(accurate > beta, beta * one, accurate)
                        accurate = torch.where(accurate < alpha, alpha * one, accurate)
                        labels_val = torch.where(labels_val > beta, beta * one, labels_val)
                        labels_val = torch.where(labels_val < alpha, alpha * one, labels_val)
                        depth = torch.where(depth > beta, beta * one, depth)
                        depth = torch.where(depth < alpha, alpha * one, depth)
                        # upsample predictions and crop the label border so
                        # both are compared at the same resolution
                        depth = F.interpolate(depth, scale_factor=scale,
                                              mode='nearest').squeeze()
                        accurate = F.interpolate(accurate, scale_factor=scale,
                                                 mode='nearest').squeeze()
                        labels_val = (labels_val[..., 6 * scale:-6 * scale,
                                                 8 * scale:-8 * scale]).view_as(depth)
                        mask = (labels_val > alpha) & (labels_val < beta)
                        mask = mask.float().detach()
                        lin = torch.mean(torch.sqrt(
                            torch.sum(torch.where(mask > 0,
                                                  torch.pow(accurate - labels_val, 2),
                                                  mask).view(labels_val.shape[0], -1),
                                      dim=-1) /
                            torch.sum(mask.view(labels_val.shape[0], -1), dim=-1)))
                        lin_d = torch.mean(torch.sqrt(
                            torch.sum(torch.where(mask > 0,
                                                  torch.pow(depth - labels_val, 2),
                                                  mask).view(labels_val.shape[0], -1),
                                      dim=-1) /
                            torch.sum(mask.view(labels_val.shape[0], -1), dim=-1)))
                        log_d = torch.mean(
                            torch.sum(torch.where(mask > 0,
                                                  torch.abs(torch.log10(accurate) -
                                                            torch.log10(labels_val)),
                                                  mask).view(labels_val.shape[0], -1),
                                      dim=-1) /
                            torch.sum(mask.view(labels_val.shape[0], -1), dim=-1))
                        loss_ave.append(lin.data.cpu().numpy())
                        loss_d_ave.append(lin_d.data.cpu().numpy())
                        loss_log_ave.append(log_d.data.cpu().numpy())
                        print("error=%.4f,error_d=%.4f,error_log=%.4f" %
                              (lin.item(), lin_d.item(), log_d.item()))
                        if args.visdom:
                            # 654 validation images in total
                            vis.line(X=torch.ones(1).cpu() * i_val +
                                     torch.ones(1).cpu() * test * 654 / args.batch_size,
                                     Y=lin.item() * torch.ones(1).cpu(),
                                     win=error_window, update='append')
                            labels_val = labels_val.unsqueeze(1)
                            labels_val = F.interpolate(labels_val, scale_factor=1 / 2,
                                                       mode='nearest').squeeze()
                            accurate = accurate.unsqueeze(1)
                            accurate = F.interpolate(accurate, scale_factor=1 / 2,
                                                     mode='nearest').squeeze()
                            depth = depth.unsqueeze(1)
                            depth = F.interpolate(depth, scale_factor=1 / 2,
                                                  mode='nearest').squeeze()
                            ground = labels_val.data.cpu().numpy().astype('float32')
                            ground = ground[0, :, :]
                            ground = np.reshape(ground, [228, 304]) / (np.max(ground) + 0.001)
                            vis.image(ground, opts=dict(title='ground!', caption='ground.'),
                                      win=ground_window)
                            depth = accurate.data.cpu().numpy().astype('float32')
                            depth = depth[0, :, :]
                            depth = np.where(ground > 0,
                                             np.abs(np.reshape(depth, [228, 304]) /
                                                    (np.max(depth) + 0.001) - ground), 0)
                            depth = depth / (np.max(depth) + 0.001)
                            vis.image(depth, opts=dict(title='depth!', caption='depth.'),
                                      win=memory_retrain_window)
                            accurate = accurate.data.cpu().numpy().astype('float32')
                            accurate = accurate[0, ...]
                            accurate = np.reshape(accurate, [228, 304])
                            accurate = accurate / (np.max(accurate) + 0.001)
                            vis.image(accurate, opts=dict(title='accurate!',
                                                          caption='accurate.'),
                                      win=accurate_window)
                            image = images.data.cpu().numpy().astype('float32')
                            image = image[0, ...]
                            image = np.reshape(image, [3, 228, 304])
                            vis.image(image, opts=dict(title='image!', caption='image.'),
                                      win=image_window)
                model.train()
                error = np.mean(loss_ave)
                print("error_r=%.4f,error_d=%.4f,log_error=%.4f" %
                      (error, np.mean(loss_d_ave), np.mean(loss_log_ave)))
                test += 1
                if error <= best_error:
                    best_error = error
                    state = {
                        'epoch': epoch + 1,
                        'model_state': model.state_dict(),
                        'optimizer_state': optimizer.state_dict(),
                        'error': error,
                        'mean_loss': mean_loss,
                    }
                    torch.save(state,
                               "/home/lidong/Documents/RSCFN/memory/memory_retrain_{}_{}_{}_{}_coarse_best_model.pkl"
                               .format(args.arch, args.dataset, str(epoch), str(error)))
                    print('save success')
                    np.save('/home/lidong/Documents/RSCFN/loss.npy', loss_rec)

        # refresh the running mean at the end of the epoch
        mean_loss = np.mean(mean_loss_ave)
        mean_loss_ave = []
        print("mean_loss:%.4f" % mean_loss)

        if epoch > 50:
            check = 3
        else:
            check = 5
        if epoch > 70:
            check = 2
        if epoch > 90:
            check = 1
        check = 1  # validate every epoch

        if epoch % check == 0:
            print('testing!')
            model.eval()
            loss_ave = []
            loss_d_ave = []
            loss_log_ave = []
            error_sum = 0
            for i_val, (images_val, labels_val, regions, segments,
                        images) in tqdm(enumerate(valloader)):
                images_val = Variable(images_val.cuda(1), requires_grad=False)
                labels_val = Variable(labels_val.cuda(1), requires_grad=False)
                segments_val = Variable(segments.cuda(1), requires_grad=False)
                regions_val = Variable(regions.cuda(1), requires_grad=False)
                with torch.no_grad():
                    depth, accurate, loss_var, loss_dis, loss_reg = model(
                        images_val, regions_val, labels_val, 0, 'eval')
                    accurate = torch.where(accurate > beta, beta * one, accurate)
                    accurate = torch.where(accurate < alpha, alpha * one, accurate)
                    labels_val = torch.where(labels_val > beta, beta * one, labels_val)
                    labels_val = torch.where(labels_val < alpha, alpha * one, labels_val)
                    depth = torch.where(depth > beta, beta * one, depth)
                    depth = torch.where(depth < alpha, alpha * one, depth)
                    depth = F.interpolate(depth, scale_factor=scale,
                                          mode='nearest').squeeze()
                    accurate = F.interpolate(accurate, scale_factor=scale,
                                             mode='nearest').squeeze()
                    labels_val = (labels_val[..., 6 * scale:-6 * scale,
                                             8 * scale:-8 * scale]).view_as(depth)
                    mask = (labels_val > alpha) & (labels_val < beta)
                    mask = mask.float().detach()
                    lin = torch.mean(torch.sqrt(
                        torch.sum(torch.where(mask > 0,
                                              torch.pow(accurate - labels_val, 2),
                                              mask).view(labels_val.shape[0], -1),
                                  dim=-1) /
                        torch.sum(mask.view(labels_val.shape[0], -1), dim=-1)))
                    lin_d = torch.mean(torch.sqrt(
                        torch.sum(torch.where(mask > 0,
                                              torch.pow(depth - labels_val, 2),
                                              mask).view(labels_val.shape[0], -1),
                                  dim=-1) /
                        torch.sum(mask.view(labels_val.shape[0], -1), dim=-1)))
                    error_sum += torch.sum(torch.sqrt(
                        torch.sum(torch.where(mask > 0,
                                              torch.pow(accurate - labels_val, 2),
                                              mask).view(labels_val.shape[0], -1),
                                  dim=-1) /
                        torch.sum(mask.view(labels_val.shape[0], -1), dim=-1)))
                    log_d = torch.mean(
                        torch.sum(torch.where(mask > 0,
                                              torch.abs(torch.log10(accurate) -
                                                        torch.log10(labels_val)),
                                              mask).view(labels_val.shape[0], -1),
                                  dim=-1) /
                        torch.sum(mask.view(labels_val.shape[0], -1), dim=-1))
                    loss_ave.append(lin.data.cpu().numpy())
                    loss_d_ave.append(lin_d.data.cpu().numpy())
                    loss_log_ave.append(log_d.data.cpu().numpy())
                    print("error=%.4f,error_d=%.4f,error_log=%.4f" %
                          (lin.item(), lin_d.item(), log_d.item()))
                    if args.visdom:
                        vis.line(X=torch.ones(1).cpu() * i_val +
                                 torch.ones(1).cpu() * test * 654 / args.batch_size,
                                 Y=lin.item() * torch.ones(1).cpu(),
                                 win=error_window, update='append')
                        labels_val = labels_val.unsqueeze(1)
                        labels_val = F.interpolate(labels_val, scale_factor=1 / 2,
                                                   mode='nearest').squeeze()
                        accurate = accurate.unsqueeze(1)
                        accurate = F.interpolate(accurate, scale_factor=1 / 2,
                                                 mode='nearest').squeeze()
                        depth = depth.unsqueeze(1)
                        depth = F.interpolate(depth, scale_factor=1 / 2,
                                              mode='nearest').squeeze()
                        ground = labels_val.data.cpu().numpy().astype('float32')
                        ground = ground[0, :, :]
                        ground = np.reshape(ground, [228, 304]) / (np.max(ground) + 0.001)
                        vis.image(ground, opts=dict(title='ground!', caption='ground.'),
                                  win=ground_window)
                        depth = accurate.data.cpu().numpy().astype('float32')
                        depth = depth[0, :, :]
                        depth = np.where(ground > 0,
                                         np.abs(np.reshape(depth, [228, 304]) /
                                                (np.max(depth) + 0.001) - ground), 0)
                        depth = depth / (np.max(depth) + 0.001)
                        vis.image(depth, opts=dict(title='depth!', caption='depth.'),
                                  win=memory_retrain_window)
                        accurate = accurate.data.cpu().numpy().astype('float32')
                        accurate = accurate[0, ...]
                        accurate = np.reshape(accurate, [228, 304])
                        accurate = accurate / (np.max(accurate) + 0.001)
                        vis.image(accurate, opts=dict(title='accurate!',
                                                      caption='accurate.'),
                                  win=accurate_window)
                        image = images.data.cpu().numpy().astype('float32')
                        image = image[0, ...]
                        image = np.reshape(image, [3, 228, 304])
                        vis.image(image, opts=dict(title='image!', caption='image.'),
                                  win=image_window)

            error = np.mean(loss_ave)
            print("error_r=%.4f,error_d=%.4f,error_log=%.4f" %
                  (error, np.mean(loss_d_ave), np.mean(loss_log_ave)))
            test += 1
            print(error_sum / 654)
            if error <= best_error:
                best_error = error
                state = {
                    'epoch': epoch + 1,
                    'model_state': model.state_dict(),
                    'optimizer_state': optimizer.state_dict(),
                    'error': error,
                    'mean_loss': mean_loss,
                }
                torch.save(state,
                           "/home/lidong/Documents/RSCFN/memory/memory_retrain_{}_{}_{}_{}_coarse_best_model.pkl"
                           .format(args.arch, args.dataset, str(epoch), str(error)))
                print('save success')
                np.save('/home/lidong/Documents/RSCFN/loss.npy', loss_rec)
            if epoch % 30 == 0:
                state = {
                    'epoch': epoch + 1,
                    'model_state': model.state_dict(),
                    'optimizer_state': optimizer.state_dict(),
                    'error': error,
                    'mean_loss': mean_loss,
                }
                torch.save(state,
                           "/home/lidong/Documents/RSCFN/memory/memory_retrain_{}_{}_{}_coarse_model.pkl"
                           .format(args.arch, args.dataset, str(epoch)))
                print('save success')
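
# ---------------------------------------------------------------------------
# The per-image masked RMSE used throughout the variant above (the inline
# `lin` / `lin_d` expressions) is hard to read. A sketch of the same
# computation factored into a helper, shown for reference only (the script
# itself keeps the inline form; `eps` stands in for the +1 smoothing term
# used during training):
def masked_rmse(pred, target, mask, eps=1.0):
    # zero out invalid pixels, average the squared error per image over the
    # valid-pixel count, then average the per-image RMSEs across the batch
    sq = torch.where(mask > 0, torch.pow(pred - target, 2), mask)
    per_image = (torch.sum(sq.view(target.shape[0], -1), dim=-1) /
                 (torch.sum(mask.view(target.shape[0], -1), dim=-1) + eps))
    return torch.mean(torch.sqrt(per_image))
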
def train(args):
    # Setup Augmentations
    data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()])
    loss_rec = []
    best_error = 2

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train_region',
                           img_size=(args.img_rows, args.img_cols), task='region')
    v_loader = data_loader(data_path, is_transform=True, split='test_region',
                           img_size=(args.img_rows, args.img_cols), task='region')
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=4, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=4)

    # Setup Metrics
    running_metrics = runningScore(n_classes)

    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom()
        old_window = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                              opts=dict(xlabel='minibatches', ylabel='Loss',
                                        title='Trained Loss', legend=['Loss']))
        loss_window = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                               opts=dict(xlabel='minibatches', ylabel='Loss',
                                         title='Training Loss', legend=['Loss']))
        pre_window = vis.image(np.random.rand(480, 640),
                               opts=dict(title='predict!', caption='predict.'))
        ground_window = vis.image(np.random.rand(480, 640),
                                  opts=dict(title='ground!', caption='ground.'))

    # Setup Model
    model = get_model(args.arch)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))
    model.cuda()

    # Check if model has a custom optimizer / loss
    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        # optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate,
        #                              weight_decay=5e-4, betas=(0.9, 0.999))
        optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate,
                                    momentum=0.99, weight_decay=5e-4)
    if hasattr(model.module, 'loss'):
        print('Using custom loss')
        loss_fn = model.module.loss
    else:
        loss_fn = region_log

    trained = 0
    scale = 100
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume,
                                                             checkpoint['epoch']))
            trained = checkpoint['epoch']
            best_error = checkpoint['error']
            # nyu2_train: 246 minibatches per epoch, nyu2_all: 816
            loss_rec = list(np.load('/home/lidong/Documents/RSDEN/RSDEN/loss.npy'))
            loss_rec = loss_rec[:816 * trained]
            # replay the per-epoch mean of the recorded loss into visdom
            for l in range(int(len(loss_rec) / 816)):
                if args.visdom:
                    vis.line(
                        X=torch.ones(1).cpu() * loss_rec[l * 816][0],
                        Y=np.mean(np.array(loss_rec[l * 816:(l + 1) * 816])[:, 1]) *
                          torch.ones(1).cpu(),
                        win=old_window, update='append')
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            print('Initialize from resnet34!')
            resnet34 = torch.load('/home/lidong/Documents/RSDEN/RSDEN/resnet34-333f7ec4.pth')
            model_dict = model.state_dict()
            pre_dict = {k: v for k, v in resnet34.items() if k in model_dict}
            model_dict.update(pre_dict)
            model.load_state_dict(model_dict)
            print('load success!')
            best_error = 1
            trained = 0

    for epoch in range(trained, args.n_epoch):
        print('training!')
        model.train()
        for i, (images, labels, segments) in enumerate(trainloader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            segments = Variable(segments.cuda())
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(input=outputs, target=labels, instance=segments)
            loss.backward()
            optimizer.step()

            if args.visdom:
                vis.line(X=torch.ones(1).cpu() * i +
                         torch.ones(1).cpu() * (epoch - trained) * 816,
                         Y=loss.item() * torch.ones(1).cpu(),
                         win=loss_window, update='append')
                pre = outputs.data.cpu().numpy().astype('float32')
                pre = pre[0, :, :, :]
                pre = (np.reshape(pre, [480, 640]) - np.min(pre)) / \
                      (np.max(pre) - np.min(pre))
                vis.image(pre, opts=dict(title='predict!', caption='predict.'),
                          win=pre_window)
                ground = labels.data.cpu().numpy().astype('float32')
                ground = ground[0, :, :]
                ground = (np.reshape(ground, [480, 640]) - np.min(ground)) / \
                         (np.max(ground) - np.min(ground))
                vis.image(ground, opts=dict(title='ground!', caption='ground.'),
                          win=ground_window)

            loss_rec.append([i + epoch * 816,
                             torch.Tensor([loss.item()]).unsqueeze(0).cpu()])
            print("data [%d/816/%d/%d] Loss: %.4f" % (i, epoch, args.n_epoch, loss.item()))

        if epoch > 50:
            check = 3
        else:
            check = 5
        if epoch > 70:
            check = 2
        if epoch > 85:
            check = 1

        if epoch % check == 0:
            print('testing!')
            model.train()  # evaluation intentionally keeps batch-norm in train mode
            error_lin = []
            error_log = []
            variance = []
            for i_val, (images_val, labels_val, segments) in tqdm(enumerate(valloader)):
                images_val = Variable(images_val.cuda(), requires_grad=False)
                labels_val = Variable(labels_val.cuda(), requires_grad=False)
                segments = Variable(segments.cuda(), requires_grad=False)
                with torch.no_grad():
                    outputs = model(images_val)
                pred = outputs.data.cpu().numpy()
                gt = labels_val.data.cpu().numpy()
                instance = segments.data.cpu().numpy()
                pred = np.reshape(pred, gt.shape)
                instance = np.reshape(instance, gt.shape)
                # accumulate per-region absolute error, log error and variance
                var = 0
                linear = 0
                log_dis = 0
                for r in range(1, int(np.max(instance) + 1)):
                    pre_region = np.where(instance == r, pred, 0)
                    dis = np.where(instance == r, np.abs(gt - pred), 0)
                    num = np.sum(np.where(instance == r, 1, 0))
                    m = np.sum(pre_region) / num
                    pre_region = np.where(instance == r, pred - m, 0)
                    pre_region = np.sum(np.square(pre_region)) / num
                    log_region = np.where(instance == r,
                                          np.abs(np.log(gt + 1e-6) - np.log(pred + 1e-6)), 0)
                    var += pre_region
                    linear += np.sum(dis) / num
                    log_dis += np.sum(log_region) / num
                error_log.append(log_dis / np.max(instance))
                error_lin.append(linear / np.max(instance))
                variance.append(var / np.max(instance))
                print("error_lin=%.4f,error_log=%.4f,variance=%.4f" %
                      (error_lin[i_val], error_log[i_val], variance[i_val]))
                # earlier revisions also computed abs./sq. relative error and
                # the delta < 1.25**k threshold metrics here

            error = np.mean(error_lin)
            variance = np.mean(variance)
            print("error=%.4f,variance=%.4f" % (error, variance))
            if error <= best_error:
                best_error = error
                state = {
                    'epoch': epoch + 1,
                    'model_state': model.state_dict(),
                    'optimizer_state': optimizer.state_dict(),
                    'error': error,
                }
                torch.save(state, "{}_{}_best_model.pkl".format(args.arch, args.dataset))
                print('save success')
                np.save('/home/lidong/Documents/RSDEN/RSDEN/loss.npy', loss_rec)
            if epoch % 5 == 0:
                state = {
                    'epoch': epoch + 1,
                    'model_state': model.state_dict(),
                    'optimizer_state': optimizer.state_dict(),
                    'error': error,
                }
                torch.save(state, "{}_{}_{}_model.pkl".format(args.arch, args.dataset,
                                                              str(epoch)))
                print('save success')
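
# ---------------------------------------------------------------------------
# The per-region evaluation above mixes the metric computation into the loop.
# A sketch of the same computation factored into a helper, for reference only
# (region ids in `instance` are assumed contiguous, with 0 as background):
def region_errors(pred, gt, instance, eps=1e-6):
    n_regions = int(np.max(instance))
    var = lin = log_dis = 0.0
    for r in range(1, n_regions + 1):
        sel = instance == r
        num = np.sum(sel)
        m = np.sum(np.where(sel, pred, 0)) / num  # mean predicted depth in region r
        var += np.sum(np.square(np.where(sel, pred - m, 0))) / num
        lin += np.sum(np.where(sel, np.abs(gt - pred), 0)) / num
        log_dis += np.sum(np.where(sel, np.abs(np.log(gt + eps) - np.log(pred + eps)), 0)) / num
    # region-averaged linear error, log error, and intra-region variance
    return lin / n_regions, log_dis / n_regions, var / n_regions
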
def train(args):
    # Setup Augmentations
    data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()])
    loss_rec = []
    best_error = 2

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train_region',
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path, is_transform=True, split='test_region',
                           img_size=(args.img_rows, args.img_cols))
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=2, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=2)

    # Setup Metrics
    running_metrics = runningScore(n_classes)

    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom()
        old_window = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                              opts=dict(xlabel='minibatches', ylabel='Loss',
                                        title='Trained Loss', legend=['Loss']))
        loss_window1 = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='Loss',
                                          title='Training Loss1', legend=['Loss1']))
        loss_window2 = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='Loss',
                                          title='Training Loss2', legend=['Loss']))
        loss_window3 = vis.line(X=torch.zeros((1,)).cpu(), Y=torch.zeros((1,)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='Loss',
                                          title='Training Loss3', legend=['Loss3']))
        pre_window1 = vis.image(np.random.rand(480, 640),
                                opts=dict(title='predict1!', caption='predict1.'))
        pre_window2 = vis.image(np.random.rand(480, 640),
                                opts=dict(title='predict2!', caption='predict2.'))
        pre_window3 = vis.image(np.random.rand(480, 640),
                                opts=dict(title='predict3!', caption='predict3.'))
        ground_window = vis.image(np.random.rand(480, 640),
                                  opts=dict(title='ground!', caption='ground.'))

    cuda0 = torch.device('cuda:0')
    cuda1 = torch.device('cuda:1')
    cuda2 = torch.device('cuda:2')
    cuda3 = torch.device('cuda:3')

    # Setup Model: rsnet on GPUs 0-1, drnet on GPUs 2-3, one joint optimizer
    rsnet = get_model('rsnet')
    rsnet = torch.nn.DataParallel(rsnet, device_ids=[0, 1])
    rsnet.cuda(cuda0)
    drnet = get_model('drnet')
    drnet = torch.nn.DataParallel(drnet, device_ids=[2, 3])
    drnet.cuda(cuda2)
    parameters = list(rsnet.parameters()) + list(drnet.parameters())

    # Check if model has a custom optimizer / loss
    if hasattr(drnet.module, 'optimizer'):
        optimizer = drnet.module.optimizer
    else:
        optimizer = torch.optim.SGD(parameters, lr=args.l_rate,
                                    momentum=0.99, weight_decay=5e-4)
    if hasattr(rsnet.module, 'loss'):
        print('Using custom loss')
        loss_fn = rsnet.module.loss
    else:
        loss_fn = l1_r

    trained = 0
    scale = 100
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            # NOTE: this branch still refers to a single `model`, a leftover
            # from the one-network variants; the rsnet/drnet states would
            # need to be restored separately for it to actually work.
            model.load_state_dict(checkpoint['model_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume,
                                                             checkpoint['epoch']))
            trained = checkpoint['epoch']
            best_error = checkpoint['error']
            # 3265 minibatches per epoch on this split
            loss_rec = list(np.load('/home/lidong/Documents/RSDEN/RSDEN/loss.npy'))
            loss_rec = loss_rec[:3265 * trained]
            for l in range(int(len(loss_rec) / 3265)):
                if args.visdom:
                    vis.line(
                        X=torch.ones(1).cpu() * loss_rec[l * 3265][0],
                        Y=np.mean(np.array(loss_rec[l * 3265:(l + 1) * 3265])[:, 1]) *
                          torch.ones(1).cpu(),
                        win=old_window, update='append')
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            print('Initialize separately!')
            checkpoint = torch.load('/home/lidong/Documents/RSDEN/RSDEN/rsnet_nyu_best_model.pkl')
            rsnet.load_state_dict(checkpoint['model_state'])
            trained = checkpoint['epoch']
            print('load success from rsnet %d' % trained)
            checkpoint = torch.load('/home/lidong/Documents/RSDEN/RSDEN/drnet_nyu_best_model.pkl')
            drnet.load_state_dict(checkpoint['model_state'])
            trained = checkpoint['epoch']
            print('load success from drnet %d' % trained)
            trained = 0
            best_error = checkpoint['error']

    # it should be range(checkpoint['epoch'], args.n_epoch)
    for epoch in range(trained, args.n_epoch):
        ...  # the body of this training loop is truncated in the source
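
# ---------------------------------------------------------------------------
# The source truncates this variant right where the epoch loop opens. For
# reference, here is a minimal sketch of what one joint optimization step
# could look like under the placement above (rsnet replicated on GPUs 0-1,
# drnet on GPUs 2-3). The drnet(images, regions) interface and the use of
# loss_fn are assumptions; the file does not show the real forward signatures.
def joint_step_sketch(rsnet, drnet, optimizer, loss_fn, images, labels):
    cuda0 = torch.device('cuda:0')
    cuda2 = torch.device('cuda:2')
    optimizer.zero_grad()
    regions = rsnet(images.to(cuda0))                   # stage 1: region branch
    depth = drnet(images.to(cuda2), regions.to(cuda2))  # stage 2: depth branch
    loss = loss_fn(depth, labels.to(cuda2))
    loss.backward()   # autograd carries gradients across the two GPU groups
    optimizer.step()  # one SGD step over the concatenated parameter list
    return loss.item()
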
def train(args): # Setup Augmentations data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()]) loss_rec = [] best_error = 2 # Setup Dataloader data_loader = get_loader(args.dataset) data_path = get_data_path(args.dataset) t_loader = data_loader(data_path, is_transform=True, split='train', img_size=(args.img_rows, args.img_cols), task='region') v_loader = data_loader(data_path, is_transform=True, split='test', img_size=(args.img_rows, args.img_cols), task='region') n_classes = t_loader.n_classes trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=4, shuffle=True) valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=4) # Setup Metrics running_metrics = runningScore(n_classes) # Setup visdom for visualization if args.visdom: vis = visdom.Visdom() depth_window = vis.image( np.random.rand(480, 640), opts=dict(title='depth!', caption='depth.'), ) cluster_window = vis.image( np.random.rand(480, 640), opts=dict(title='cluster!', caption='cluster.'), ) region_window = vis.image( np.random.rand(480, 640), opts=dict(title='region!', caption='region.'), ) ground_window = vis.image( np.random.rand(480, 640), opts=dict(title='ground!', caption='ground.'), ) loss_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss', legend=['Loss'])) old_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Trained Loss', legend=['Loss'])) # Setup Model model = get_model(args.arch) model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count())) #model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count())) model.cuda() # Check if model has custom optimizer / loss # modify to adam, modify the learning rate if hasattr(model.module, 'optimizer'): optimizer = model.module.optimizer else: # optimizer = torch.optim.Adam( # model.parameters(), lr=args.l_rate,weight_decay=5e-4,betas=(0.9,0.999)) optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate, momentum=0.90, weight_decay=5e-4) if hasattr(model.module, 'loss'): print('Using custom loss') loss_fn = model.module.loss else: loss_fn = log_loss trained = 0 scale = 100 if args.resume is not None: if os.path.isfile(args.resume): print("Loading model and optimizer from checkpoint '{}'".format( args.resume)) checkpoint = torch.load(args.resume, map_location='cpu') #model_dict=model.state_dict() #opt=torch.load('/home/lidong/Documents/RSDEN/RSDEN/exp1/l2/sgd/log/83/rsnet_nyu_best_model.pkl') model.load_state_dict(checkpoint['model_state']) optimizer.load_state_dict(checkpoint['optimizer_state']) #opt=None print("Loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) trained = checkpoint['epoch'] best_error = checkpoint['error'] #best_error_d=checkpoint['error_d'] best_error_d = checkpoint['error_d'] print(best_error) print(trained) loss_rec = np.load('/home/lidong/Documents/RSDEN/RSDEN/loss.npy') loss_rec = list(loss_rec) loss_rec = loss_rec[:179 * trained] # for i in range(300): # loss_rec[i][1]=loss_rec[i+300][1] for l in range(int(len(loss_rec) / 179)): if args.visdom: vis.line( X=torch.ones(1).cpu() * loss_rec[l * 179][0], Y=np.mean( np.array(loss_rec[l * 179:(l + 1) * 179])[:, 1]) * torch.ones(1).cpu(), win=old_window, update='append') #exit() else: best_error = 100 best_error_d = 100 trained = 0 print('random initialize') """ print("No checkpoint found at '{}'".format(args.resume)) 
print('Initialize from rsn!') rsn=torch.load('/home/lidong/Documents/RSDEN/RSDEN/depth_rsn_cluster_nyu2_best_model.pkl',map_location='cpu') model_dict=model.state_dict() #print(model_dict) #pre_dict={k: v for k, v in rsn['model_state'].items() if k in model_dict and rsn['model_state'].items()} pre_dict={k: v for k, v in rsn.items() if k in model_dict and rsn.items()} key=[] for k,v in pre_dict.items(): if v.shape!=model_dict[k].shape: key.append(k) for k in key: pre_dict.pop(k) model_dict.update(pre_dict) model.load_state_dict(model_dict) #trained=rsn['epoch'] #best_error=rsn['error'] #best_error_d=checkpoint['error_d'] #best_error_d=rsn['error_d'] print('load success!') print(best_error) print(trained) print(best_error_d) del rsn """ # it should be range(checkpoint[''epoch],args.n_epoch) for epoch in range(trained, args.n_epoch): #for epoch in range(0, args.n_epoch): #trained print('training!') model.train() for i, (images, labels, regions, segments) in enumerate(trainloader): #break images = Variable(images.cuda()) labels = Variable(labels.cuda()) segments = Variable(segments.cuda()) regions = Variable(regions.cuda()) optimizer.zero_grad() # depth,feature,loss_var,loss_dis,loss_reg = model(images,segments) # loss_d=l2(depth,labels) # loss=torch.sum(loss_var)+torch.sum(loss_dis)+0.001*torch.sum(loss_reg) # loss=loss/4+loss_d # loss/=2 depth = model(images, segments) loss_d = berhu(depth, labels) lin = l2(depth, labels) loss = loss_d loss.backward() optimizer.step() if loss.item() <= 0.000001: feature = feature.data.cpu().numpy().astype('float32')[0, ...] feature = np.reshape( feature, [1, feature.shape[0], feature.shape[1], feature.shape[2]]) feature = np.transpose(feature, [0, 2, 3, 1]) print(feature.shape) #feature = feature[0,...] masks = get_instance_masks(feature, 0.7) print(masks.shape) #cluster = masks[0] cluster = np.sum(masks, axis=0) cluster = (np.reshape(cluster, [480, 640]).astype('float32') - np.min(cluster)) / (np.max(cluster) - np.min(cluster) + 1) vis.image( cluster, opts=dict(title='cluster!', caption='cluster.'), win=cluster_window, ) if args.visdom: vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 179, Y=loss.item() * torch.ones(1).cpu(), win=loss_window, update='append') depth = depth.data.cpu().numpy().astype('float32') depth = depth[0, :, :, :] depth = (np.reshape(depth, [480, 640]).astype('float32') - np.min(depth)) / (np.max(depth) - np.min(depth) + 1) vis.image( depth, opts=dict(title='depth!', caption='depth.'), win=depth_window, ) region = regions.data.cpu().numpy().astype('float32') region = region[0, ...] 
region = (np.reshape(region, [480, 640]).astype('float32') - np.min(region)) / (np.max(region) - np.min(region) + 1) vis.image( region, opts=dict(title='region!', caption='region.'), win=region_window, ) ground = labels.data.cpu().numpy().astype('float32') ground = ground[0, :, :] ground = (np.reshape(ground, [480, 640]).astype('float32') - np.min(ground)) / (np.max(ground) - np.min(ground) + 1) vis.image( ground, opts=dict(title='ground!', caption='ground.'), win=ground_window, ) loss_rec.append([ i + epoch * 179, torch.Tensor([loss.item()]).unsqueeze(0).cpu() ]) # print("data [%d/179/%d/%d] Loss: %.4f loss_var: %.4f loss_dis: %.4f loss_reg: %.4f loss_d: %.4f" % (i, epoch, args.n_epoch,loss.item(), \ # torch.sum(loss_var).item()/4,torch.sum(loss_dis).item()/4,0.001*torch.sum(loss_reg).item()/4,loss_d.item())) print("data [%d/179/%d/%d] Loss: %.4f linear: %.4f " % (i, epoch, args.n_epoch, loss.item(), lin.item())) if epoch > 30: check = 3 else: check = 5 if epoch > 50: check = 2 if epoch > 70: check = 1 #epoch=3 if epoch % check == 0: print('testing!') model.eval() loss_ave = [] loss_d_ave = [] loss_lin_ave = [] for i_val, (images_val, labels_val, regions, segments) in tqdm(enumerate(valloader)): #print(r'\n') images_val = Variable(images_val.cuda(), requires_grad=False) labels_val = Variable(labels_val.cuda(), requires_grad=False) segments_val = Variable(segments.cuda(), requires_grad=False) regions_val = Variable(regions.cuda(), requires_grad=False) with torch.no_grad(): #depth,feature,loss_var,loss_dis,loss_reg = model(images_val,segments_val) depth = model(images_val, segments_val) # loss=torch.sum(loss_var)+torch.sum(loss_dis)+0.001*torch.sum(loss_reg) # loss=loss/4 loss_d = log_loss(input=depth, target=labels_val) loss_d = torch.sqrt(loss_d) loss_lin = l2(depth, labels_val) loss_lin = torch.sqrt(loss_lin) # loss_r=(loss+loss_d)/2 # loss_ave.append(loss_r.data.cpu().numpy()) loss_d_ave.append(loss_d.data.cpu().numpy()) loss_lin_ave.append(loss_lin.data.cpu().numpy()) print('error:') print(loss_d_ave[-1]) # print(loss_ave[-1]) print(loss_lin_ave[-1]) #exit() # feature = feature.data.cpu().numpy().astype('float32')[0,...] # feature=np.reshape(feature,[1,feature.shape[0],feature.shape[1],feature.shape[2]]) # feature=np.transpose(feature,[0,2,3,1]) # #print(feature.shape) # #feature = feature[0,...] 
# masks=get_instance_masks(feature, 0.7) # #print(len(masks)) # cluster = np.array(masks) # cluster=np.sum(masks,axis=0) # cluster = np.reshape(cluster, [480, 640]).astype('float32')/255 # vis.image( # cluster, # opts=dict(title='cluster!', caption='cluster.'), # win=cluster_window, # ) # ground=segments.data.cpu().numpy().astype('float32') # ground = ground[0, :, :] # ground = (np.reshape(ground, [480, 640]).astype('float32')-np.min(ground))/(np.max(ground)-np.min(ground)+1) # vis.image( # ground, # opts=dict(title='ground!', caption='ground.'), # win=ground_window, # ) #error=np.mean(loss_ave) error_d = np.mean(loss_d_ave) error_lin = np.mean(loss_lin_ave) #error_rate=np.mean(error_rate) print("error_d=%.4f error_lin=%.4f" % (error_d, error_lin)) #exit() #continue # if error_d<= best_error: # best_error = error # state = {'epoch': epoch+1, # 'model_state': model.state_dict(), # 'optimizer_state': optimizer.state_dict(), # 'error': error, # 'error_d': error_d, # } # torch.save(state, "{}_{}_best_model.pkl".format( # args.arch, args.dataset)) # print('save success') # np.save('/home/lidong/Documents/RSDEN/RSDEN/loss.npy',loss_rec) if error_lin <= best_error: best_error = error_lin state = { 'epoch': epoch + 1, 'model_state': model.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error_lin, 'error_d': error_d, } torch.save( state, "depth_{}_{}_best_model.pkl".format( args.arch, args.dataset)) print('save success') np.save('/home/lidong/Documents/RSDEN/RSDEN/loss.npy', loss_rec) if epoch % 15 == 0: #best_error = error state = { 'epoch': epoch + 1, 'model_state': model.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error_lin, 'error_d': error_d, } torch.save( state, "depth_{}_{}_{}_model.pkl".format(args.arch, args.dataset, str(epoch))) print('save success')
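# ---------------------------------------------------------------------------
# The function above optimizes berhu(depth, labels); berhu() itself is defined
# elsewhere in this repo. For reference, a minimal sketch of a standard
# reverse-Huber (berHu) loss with the usual threshold c = 0.2 * max|residual|.
# This is an assumption about the variant used, not the repo's implementation.
import torch


def berhu_sketch(pred, target):
    # berHu: L1 below the threshold c, scaled L2 above it
    diff = torch.abs(pred - target)
    c = 0.2 * diff.max().detach()
    l2_part = (diff ** 2 + c ** 2) / (2 * c + 1e-12)
    return torch.mean(torch.where(diff <= c, diff, l2_part))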
def train(args): # Setup Augmentations data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()]) loss_rec = [] best_error = 2 # Setup Dataloader data_loader = get_loader(args.dataset) data_path = get_data_path(args.dataset) t_loader = data_loader(data_path, is_transform=True, split='train_region', img_size=(args.img_rows, args.img_cols), task='visualize') v_loader = data_loader(data_path, is_transform=True, split='visual', img_size=(args.img_rows, args.img_cols), task='visualize') n_classes = t_loader.n_classes trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=2, shuffle=True) valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=2) # Setup Metrics running_metrics = runningScore(n_classes) # Setup visdom for visualization cuda0 = torch.device('cuda:0') cuda1 = torch.device('cuda:1') cuda2 = torch.device('cuda:2') cuda3 = torch.device('cuda:3') # Setup Model rsnet = get_model('rsnet') rsnet = torch.nn.DataParallel(rsnet, device_ids=[0]) rsnet.cuda(cuda0) drnet = get_model('drnet') drnet = torch.nn.DataParallel(drnet, device_ids=[2]) drnet.cuda(cuda2) parameters = list(rsnet.parameters()) + list(drnet.parameters()) # Check if model has custom optimizer / loss # modify to adam, modify the learning rate if hasattr(drnet.module, 'optimizer'): optimizer = drnet.module.optimizer else: # optimizer = torch.optim.Adam( # model.parameters(), lr=args.l_rate,weight_decay=5e-4,betas=(0.9,0.999)) optimizer = torch.optim.SGD(parameters, lr=args.l_rate, momentum=0.99, weight_decay=5e-4) if hasattr(rsnet.module, 'loss'): print('Using custom loss') loss_fn = rsnet.module.loss else: loss_fn = l1_r trained = 0 scale = 100 if args.resume is not None: if os.path.isfile(args.resume): print("Loading model and optimizer from checkpoint '{}'".format( args.resume)) checkpoint = torch.load(args.resume) #model_dict=model.state_dict() #opt=torch.load('/home/lidong/Documents/RSDEN/RSDEN/exp1/l2/sgd/log/83/rsnet_nyu_best_model.pkl') model.load_state_dict(checkpoint['model_state']) #optimizer.load_state_dict(checkpoint['optimizer_state']) #opt=None print("Loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) trained = checkpoint['epoch'] best_error = checkpoint['error'] #print('load success!') loss_rec = np.load('/home/lidong/Documents/RSDEN/RSDEN/loss.npy') loss_rec = list(loss_rec) loss_rec = loss_rec[:1632 * trained] # for i in range(300): # loss_rec[i][1]=loss_rec[i+300][1] for l in range(int(len(loss_rec) / 1632)): if args.visdom: vis.line( X=torch.ones(1).cpu() * loss_rec[l * 1632][0], Y=np.mean( np.array(loss_rec[l * 1632:(l + 1) * 1632])[:, 1]) * torch.ones(1).cpu(), win=old_window, update='append') else: print("No checkpoint found at '{}'".format(args.resume)) print('Initialize seperately!') checkpoint = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/exp1/region/trained/rsnet_nyu_best_model.pkl' ) rsnet.load_state_dict(checkpoint['model_state']) trained = checkpoint['epoch'] print('load success from rsnet %.d' % trained) best_error = checkpoint['error'] checkpoint = torch.load( '//home/lidong/Documents/RSDEN/RSDEN/exp1/seg/drnet_nyu_best_model.pkl' ) drnet.load_state_dict(checkpoint['model_state']) #optimizer.load_state_dict(checkpoint['optimizer_state']) trained = checkpoint['epoch'] print('load success from drnet %.d' % trained) trained = 0 min_loss = 10 samples = [] # it should be range(checkpoint[''epoch],args.n_epoch) for epoch in range(trained, args.n_epoch): rsnet.train() drnet.train() if epoch % 1 == 0: print('testing!') 
        rsnet.eval()  # evaluation pass: freeze batch-norm/dropout statistics
        drnet.eval()
        error_lin = []
        error_log = []
        error_va = []
        error_rate = []
        error_absrd = []
        error_squrd = []
        thre1 = []
        thre2 = []
        thre3 = []
        for i_val, (images, labels, segments,
                    sample) in tqdm(enumerate(valloader)):
            images = images.cuda(cuda2)
            labels = labels.cuda(cuda2)
            segments = segments.cuda(cuda2)
            optimizer.zero_grad()
            #print(i_val)
            with torch.no_grad():
                #region_support = rsnet(images)
                coarse_depth = torch.cat([images, segments], 1)
                #coarse_depth=torch.cat([coarse_depth,segments],1)
                outputs = drnet(coarse_depth)
                pred = [
                    outputs[0].data.cpu().numpy(),
                    outputs[1].data.cpu().numpy(),
                    outputs[2].data.cpu().numpy()
                ]
                pred = np.array(pred)
                #pred=region_support.data.cpu().numpy()
                gt = labels.data.cpu().numpy()
                ones = np.ones(gt.shape)
                zeros = np.zeros(gt.shape)
                # stack the three network outputs along a trailing channel axis
                pred = np.reshape(
                    pred, (gt.shape[0], gt.shape[1], gt.shape[2], 3))
                print(np.max(pred))
                # RMSE of the finest-scale output against ground truth
                dis = np.square(gt - pred[:, :, :, 2])
                loss = np.sqrt(np.mean(dis))
                if min_loss > 0:
                    min_loss = loss
                    #sample={'loss':loss,'rgb':rgb,'region_support':region_support,'ground_r':segments,'ground_d':labels}
                    #samples.append(sample)
                    #pred=pred[0,:,:]
                    #pred=pred/np.max(pred)*255
                    #pred=pred.astype(np.uint8)
                    #cv2.imwrite('/home/lidong/Documents/RSDEN/RSDEN/exp1/pred/seg%.d.png'%(i_val),pred)
                    # dump raw predictions and the visualization sample to disk
                    np.save(
                        '/home/lidong/Documents/RSDEN/RSDEN/exp1/pred/seg%.d.npy'
                        % (i_val), pred)
                    np.save(
                        '/home/lidong/Documents/RSDEN/RSDEN/exp1/visual/seg%.d.npy'
                        % (i_val), sample)
            break
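# ---------------------------------------------------------------------------
# The loop above dumps predictions with np.save(); a quick sketch of how such
# a dump can be inspected offline. The directory is the one used above; the
# helper itself is illustrative, not part of this repo.
import numpy as np


def inspect_prediction(i_val=0,
                       pred_dir='/home/lidong/Documents/RSDEN/RSDEN/exp1/pred'):
    pred = np.load('%s/seg%.d.npy' % (pred_dir, i_val))
    # pred holds the three network outputs stacked as (N, H, W, 3)
    print(pred.shape, pred.min(), pred.max())
    return pred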
def train(args): scale = 2 cuda_id = 0 torch.backends.cudnn.benchmark = True # Setup Augmentations data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()]) loss_rec = [] best_error = 2 # Setup Dataloader data_loader = get_loader(args.dataset) data_path = get_data_path(args.dataset) t_loader = data_loader(data_path, is_transform=True, split='train', img_size=(args.img_rows, args.img_cols), task='region') v_loader = data_loader(data_path, is_transform=True, split='test', img_size=(args.img_rows, args.img_cols), task='region') train_len = t_loader.length / args.batch_size trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=args.batch_size, shuffle=True) valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=args.batch_size, shuffle=False) # Setup visdom for visualization if args.visdom: vis = visdom.Visdom(env='nyu_memory_depth') memory_depth_window = vis.image( np.random.rand(228, 304), opts=dict(title='depth!', caption='depth.'), ) accurate_window = vis.image( np.random.rand(228, 304), opts=dict(title='accurate!', caption='accurate.'), ) ground_window = vis.image( np.random.rand(228, 304), opts=dict(title='ground!', caption='ground.'), ) image_window = vis.image( np.random.rand(228, 304), opts=dict(title='img!', caption='img.'), ) loss_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss', legend=['Loss'])) lin_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='error', title='linear Loss', legend=['linear error'])) error_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='error', title='error', legend=['Error'])) # Setup Model model = get_model(args.arch) memory = get_model('memory') # model = torch.nn.DataParallel( # model, device_ids=range(torch.cuda.device_count())) model = torch.nn.DataParallel(model, device_ids=[2, 3]) model.cuda(2) memory = torch.nn.DataParallel(memory, device_ids=[2, 3]) memory.cuda(2) # Check if model has custom optimizer / loss # modify to adam, modify the learning rate if hasattr(model.module, 'optimizer'): optimizer = model.module.optimizer else: optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate, betas=(0.9, 0.999), amsgrad=True) optimizer2 = torch.optim.Adam(memory.parameters(), lr=args.l_rate, betas=(0.9, 0.999), amsgrad=True) if hasattr(model.module, 'loss'): print('Using custom loss') loss_fn = model.module.loss else: loss_fn = log_loss trained = 0 #scale=100 if args.resume is not None: if os.path.isfile(args.resume): print("Loading model and optimizer from checkpoint '{}'".format( args.resume)) checkpoint = torch.load(args.resume, map_location='cpu') #model_dict=model.state_dict() #opt=torch.load('/home/lidong/Documents/RSDEN/RSDEN/exp1/l2/sgd/log/83/rsnet_nyu_best_model.pkl') model.load_state_dict(checkpoint['model_state']) #optimizer.load_state_dict(checkpoint['optimizer_state']) #opt=None print("Loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) trained = checkpoint['epoch'] best_error = checkpoint['error'] + 0.1 mean_loss = best_error / 2 print(best_error) print(trained) # loss_rec=np.load('/home/lidong/Documents/RSCFN/loss.npy') # loss_rec=list(loss_rec) # loss_rec=loss_rec[:train_len*trained] test = 0 #exit() #trained=0 else: best_error = 100 best_error_r = 100 trained = 0 mean_loss = 1.0 print('random initialize') print("No checkpoint found at 
'{}'".format(args.resume)) print('Initialize from rsn!') rsn = torch.load( '/home/lidong/Documents/RSCFN/memory_depth_rsn_cluster_nyu_0_0.59483826_coarse_best_model.pkl', map_location='cpu') model_dict = model.state_dict() #print(model_dict) pre_dict = { k: v for k, v in rsn['model_state'].items() if k in model_dict and rsn['model_state'].items() } #pre_dict={k: v for k, v in rsn.items() if k in model_dict and rsn.items()} #print(pre_dict) key = [] for k, v in pre_dict.items(): if v.shape != model_dict[k].shape: key.append(k) for k in key: pre_dict.pop(k) #print(pre_dict) # pre_dict['module.regress1.0.conv1.1.weight']=pre_dict['module.regress1.0.conv1.1.weight'][:,:256,:,:] # pre_dict['module.regress1.0.downsample.1.weight']=pre_dict['module.regress1.0.downsample.1.weight'][:,:256,:,:] model_dict.update(pre_dict) model.load_state_dict(model_dict) #optimizer.load_state_dict(rsn['optimizer_state']) trained = rsn['epoch'] best_error = rsn['error'] + 0.5 #mean_loss=best_error/2 print('load success!') print(best_error) #best_error+=1 #del rsn test = 0 trained = 0 # loss_rec=np.load('/home/lidong/Documents/RSCFN/loss.npy') # loss_rec=list(loss_rec) # loss_rec=loss_rec[:train_len*trained] #exit() zero = torch.zeros(1).cuda(2) one = torch.ones(1).cuda(2) # it should be range(checkpoint[''epoch],args.n_epoch) for epoch in range(trained, args.n_epoch): #for epoch in range(0, args.n_epoch): #scheduler.step() #trained print('training!') model.train() mean_loss_ave = [] #memory_bank=torch.ones(1) if epoch == trained: #initlization print('initilization') #model_t=model for i, (images, labels, regions, segments, image, index) in enumerate(trainloader): images = Variable(images.cuda(2)) labels = Variable(labels.cuda(2)) segments = Variable(segments.cuda(2)) regions = Variable(regions.cuda(2)) index = Variable(index.cuda(2)) iterative_count = 0 with torch.no_grad(): optimizer.zero_grad() optimizer2.zero_grad() accurate, feature = model(images, regions, labels, 0, 'memory') feature = feature.detach() #print(feature.shape) #exit() representation = memory(feature) labels = labels.view_as(accurate) segments = segments.view_as(accurate) regions = regions.view_as(accurate) mask = (labels > alpha) & (labels < beta) mask = mask.float().detach() loss_a = berhu(accurate, labels, mask) loss = loss_a accurate = torch.where(accurate > beta, beta * one, accurate) accurate = torch.where(accurate < alpha, alpha * one, accurate) lin = torch.sqrt( torch.sum(torch.where(mask > 0, torch.pow(accurate - labels, 2), mask).view(labels.shape[0], -1), dim=-1) / (torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1)) log_d = torch.sum(torch.where( mask > 0, torch.abs(torch.log10(accurate) - torch.log10(labels)), mask).view(labels.shape[0], -1), dim=-1) / (torch.sum(mask.view( labels.shape[0], -1), dim=-1) + 1) #loss.backward() # optimizer.step() # optimizer2.step() print(i, index, lin) loss_rec.append([ i + epoch * train_len, torch.Tensor([loss.item()]).unsqueeze(0).cpu() ]) print("data [%d/%d/%d/%d] Loss: %.4f lin: %.4f log_d:%.4f loss_a:%.4f " % \ (i,train_len, epoch, args.n_epoch,loss.item(), \ torch.mean(lin).item(),torch.mean(log_d).item(), loss_a.item())) if i == 0: memory_bank = representation index_bank = index loss_bank = lin else: memory_bank = torch.cat([memory_bank, representation], dim=0) index_bank = torch.cat([index_bank, index], dim=0) loss_bank = torch.cat([loss_bank, lin], dim=0) if i > 0: break else: #train print('training_fc') for i, (images, labels, regions, segments, image, index) in enumerate(trainloader): 
#model_t=model images = Variable(images.cuda(2)) labels = Variable(labels.cuda(2)) segments = Variable(segments.cuda(2)) regions = Variable(regions.cuda(2)) index = Variable(index.cuda(2)) iterative_count = 0 optimizer.zero_grad() optimizer2.zero_grad() accurate, feature = model(images, regions, labels, 0, 'memory') feature = feature.detach() representation = memory(feature) labels = labels.view_as(accurate) segments = segments.view_as(accurate) regions = regions.view_as(accurate) mask = (labels > alpha) & (labels < beta) mask = mask.float().detach() loss_a = berhu(accurate, labels, mask) loss = loss_a accurate = torch.where(accurate > beta, beta * one, accurate) accurate = torch.where(accurate < alpha, alpha * one, accurate) lin = torch.sqrt( torch.sum(torch.where( mask > 0, torch.pow(accurate - labels, 2), mask).view( labels.shape[0], -1), dim=-1) / (torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1)) log_d = torch.sum( torch.where( mask > 0, torch.abs(torch.log10(accurate) - torch.log10(labels)), mask).view(labels.shape[0], -1), dim=-1) / ( torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1) loss.backward() optimizer.step() lin = lin.detach() loss_m = memory_loss(representation, re_repre, lin.detach(), re_loss) #loss=loss_a+loss_m loss_m.backward() #optimizer.step() optimizer2.step() loss_rec.append([ i + epoch * train_len, torch.Tensor([loss.item()]).unsqueeze(0).cpu() ]) print("data [%d/%d/%d/%d] Loss: %.4f lin: %.4f log_d:%.4f loss_a:%.4f loss_m:%.4f" % \ (i,train_len, epoch, args.n_epoch,loss.item(), \ torch.mean(lin).item(),torch.mean(log_d).item(), loss_a.item(),loss_m.item())) if i == 0: memory_bank = representation index_bank = index loss_bank = lin else: memory_bank = torch.cat([memory_bank, representation], dim=0) index_bank = torch.cat([index_bank, index], dim=0) loss_bank = torch.cat([loss_bank, lin], dim=0) if i > 0: break # print(index_bank) # print(loss_bank) # exit(0) #print(memory_bank.shape,index_bank.shape) # if epoch==trained: #sigma=torch.mean(loss_bank)/train_len*10 #sigma=(torch.max(loss_bank)-torch.min(loss_bank))/294 with torch.no_grad(): re_index = [] re_loss = [] re_repre = [] print('update memory') while (True): #print(loss_bank.shape) candidate = loss_bank.nonzero() if candidate.shape[0] == 0: break #print(candidate.shape) t_index = candidate[torch.randint(low=0, high=candidate.shape[0], size=(1, ))][0][0] #print(t_index) t_loss = loss_bank[t_index] #print('search') sigma = t_loss * 0.1 while (True): t_related = torch.where( torch.abs(loss_bank - t_loss) < sigma, one, zero).nonzero() # if t_related.shape[0]==1: # t_related=torch.where(torch.abs(loss_bank-t_loss)<sigma*2,one,zero).nonzero() #print(loss_bank[t_related]) t_loss2 = torch.mean(loss_bank[t_related]) #print(t_loss2) if t_loss == t_loss2: loss_bank[t_related] = zero break else: t_loss = t_loss2 #break #print('end') #print(index_bank[t_related]) #print(loss_bank[t_related]) re_index.append(index_bank[t_related]) re_loss.append(torch.mean(t_loss2)) re_repre.append(torch.mean(memory_bank[t_related], dim=0)) # re_check=[] # for re in range(len(re_index)): # if len(re_index[re])==1: # re_check.append(re) # for re in range(len(re_check)): # t_loss=re_loss[re_check[re]] # t_loss=torch.abs(re_loss-t_loss) # t_loss[re_check[re]]=re_loss[re_check[re]] # torch.argmin(t_loss) re_index = re_index re_loss = torch.stack(re_loss) re_repre = torch.stack(re_repre).squeeze() #exit() #print(re_index,re_loss) #exit(0) continue if epoch > 50: check = 3 
#scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=30,gamma=0.5) else: check = 5 #scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=15,gamma=1) if epoch > 70: check = 2 #scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=15,gamma=0.25) if epoch > 90: check = 1 #scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=30,gamma=0.1) check = 1 #epoch=10 if epoch % check == 0 and epoch > trained: print('testing!') loss_ave = [] loss_d_ave = [] loss_lin_ave = [] loss_log_ave = [] loss_r_ave = [] error_sum = 0 for i_val, (images_val, labels_val, regions, segments, images) in tqdm(enumerate(valloader)): images_val = Variable(images_val.cuda(2), requires_grad=False) labels_val = Variable(labels_val.cuda(2), requires_grad=False) segments_val = Variable(segments.cuda(2), requires_grad=False) regions_val = Variable(regions.cuda(2), requires_grad=False) model_t = model with torch.no_grad(): model_t.eval() accurate, feature = model_t(images_val, regions, labels, 0, 'memory') feature = feature.detach() representation = memory(feature) labels_val = labels_val.view_as(accurate) target_index = torch.argmax( torch.nn.functional.softmax(-torch.mean( torch.pow(re_repre - representation, 2), dim=1), dim=0)) retrain_samples = re_index[target_index] rt_loader = data_loader(data_path, is_transform=True, split='train', img_size=(args.img_rows, args.img_cols), task='region', index_bank=retrain_samples) rtrainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=args.batch_size, shuffle=True) while (True): model_t.train() loss_t = 0 for i, (images, labels, regions, segments, image, index) in enumerate(rtrainloader): images = Variable(images.cuda(2)) labels = Variable(labels.cuda(2)) segments = Variable(segments.cuda(2)) regions = Variable(regions.cuda(2)) index = Variable(index.cuda(2)) iterative_count = 0 optimizer.zero_grad() accurate, feature = model_t(images, regions, labels, 0, 'memory') labels = labels.view_as(accurate) segments = segments.view_as(accurate) regions = regions.view_as(accurate) mask = (labels > alpha) & (labels < beta) mask = mask.float().detach() loss_a = berhu(accurate, labels, mask) loss = loss_a accurate = torch.where(accurate > beta, beta * one, accurate) accurate = torch.where(accurate < alpha, alpha * one, accurate) lin = torch.mean( torch.sqrt( torch.sum(torch.where( mask > 0, torch.pow(accurate - labels, 2), mask).view(labels.shape[0], -1), dim=-1) / (torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1))) log_d = torch.mean( torch.sum(torch.where( mask > 0, torch.abs( torch.log10(accurate) - torch.log10(labels)), mask).view( labels.shape[0], -1), dim=-1) / (torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1)) loss.backward() optimizer.step() loss_t += loss * images.shape[0] loss_t /= len(retrain_samples) if loss_t < re_loss[target_index] * 0.8: break accurate, feature = model_t(images_val, regions, labels, 0, 'memory') labels_val = labels_val.view_as(accurate) mask = (labels_val > alpha) & (labels_val < beta) mask = mask.float().detach() accurate = torch.where(accurate > beta, beta * one, accurate) accurate = torch.where(accurate < alpha, alpha * one, accurate) lin = torch.mean( torch.sqrt( torch.sum(torch.where( mask > 0, torch.pow(accurate - labels_val, 2), mask).view(labels_val.shape[0], -1), dim=-1) / (torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1))) log_d = torch.mean( torch.sum(torch.where( mask > 0, torch.abs( torch.log10(accurate) - torch.log10(labels_val)), mask).view(labels_val.shape[0], -1), dim=-1) / 
                    (torch.sum(mask.view(labels_val.shape[0], -1), dim=-1) + 1))
            loss_ave.append(lin.item())
            loss_d_ave.append(lin.item())
            loss_log_ave.append(log_d.item())
        error = np.mean(loss_ave)
        print("error_r=%.4f,error_d=%.4f,error_log=%.4f" %
              (error, np.mean(loss_d_ave), np.mean(loss_log_ave)))
        test += 1
        #print(error_sum / 654)  # error_sum is never accumulated above
        if error <= best_error:
            best_error = error
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'error': error,
            }
            torch.save(
                state,
                "memory_depth_{}_{}_{}_{}_coarse_best_model.pkl".format(
                    args.arch, args.dataset, str(epoch), str(error)))
            print('save success')
            np.save('/home/lidong/Documents/RSCFN/loss.npy', loss_rec)
            #exit()
        if epoch % 3 == 0:
            #best_error = error
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'error': error,
            }
            torch.save(
                state,
                "memory_depth_{}_{}_{}_coarse_model.pkl".format(
                    args.arch, args.dataset, str(epoch)))
            print('save success')
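# ---------------------------------------------------------------------------
# The 'update memory' step above repeatedly picks a random sample with a
# non-zero loss, grows a group of samples whose losses fall within 10% of the
# running group mean, and zeroes the group out of the bank. A self-contained
# sketch of that grouping loop on a plain 1-D loss tensor (same logic, minus
# the representation averaging; like the original, it assumes the running
# mean converges rather than oscillating).
import torch


def group_by_loss(loss_bank, rel_window=0.1):
    loss_bank = loss_bank.clone()
    one = torch.ones(1)
    zero = torch.zeros(1)
    groups = []
    while True:
        candidate = loss_bank.nonzero()
        if candidate.shape[0] == 0:
            break
        # random seed sample among the not-yet-grouped entries
        t_index = candidate[torch.randint(0, candidate.shape[0], (1,))][0][0]
        t_loss = loss_bank[t_index]
        while True:
            related = torch.where(
                torch.abs(loss_bank - t_loss) < t_loss * rel_window,
                one, zero).nonzero()
            t_loss2 = torch.mean(loss_bank[related])
            if t_loss == t_loss2:
                loss_bank[related] = zero  # consume the group
                break
            t_loss = t_loss2  # re-center on the group mean and iterate
        groups.append(related.view(-1))
    return groups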
def train(args): # Setup Augmentations data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()]) # Setup Dataloader data_loader = get_loader(args.dataset) data_path = get_data_path(args.dataset) t_loader = data_loader(data_path, is_transform=True, split='train', img_size=(args.img_rows, args.img_cols)) v_loader = data_loader(data_path, is_transform=True, split='test', img_size=(args.img_rows, args.img_cols)) n_classes = t_loader.n_classes trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=8, shuffle=True) valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=8) # Setup Metrics running_metrics = runningScore(n_classes) # Setup visdom for visualization if args.visdom: vis = visdom.Visdom() loss_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss', legend=['Loss'])) pre_window = vis.image( np.random.rand(480, 640), opts=dict(title='predict!', caption='predict.'), ) ground_window = vis.image( np.random.rand(480, 640), opts=dict(title='ground!', caption='ground.'), ) # Setup Model model = get_model(args.arch, n_classes) model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count())) #model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count())) model.cuda() # Check if model has custom optimizer / loss # modify to adam, modify the learning rate if hasattr(model.module, 'optimizer'): optimizer = model.module.optimizer else: optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate, momentum=0.99, weight_decay=5e-4) if hasattr(model.module, 'loss'): print('Using custom loss') loss_fn = model.module.loss else: loss_fn = l1 trained = 0 scale = 100 if args.resume is not None: if os.path.isfile(args.resume): print("Loading model and optimizer from checkpoint '{}'".format( args.resume)) checkpoint = torch.load(args.resume) model.load_state_dict(checkpoint['model_state']) optimizer.load_state_dict(checkpoint['optimizer_state']) print("Loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) trained = checkpoint['epoch'] else: print("No checkpoint found at '{}'".format(args.resume)) best_error = 100 best_rate = 100 # it should be range(checkpoint[''epoch],args.n_epoch) for epoch in range(trained, args.n_epoch): print('training!') model.train() for i, (images, labels) in enumerate(trainloader): images = Variable(images.cuda()) labels = Variable(labels.cuda()) optimizer.zero_grad() outputs = model(images) #outputs=outputs loss = loss_fn(input=outputs, target=labels) # print('training:'+str(i)+':learning_rate'+str(loss.data.cpu().numpy())) loss.backward() optimizer.step() # print(torch.Tensor([loss.data[0]]).unsqueeze(0).cpu()) if args.visdom: vis.line(X=torch.ones(1).cpu() * i, Y=torch.Tensor([loss.data[0]]).unsqueeze(0).cpu()[0], win=loss_window, update='append') pre = outputs.data.cpu().numpy().astype('float32') pre = pre[0, :, :, :] #pre = np.argmax(pre, 0) pre = np.reshape(pre, [480, 640]).astype('float32') / np.max(pre) #pre = pre/np.max(pre) # print(type(pre[0,0])) vis.image( pre, opts=dict(title='predict!', caption='predict.'), win=pre_window, ) ground = labels.data.cpu().numpy().astype('float32') #print(ground.shape) ground = ground[0, :, :] ground = np.reshape( ground, [480, 640]).astype('float32') / np.max(ground) vis.image( ground, opts=dict(title='ground!', caption='ground.'), win=ground_window, ) # if i%100==0: # state = {'epoch': epoch, # 'model_state': model.state_dict(), # 
'optimizer_state' : optimizer.state_dict(),}
            #     torch.save(state, "training_{}_{}_model.pkl".format(i, args.dataset))
            # if loss.data[0]/weight<100:
            #     weight=100
            # else if(loss.data[0]/weight<100)
            # .item() replaces the deprecated loss.data[0] accessor
            print("data [%d/503/%d/%d] Loss: %.4f" %
                  (i, epoch, args.n_epoch, loss.item()))
        print('testing!')
        model.eval()
        error = []
        error_rate = []
        ones = np.ones([480, 640])
        zeros = np.zeros([480, 640])
        for i_val, (images_val, labels_val) in tqdm(enumerate(valloader)):
            # volatile=True is gone in modern PyTorch; no_grad() replaces it
            with torch.no_grad():
                images_val = Variable(images_val.cuda())
                labels_val = Variable(labels_val.cuda())
                outputs = model(images_val)
            pred = outputs.data.cpu().numpy()
            gt = labels_val.data.cpu().numpy()
            pred = np.reshape(pred, [4, 480, 640])
            gt = np.reshape(gt, [4, 480, 640])
            dis = np.abs(gt - pred)
            error.append(np.mean(dis))
            # fraction of pixels with absolute depth error under 5 cm
            error_rate.append(np.mean(np.where(dis < 0.05, ones, zeros)))
        error = np.mean(error)
        error_rate = np.mean(error_rate)
        print("error=%.4f,error < 5 cm : %.4f" % (error, error_rate))
        if error <= best_error:
            best_error = error
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(state,
                       "{}_{}_best_model.pkl".format(args.arch,
                                                     args.dataset))
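# ---------------------------------------------------------------------------
# The validation pass above reports mean absolute error plus the fraction of
# pixels within 5 cm of ground truth. The same accuracy-under-threshold metric
# as a small standalone helper (depths assumed to be in meters, as above).
import numpy as np


def fraction_within(pred, gt, threshold=0.05):
    # share of pixels whose absolute depth error is below `threshold` meters
    return float(np.mean(np.abs(gt - pred) < threshold))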
def train(args): # Setup Augmentations data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()]) loss_rec = [] best_error = 2 # Setup Dataloader data_loader = get_loader(args.dataset) data_path = get_data_path(args.dataset) t_loader = data_loader(data_path, is_transform=True, split='train_region', img_size=(args.img_rows, args.img_cols)) v_loader = data_loader(data_path, is_transform=True, split='test_region', img_size=(args.img_rows, args.img_cols)) n_classes = t_loader.n_classes trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=4, shuffle=True) valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=4) # Setup Metrics running_metrics = runningScore(n_classes) # Setup visdom for visualization if args.visdom: vis = visdom.Visdom() old_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Trained Loss', legend=['Loss'])) loss_window1 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss1', legend=['Loss1'])) loss_window2 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss2', legend=['Loss'])) loss_window3 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss3', legend=['Loss3'])) pre_window1 = vis.image( np.random.rand(480, 640), opts=dict(title='predict1!', caption='predict1.'), ) pre_window2 = vis.image( np.random.rand(480, 640), opts=dict(title='predict2!', caption='predict2.'), ) pre_window3 = vis.image( np.random.rand(480, 640), opts=dict(title='predict3!', caption='predict3.'), ) support_window = vis.image( np.random.rand(480, 640), opts=dict(title='support!', caption='support.'), ) ground_window = vis.image( np.random.rand(480, 640), opts=dict(title='ground!', caption='ground.'), ) # Setup Model model = get_model(args.arch) model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count())) #model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count())) model.cuda() # Check if model has custom optimizer / loss # modify to adam, modify the learning rate if hasattr(model.module, 'optimizer'): optimizer = model.module.optimizer else: # optimizer = torch.optim.Adam( # model.parameters(), lr=args.l_rate,weight_decay=5e-4,betas=(0.9,0.999)) optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate, momentum=0.99, weight_decay=5e-4) if hasattr(model.module, 'loss'): print('Using custom loss') loss_fn = model.module.loss else: loss_fn = log_r trained = 0 scale = 100 if args.resume is not None: if os.path.isfile(args.resume): print("Loading model and optimizer from checkpoint '{}'".format( args.resume)) checkpoint = torch.load(args.resume) model_dict = model.state_dict() pre_dict = { k: v for k, v in checkpoint['model_state'].items() if k in model_dict } model_dict.update(pre_dict) #print(model_dict['module.conv1.weight'].shape) model_dict['module.conv1.weight'] = torch.cat([ model_dict['module.conv1.weight'], torch.reshape(model_dict['module.conv1.weight'][:, 3, :, :], [64, 1, 7, 7]) ], 1) #print(model_dict['module.conv1.weight'].shape) model.load_state_dict(model_dict) #model.load_state_dict(checkpoint['model_state']) optimizer.load_state_dict(checkpoint['optimizer_state']) print("Loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) trained = checkpoint['epoch'] 
print('load success!') #optimizer.load_state_dict(checkpoint['optimizer_state']) #opt=None opti_dict = optimizer.state_dict() #pre_dict={k: v for k, v in checkpoint['optimizer_state'].items() if k in opti_dict} pre_dict = checkpoint['optimizer_state'] # for k,v in pre_dict.items(): # print(k) # if k=='state': # #print(v.type) # for a,b in v.items(): # print(a) # print(b['momentum_buffer'].shape) # return 0 opti_dict.update(pre_dict) # for k,v in opti_dict.items(): # print(k) # if k=='state': # #print(v.type) # for a,b in v.items(): # if a==140011149405280: # print(b['momentum_buffer'].shape) #print(opti_dict['state'][140011149405280]['momentum_buffer'].shape) opti_dict['state'][139629660382048]['momentum_buffer'] = torch.cat( [ opti_dict['state'][139629660382048]['momentum_buffer'], torch.reshape( opti_dict['state'][139629660382048]['momentum_buffer'] [:, 3, :, :], [64, 1, 7, 7]) ], 1) #print(opti_dict['module.conv1.weight'].shape) optimizer.load_state_dict(opti_dict) best_error = checkpoint['error'] + 0.15 # #print('load success!') # loss_rec=np.load('/home/lidong/Documents/RSDEN/RSDEN/loss.npy') # loss_rec=list(loss_rec) # loss_rec=loss_rec[:816*trained] # # for i in range(300): # # loss_rec[i][1]=loss_rec[i+300][1] # for l in range(int(len(loss_rec)/816)): # if args.visdom: # #print(np.array(loss_rec[l])[1:]) # # vis.line( # # X=torch.ones(1).cpu() * loss_rec[l][0], # # Y=np.mean(np.array(loss_rec[l])[1:])*torch.ones(1).cpu(), # # win=old_window, # # update='append') # vis.line( # X=torch.ones(1).cpu() * loss_rec[l*816][0], # Y=np.mean(np.array(loss_rec[l*816:(l+1)*816])[:,1])*torch.ones(1).cpu(), # win=old_window, # update='append') else: print("No checkpoint found at '{}'".format(args.resume)) print('Initialize from resnet34!') #resnet34=torch.load('/home/lidong/Documents/RSDEN/RSDEN/resnet34-333f7ec4.pth') resnet34 = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/rsnet_nyu_best_model.pkl') model_dict = model.state_dict() # for k,v in resnet34['model_state'].items(): # print(k) pre_dict = { k: v for k, v in resnet34['model_state'].items() if k in model_dict } # for k,v in pre_dict.items():e # print(k) model_dict.update(pre_dict) model_dict['module.conv1.weight'] = torch.cat([ model_dict['module.conv1.weight'], torch.mean(model_dict['module.conv1.weight'], 1, keepdim=True) ], 1) # model_dict['module.conv1.weight']=torch.transpose(model_dict['module.conv1.weight'],1,2) # model_dict['module.conv1.weight']=torch.transpose(model_dict['module.conv1.weight'],2,4) model.load_state_dict(model_dict) print('load success!') best_error = 1 trained = 0 # it should be range(checkpoint[''epoch],args.n_epoch) for epoch in range(trained, args.n_epoch): #for epoch in range(0, args.n_epoch): #trained print('training!') model.train() for i, (images, labels, segments) in enumerate(trainloader): images = Variable(images.cuda()) labels = Variable(labels.cuda()) segments = Variable(segments.cuda()) # print(segments.shape) # print(images.shape) images = torch.cat([images, segments], 1) images = torch.cat([images, segments], 1) optimizer.zero_grad() outputs = model(images) #outputs=torch.reshape(outputs,[outputs.shape[0],1,outputs.shape[1],outputs.shape[2]]) #outputs=outputs loss = loss_fn(input=outputs, target=labels) out = 0.2 * loss[0] + 0.3 * loss[1] + 0.5 * loss[2] # print('training:'+str(i)+':learning_rate'+str(loss.data.cpu().numpy())) out.backward() optimizer.step() # print(torch.Tensor([loss.data[0]]).unsqueeze(0).cpu()) #print(loss.item()*torch.ones(1).cpu()) #nyu2_train:246,nyu2_all:816 if 
args.visdom: vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 816, Y=loss[0].item() * torch.ones(1).cpu(), win=loss_window1, update='append') vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 816, Y=loss[1].item() * torch.ones(1).cpu(), win=loss_window2, update='append') vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 816, Y=loss[2].item() * torch.ones(1).cpu(), win=loss_window3, update='append') pre = outputs[0].data.cpu().numpy().astype('float32') pre = pre[0, :, :] #pre = np.argmax(pre, 0) pre = (np.reshape(pre, [480, 640]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre)) #pre = pre/np.max(pre) # print(type(pre[0,0])) vis.image( pre, opts=dict(title='predict1!', caption='predict1.'), win=pre_window1, ) pre = outputs[1].data.cpu().numpy().astype('float32') pre = pre[0, :, :] #pre = np.argmax(pre, 0) pre = (np.reshape(pre, [480, 640]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre)) #pre = pre/np.max(pre) # print(type(pre[0,0])) vis.image( pre, opts=dict(title='predict2!', caption='predict2.'), win=pre_window2, ) pre = outputs[2].data.cpu().numpy().astype('float32') pre = pre[0, :, :] #pre = np.argmax(pre, 0) pre = (np.reshape(pre, [480, 640]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre)) #pre = pre/np.max(pre) # print(type(pre[0,0])) vis.image( pre, opts=dict(title='predict3!', caption='predict3.'), win=pre_window3, ) ground = labels.data.cpu().numpy().astype('float32') #print(ground.shape) ground = ground[0, :, :] ground = (np.reshape(ground, [480, 640]).astype('float32') - np.min(ground)) / (np.max(ground) - np.min(ground)) vis.image( ground, opts=dict(title='ground!', caption='ground.'), win=ground_window, ) ground = segments.data.cpu().numpy().astype('float32') #print(ground.shape) ground = ground[0, :, :] ground = (np.reshape(ground, [480, 640]).astype('float32') - np.min(ground)) / (np.max(ground) - np.min(ground)) vis.image( ground, opts=dict(title='support!', caption='support.'), win=support_window, ) loss_rec.append([ i + epoch * 816, torch.Tensor([loss[0].item()]).unsqueeze(0).cpu(), torch.Tensor([loss[1].item()]).unsqueeze(0).cpu(), torch.Tensor([loss[2].item()]).unsqueeze(0).cpu() ]) print("data [%d/816/%d/%d] Loss1: %.4f Loss2: %.4f Loss3: %.4f" % (i, epoch, args.n_epoch, loss[0].item(), loss[1].item(), loss[2].item())) #epoch=3 if epoch % 1 == 0: print('testing!') model.train() error_lin = [] error_log = [] error_va = [] error_rate = [] error_absrd = [] error_squrd = [] thre1 = [] thre2 = [] thre3 = [] for i_val, (images_val, labels_val, segments) in tqdm(enumerate(valloader)): print(r'\n') images_val = Variable(images_val.cuda(), requires_grad=False) labels_val = Variable(labels_val.cuda(), requires_grad=False) segments = Variable(segments.cuda()) images_val = torch.cat([images_val, segments], 1) images_val = torch.cat([images_val, segments], 1) with torch.no_grad(): outputs = model(images_val) pred = outputs[2].data.cpu().numpy() gt = labels_val.data.cpu().numpy() ones = np.ones((gt.shape)) zeros = np.zeros((gt.shape)) pred = np.reshape(pred, (gt.shape)) #gt=np.reshape(gt,[4,480,640]) dis = np.square(gt - pred) error_lin.append(np.sqrt(np.mean(dis))) dis = np.square(np.log(gt) - np.log(pred)) error_log.append(np.sqrt(np.mean(dis))) alpha = np.mean(np.log(gt) - np.log(pred)) dis = np.square(np.log(pred) - np.log(gt) + alpha) error_va.append(np.mean(dis) / 2) dis = np.mean(np.abs(gt - pred)) / gt error_absrd.append(np.mean(dis)) dis = 
np.square(gt - pred) / gt error_squrd.append(np.mean(dis)) thelt = np.where(pred / gt > gt / pred, pred / gt, gt / pred) thres1 = 1.25 thre1.append(np.mean(np.where(thelt < thres1, ones, zeros))) thre2.append( np.mean(np.where(thelt < thres1 * thres1, ones, zeros))) thre3.append( np.mean( np.where(thelt < thres1 * thres1 * thres1, ones, zeros))) #a=thre1[i_val] #error_rate.append(np.mean(np.where(dis<0.6,ones,zeros))) print( "error_lin=%.4f,error_log=%.4f,error_va=%.4f,error_absrd=%.4f,error_squrd=%.4f,thre1=%.4f,thre2=%.4f,thre3=%.4f" % (error_lin[i_val], error_log[i_val], error_va[i_val], error_absrd[i_val], error_squrd[i_val], thre1[i_val], thre2[i_val], thre3[i_val])) error = np.mean(error_lin) #error_rate=np.mean(error_rate) print("error=%.4f" % (error)) if error <= best_error: best_error = error state = { 'epoch': epoch + 1, 'model_state': model.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error, } torch.save( state, "{}_{}_best_model.pkl".format(args.arch, args.dataset)) print('save success') np.save('/home/lidong/Documents/RSDEN/RSDEN//loss.npy', loss_rec) if epoch % 10 == 0: #best_error = error state = { 'epoch': epoch + 1, 'model_state': model.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error, } torch.save( state, "{}_{}_{}_model.pkl".format(args.arch, args.dataset, str(epoch))) print('save success')
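# ---------------------------------------------------------------------------
# The test block above computes the usual monocular-depth metrics inline. The
# same set collected into one helper for reference, following the standard
# definitions (including Eigen's scale-invariant term, error_va above). Note
# one difference: the inline version averages |gt - pred| before dividing by
# gt, whereas the conventional abs-rel divides per pixel, as done here.
import numpy as np


def depth_metrics(pred, gt):
    pred = pred + 1e-12
    gt = gt + 1e-12
    rmse_lin = np.sqrt(np.mean(np.square(gt - pred)))
    rmse_log = np.sqrt(np.mean(np.square(np.log(gt) - np.log(pred))))
    alpha = np.mean(np.log(gt) - np.log(pred))
    scale_inv = np.mean(np.square(np.log(pred) - np.log(gt) + alpha)) / 2
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(np.square(gt - pred) / gt)
    ratio = np.maximum(pred / gt, gt / pred)
    deltas = [np.mean(ratio < 1.25 ** k) for k in (1, 2, 3)]
    return rmse_lin, rmse_log, scale_inv, abs_rel, sq_rel, deltas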
def train(args): # Setup Augmentations data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()]) loss_rec = [] best_error = 2 # Setup Dataloader data_loader = get_loader(args.dataset) data_path = get_data_path(args.dataset) t_loader = data_loader(data_path, is_transform=True, split='train_region', img_size=(args.img_rows, args.img_cols), task='all') v_loader = data_loader(data_path, is_transform=True, split='test_region', img_size=(args.img_rows, args.img_cols), task='all') n_classes = t_loader.n_classes trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=2, shuffle=True) valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=2) # Setup Metrics running_metrics = runningScore(n_classes) # Setup visdom for visualization if args.visdom: vis = visdom.Visdom() # old_window = vis.line(X=torch.zeros((1,)).cpu(), # Y=torch.zeros((1)).cpu(), # opts=dict(xlabel='minibatches', # ylabel='Loss', # title='Trained Loss', # legend=['Loss']) a_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Region Loss1', legend=['Region'])) loss_window1 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss1', legend=['Loss1'])) loss_window2 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss2', legend=['Loss'])) loss_window3 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss3', legend=['Loss3'])) pre_window1 = vis.image( np.random.rand(480, 640), opts=dict(title='predict1!', caption='predict1.'), ) pre_window2 = vis.image( np.random.rand(480, 640), opts=dict(title='predict2!', caption='predict2.'), ) pre_window3 = vis.image( np.random.rand(480, 640), opts=dict(title='predict3!', caption='predict3.'), ) ground_window = vis.image(np.random.rand(480, 640), opts=dict(title='ground!', caption='ground.')), region_window = vis.image( np.random.rand(480, 640), opts=dict(title='region!', caption='region.'), ) cuda0 = torch.device('cuda:0') cuda1 = torch.device('cuda:1') cuda2 = torch.device('cuda:2') cuda3 = torch.device('cuda:3') # Setup Model rsnet = get_model('rsnet') rsnet = torch.nn.DataParallel(rsnet, device_ids=[0, 1]) rsnet.cuda(cuda0) drnet = get_model('drnet') drnet = torch.nn.DataParallel(drnet, device_ids=[2, 3]) drnet.cuda(cuda2) parameters = list(rsnet.parameters()) + list(drnet.parameters()) # Check if model has custom optimizer / loss # modify to adam, modify the learning rate if hasattr(drnet.module, 'optimizer'): optimizer = drnet.module.optimizer else: # optimizer = torch.optim.Adam( # rsnet.parameters(), lr=args.l_rate,weight_decay=5e-4,betas=(0.9,0.999)) optimizer = torch.optim.SGD(rsnet.parameters(), lr=args.l_rate, momentum=0.99, weight_decay=5e-4) if hasattr(rsnet.module, 'loss'): print('Using custom loss') loss_fn = rsnet.module.loss else: loss_fn = log_r #loss_fn = region_r trained = 0 scale = 100 if args.resume is not None: if os.path.isfile(args.resume): print("Loading model and optimizer from checkpoint '{}'".format( args.resume)) checkpoint = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/rsnet_nyu_best_model.pkl') rsnet.load_state_dict(checkpoint['model_state']) #optimizer.load_state_dict(checkpoint['optimizer_state']) trained = checkpoint['epoch'] best_error = checkpoint['error'] print('load success from rsnet %.d' % trained) 
checkpoint = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/drnet_nyu_best_model.pkl') drnet.load_state_dict(checkpoint['model_state']) #optimizer.load_state_dict(checkpoint['optimizer_state']) trained = checkpoint['epoch'] print('load success from drnet %.d' % trained) #print('load success!') loss_rec = np.load('/home/lidong/Documents/RSDEN/RSDEN/loss.npy') loss_rec = list(loss_rec) loss_rec = loss_rec[:1632 * trained] # for i in range(300): # loss_rec[i][1]=loss_rec[i+300][1] for l in range(int(len(loss_rec) / 1632)): if args.visdom: vis.line( X=torch.ones(1).cpu() * loss_rec[l * 1632][0], Y=np.mean( np.array(loss_rec[l * 1632:(l + 1) * 1632])[:, 1]) * torch.ones(1).cpu(), win=old_window, update='append') else: print("No checkpoint found at '{}'".format(args.resume)) print('Initialize seperately!') checkpoint = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/rsnet_nyu_135_model.pkl') rsnet.load_state_dict(checkpoint['model_state']) trained = checkpoint['epoch'] best_error = checkpoint['error'] print(best_error) print('load success from rsnet %.d' % trained) checkpoint = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/drnet_nyu_135_model.pkl') # model_dict=drnet.state_dict() # pre_dict={k: v for k, v in checkpoint['model_state'].items() if k in model_dict} # model_dict.update(pre_dict) # #print(model_dict['module.conv1.weight'].shape) # model_dict['module.conv1.weight']=torch.cat([model_dict['module.conv1.weight'],torch.reshape(model_dict['module.conv1.weight'][:,3,:,:],[64,1,7,7])],1) # #print(model_dict['module.conv1.weight'].shape) # drnet.load_state_dict(model_dict) drnet.load_state_dict(checkpoint['model_state']) #optimizer.load_state_dict(checkpoint['optimizer_state']) trained = checkpoint['epoch'] print('load success from drnet %.d' % trained) #trained=0 loss_rec = [] #loss_rec=np.load('/home/lidong/Documents/RSDEN/RSDEN/loss.npy') #loss_rec=list(loss_rec) #loss_rec=loss_rec[:1632*trained] #average_loss=checkpoint['error'] # opti_dict=optimizer.state_dict() # #pre_dict={k: v for k, v in checkpoint['optimizer_state'].items() if k in opti_dict} # pre_dict=checkpoint['optimizer_state'] # # for k,v in pre_dict.items(): # # print(k) # # if k=='state': # # #print(v.type) # # for a,b in v.items(): # # print(a) # # print(b['momentum_buffer'].shape) # #return 0 # opti_dict.update(pre_dict) # # for k,v in opti_dict.items(): # # print(k) # # if k=='state': # # #print(v.type) # # for a,b in v.items(): # # if a==140011149405280: # # print(b['momentum_buffer'].shape) # #print(opti_dict['state'][140011149405280]['momentum_buffer'].shape) # opti_dict['state'][140011149405280]['momentum_buffer']=torch.cat([opti_dict['state'][140011149405280]['momentum_buffer'],torch.reshape(opti_dict['state'][140011149405280]['momentum_buffer'][:,3,:,:],[64,1,7,7])],1) # #print(opti_dict['module.conv1.weight'].shape) # optimizer.load_state_dict(opti_dict) # it should be range(checkpoint[''epoch],args.n_epoch) for epoch in range(trained, args.n_epoch): #for epoch in range(0, args.n_epoch): #trained print('training!') rsnet.train() drnet.train() for i, (images, labels, segments) in enumerate(trainloader): images = images.cuda() labels = labels.cuda(cuda2) segments = segments.cuda(cuda2) #for error_sample in range(10): optimizer.zero_grad() #with torch.autograd.enable_grad(): region_support = rsnet(images) #with torch.autograd.enable_grad(): coarse_depth = torch.cat([images, region_support], 1) coarse_depth = torch.cat([coarse_depth, region_support], 1) #with torch.no_grad(): outputs = drnet(coarse_depth) 
#outputs.append(region_support) #outputs=torch.reshape(outputs,[outputs.shape[0],1,outputs.shape[1],outputs.shape[2]]) #outputs=outputs loss = loss_fn(input=outputs, target=labels) out = 0.2 * loss[0] + 0.3 * loss[1] + 0.5 * loss[2] #out=out a = l1(input=region_support, target=labels.to(cuda0)) #a=region_log(input=region_support,target=labels.to(cuda0),instance=segments.to(cuda0)).to(cuda2) b = log_loss(region_support, labels.to(cuda0)).item() #out=0.8*out+0.02*a #a.backward() # print('training:'+str(i)+':learning_rate'+str(loss.data.cpu().numpy())) out.backward() optimizer.step() # print('out:%.4f,error_sample:%d'%(out.item(),error_sample)) # if i==0: # average_loss=(average_loss+out.item())/2 # break # if out.item()<average_loss/i: # break # print(torch.Tensor([loss.data[0]]).unsqueeze(0).cpu()) #print(loss.item()*torch.ones(1).cpu()) #nyu2_train:246,nyu2_all:1632 if args.visdom: vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 1632, Y=a.item() * torch.ones(1).cpu(), win=a_window, update='append') vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 1632, Y=loss[0].item() * torch.ones(1).cpu(), win=loss_window1, update='append') vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 1632, Y=loss[1].item() * torch.ones(1).cpu(), win=loss_window2, update='append') vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 1632, Y=loss[2].item() * torch.ones(1).cpu(), win=loss_window3, update='append') pre = outputs[0].data.cpu().numpy().astype('float32') pre = pre[0, :, :] #pre = np.argmax(pre, 0) pre = (np.reshape(pre, [480, 640]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre)) #pre = pre/np.max(pre) # print(type(pre[0,0])) vis.image( pre, opts=dict(title='predict1!', caption='predict1.'), win=pre_window1, ) pre = outputs[1].data.cpu().numpy().astype('float32') pre = pre[0, :, :] #pre = np.argmax(pre, 0) pre = (np.reshape(pre, [480, 640]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre)) #pre = pre/np.max(pre) # print(type(pre[0,0])) vis.image( pre, opts=dict(title='predict2!', caption='predict2.'), win=pre_window2, ) pre = outputs[2].data.cpu().numpy().astype('float32') pre = pre[0, :, :] #pre = np.argmax(pre, 0) pre = (np.reshape(pre, [480, 640]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre)) #pre = pre/np.max(pre) # print(type(pre[0,0])) vis.image( pre, opts=dict(title='predict3!', caption='predict3.'), win=pre_window3, ) ground = labels.data.cpu().numpy().astype('float32') #print(ground.shape) ground = ground[0, :, :] ground = (np.reshape(ground, [480, 640]).astype('float32') - np.min(ground)) / (np.max(ground) - np.min(ground)) vis.image( ground, opts=dict(title='ground!', caption='ground.'), win=ground_window, ) region_vis = region_support.data.cpu().numpy().astype( 'float32') #print(ground.shape) region_vis = region_vis[0, :, :] region_vis = ( np.reshape(region_vis, [480, 640]).astype('float32') - np.min(region_vis)) / (np.max(region_vis) - np.min(region_vis)) vis.image( region_vis, opts=dict(title='region_vis!', caption='region_vis.'), win=region_window, ) #average_loss+=out.item() loss_rec.append([ i + epoch * 1632, torch.Tensor([loss[0].item()]).unsqueeze(0).cpu(), torch.Tensor([loss[1].item()]).unsqueeze(0).cpu(), torch.Tensor([loss[2].item()]).unsqueeze(0).cpu() ]) print( "data [%d/1632/%d/%d]region:%.4f,%.4f Loss1: %.4f Loss2: %.4f Loss3: %.4f out:%.4f " % (i, epoch, args.n_epoch, a.item(), b, loss[0].item(), loss[1].item(), 
loss[2].item(), out.item())) #average_loss=average_loss/816 if epoch > 50: check = 1 else: check = 1 if epoch > 70: check = 1 if epoch % check == 0: print('testing!') rsnet.train() drnet.train() error_lin = [] error_log = [] error_va = [] error_rate = [] error_absrd = [] error_squrd = [] thre1 = [] thre2 = [] thre3 = [] for i_val, (images, labels, segments) in tqdm(enumerate(valloader)): #print(r'\n') images = images.cuda() labels = labels.cuda() optimizer.zero_grad() print(i_val) with torch.no_grad(): region_support = rsnet(images) coarse_depth = torch.cat([images, region_support], 1) coarse_depth = torch.cat([coarse_depth, region_support], 1) outputs = drnet(coarse_depth) pred = outputs[2].data.cpu().numpy() gt = labels.data.cpu().numpy() ones = np.ones((gt.shape)) zeros = np.zeros((gt.shape)) pred = np.reshape(pred, (gt.shape)) #gt=np.reshape(gt,[4,480,640]) dis = np.square(gt - pred) error_lin.append(np.sqrt(np.mean(dis))) dis = np.square(np.log(gt) - np.log(pred)) error_log.append(np.sqrt(np.mean(dis))) alpha = np.mean(np.log(gt) - np.log(pred)) dis = np.square(np.log(pred) - np.log(gt) + alpha) error_va.append(np.mean(dis) / 2) dis = np.mean(np.abs(gt - pred)) / gt error_absrd.append(np.mean(dis)) dis = np.square(gt - pred) / gt error_squrd.append(np.mean(dis)) thelt = np.where(pred / gt > gt / pred, pred / gt, gt / pred) thres1 = 1.25 thre1.append(np.mean(np.where(thelt < thres1, ones, zeros))) thre2.append( np.mean(np.where(thelt < thres1 * thres1, ones, zeros))) thre3.append( np.mean( np.where(thelt < thres1 * thres1 * thres1, ones, zeros))) #a=thre1[i_val] #error_rate.append(np.mean(np.where(dis<0.6,ones,zeros))) print( "error_lin=%.4f,error_log=%.4f,error_va=%.4f,error_absrd=%.4f,error_squrd=%.4f,thre1=%.4f,thre2=%.4f,thre3=%.4f" % (error_lin[i_val], error_log[i_val], error_va[i_val], error_absrd[i_val], error_squrd[i_val], thre1[i_val], thre2[i_val], thre3[i_val])) # if i_val > 219/check: # break error = np.mean(error_lin) #error_rate=np.mean(error_rate) print("error=%.4f" % (error)) if error <= best_error: best_error = error state = { 'epoch': epoch + 1, 'model_state': rsnet.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error, } torch.save( state, "{}_{}_best_model.pkl".format('rsnet', args.dataset)) state = { 'epoch': epoch + 1, 'model_state': drnet.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error, } torch.save( state, "{}_{}_best_model.pkl".format('drnet', args.dataset)) print('save success') np.save('/home/lidong/Documents/RSDEN/RSDEN//loss.npy', loss_rec) if epoch % 3 == 0: #best_error = error state = { 'epoch': epoch + 1, 'model_state': rsnet.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error, } torch.save( state, "{}_{}_{}_model.pkl".format('rsnet', args.dataset, str(epoch))) state = { 'epoch': epoch + 1, 'model_state': drnet.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error, } torch.save( state, "{}_{}_{}_model.pkl".format('drnet', args.dataset, str(epoch))) print('save success')
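# ---------------------------------------------------------------------------
# In the function above, rsnet and drnet live on different GPU groups
# (cuda:0/1 vs cuda:2/3) and are chained by concatenating rsnet's region
# support with the input. A minimal sketch of that cross-device hand-off
# pattern with two toy conv layers standing in for the real nets; the shapes
# and modules here are illustration-only assumptions, and it needs a box with
# at least three GPUs to run.
import torch
import torch.nn as nn


def chained_forward(images):
    cuda0 = torch.device('cuda:0')
    cuda2 = torch.device('cuda:2')
    net_a = nn.Conv2d(3, 1, 3, padding=1).to(cuda0)  # stand-in for rsnet
    net_b = nn.Conv2d(5, 1, 3, padding=1).to(cuda2)  # stand-in for drnet
    support = net_a(images.to(cuda0))
    # move the intermediate to the second GPU before concatenating, mirroring
    # how the training loop above builds coarse_depth from region_support
    coarse = torch.cat(
        [images.to(cuda2), support.to(cuda2), support.to(cuda2)], 1)
    return net_b(coarse)

# usage: chained_forward(torch.randn(1, 3, 480, 640))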
def train(args): # Setup Augmentations data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()]) loss_rec = [] # Setup Dataloader data_loader = get_loader(args.dataset) data_path = get_data_path(args.dataset) # t_loader = data_loader(data_path, is_transform=True, # split='nyu2_train', img_size=(args.img_rows, args.img_cols)) v_loader = data_loader(data_path, is_transform=True, split='test_region', img_size=(args.img_rows, args.img_cols)) # n_classes = t_loader.n_classes #trainloader = data.DataLoader( # t_loader, batch_size=args.batch_size, num_workers=8, shuffle=True) valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=4) # Setup Metrics #running_metrics = runningScore(n_classes) # Setup visdom for visualization if args.visdom: vis = visdom.Visdom() loss_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss', legend=['Loss'])) pre_window = vis.image( np.random.rand(480, 640), opts=dict(title='predict!', caption='predict.'), ) ground_window = vis.image( np.random.rand(480, 640), opts=dict(title='ground!', caption='ground.'), ) # Setup Model model = get_model(args.arch) model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count())) #model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count())) model.cuda() # Check if model has custom optimizer / loss # modify to adam, modify the learning rate if hasattr(model.module, 'optimizer'): optimizer = model.module.optimizer else: optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate, momentum=0.99, weight_decay=5e-4) if hasattr(model.module, 'loss'): print('Using custom loss') loss_fn = model.module.loss else: loss_fn = l1 trained = 0 scale = 100 if args.resume is not None: if os.path.isfile(args.resume): print("Loading model and optimizer from checkpoint '{}'".format( args.resume)) checkpoint = torch.load(args.resume) # model_dict=model.state_dict() # pre_dict={k: v for k, v in checkpoint['model_state'].items() if k in model_dict} # model_dict.update(pre_dict) # #print(model_dict['module.conv1.weight'].shape) # model_dict['module.conv1.weight']=torch.cat([model_dict['module.conv1.weight'],torch.reshape(model_dict['module.conv1.weight'][:,3,:,:],[64,1,7,7])],1) # #print(model_dict['module.conv1.weight'].shape) # model.load_state_dict(model_dict) model.load_state_dict(checkpoint['model_state']) # optimizer.load_state_dict(checkpoint['optimizer_state']) print("Loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) trained = checkpoint['epoch'] print('load success!') else: print("No checkpoint found at '{}'".format(args.resume)) print('Initialize from resnet50!') resnet50 = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/resnet34-333f7ec4.pth') model_dict = model.state_dict() pre_dict = {k: v for k, v in resnet50.items() if k in model_dict} model_dict.update(pre_dict) model.load_state_dict(model_dict) print('load success!') #model_dict=model.state_dict() best_error = 100 best_rate = 100 # it should be range(checkpoint[''epoch],args.n_epoch) #for epoch in range(trained, args.n_epoch): print('testing!') model.train() error_lin = [] error_log = [] error_va = [] error_rate = [] error_absrd = [] error_squrd = [] thre1 = [] thre2 = [] thre3 = [] for i_val, (images_val, labels_val, segs) in tqdm(enumerate(valloader)): images_val = Variable(images_val.cuda(), requires_grad=False) labels_val = Variable(labels_val.cuda(), requires_grad=False) segs = Variable(segs.cuda(), 
requires_grad=False)
        images_val = torch.cat([images_val, segs], 1)
        images_val = torch.cat([images_val, segs], 1)
        with torch.no_grad():
            outputs = model(images_val)
            pre = outputs[2]
            # note: the fraction of positive pixels is taken from outputs[2],
            # while the evaluated prediction below comes from outputs[0]
            pred = outputs[0].data.cpu().numpy() + 1e-12
            num = torch.sum(
                torch.where(pre > 0, torch.ones_like(pre),
                            torch.zeros_like(pre))) / torch.sum(torch.ones_like(pre))
            gt = labels_val.data.cpu().numpy() + 1e-12
            ones = np.ones(gt.shape)
            zeros = np.zeros(gt.shape)
            pred = np.reshape(pred, gt.shape)
            dis = np.square(gt - pred)
            error_lin.append(np.sqrt(np.mean(dis)))
            dis = np.square(np.log(gt) - np.log(pred))
            error_log.append(np.sqrt(np.mean(dis)))
            alpha = np.mean(np.log(gt) - np.log(pred))
            dis = np.square(np.log(pred) - np.log(gt) + alpha)
            error_va.append(np.mean(dis) / 2)
            # abs-rel: divide per pixel before averaging
            dis = np.abs(gt - pred) / gt
            error_absrd.append(np.mean(dis))
            dis = np.square(gt - pred) / gt
            error_squrd.append(np.mean(dis))
            thelt = np.where(pred / gt > gt / pred, pred / gt, gt / pred)
            thres1 = 1.25
            thre1.append(np.mean(np.where(thelt < thres1, ones, zeros)))
            thre2.append(np.mean(np.where(thelt < thres1 * thres1, ones, zeros)))
            thre3.append(np.mean(np.where(thelt < thres1 * thres1 * thres1, ones, zeros)))
            print("error_lin=%.4f,error_log=%.4f,error_va=%.4f,error_absrd=%.4f,error_squrd=%.4f,thre1=%.4f,thre2=%.4f,thre3=%.4f"
                  % (error_lin[i_val], error_log[i_val], error_va[i_val],
                     error_absrd[i_val], error_squrd[i_val],
                     thre1[i_val], thre2[i_val], thre3[i_val]))
            np.save('/home/lidong/Documents/RSDEN/RSDEN/error_train.npy', [
                error_lin[i_val], error_log[i_val], error_va[i_val],
                error_absrd[i_val], error_squrd[i_val],
                thre1[i_val], thre2[i_val], thre3[i_val]
            ])
    error_lin = np.mean(error_lin)
    error_log = np.mean(error_log)
    error_va = np.mean(error_va)
    error_absrd = np.mean(error_absrd)
    error_squrd = np.mean(error_squrd)
    thre1 = np.mean(thre1)
    thre2 = np.mean(thre2)
    thre3 = np.mean(thre3)
    print('Final Result!')
    print("error_lin=%.4f,error_log=%.4f,error_va=%.4f,error_absrd=%.4f,error_squrd=%.4f,thre1=%.4f,thre2=%.4f,thre3=%.4f"
          % (error_lin, error_log, error_va, error_absrd, error_squrd,
             thre1, thre2, thre3))
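# ---------------------------------------------------------------------------
# Several of these scripts run their test loop with the network still in
# train() mode (or call a non-existent .test()); batch-norm and dropout then
# behave stochastically during evaluation. A minimal sketch of the inference
# setup used after the fixes above, assuming the 5-channel input layout
# (RGB plus the segmentation map stacked twice):
import torch

def infer_depth(model, images, segs):
    # images: [N, 3, H, W]; segs: [N, 1, H, W] -> input [N, 5, H, W]
    model.eval()
    with torch.no_grad():
        inputs = torch.cat([images, segs, segs], 1)
        outputs = model(inputs)
    return outputs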
def train(args):
    # Setup Augmentations
    data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()])
    loss_rec = []
    best_error = 2
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train_region',
                           img_size=(args.img_rows, args.img_cols), task='all')
    v_loader = data_loader(data_path, is_transform=True, split='test_region',
                           img_size=(args.img_rows, args.img_cols), task='all')
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=2, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=2)
    # Setup Metrics
    running_metrics = runningScore(n_classes)
    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom()
        # needed by the checkpoint-replay branch below (was commented out)
        old_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                              opts=dict(xlabel='minibatches', ylabel='Loss',
                                        title='Trained Loss', legend=['Loss']))
        loss_window1 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='Loss',
                                          title='Training Loss1', legend=['Loss1']))
        loss_window2 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='Loss',
                                          title='Training Loss2', legend=['Loss']))
        loss_window3 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='Loss',
                                          title='Training Loss3', legend=['Loss3']))
        pre_window1 = vis.image(np.random.rand(480, 640),
                                opts=dict(title='predict1!', caption='predict1.'))
        pre_window2 = vis.image(np.random.rand(480, 640),
                                opts=dict(title='predict2!', caption='predict2.'))
        pre_window3 = vis.image(np.random.rand(480, 640),
                                opts=dict(title='predict3!', caption='predict3.'))
        ground_window = vis.image(np.random.rand(480, 640),
                                  opts=dict(title='ground!', caption='ground.'))
    cuda0 = torch.device('cuda:0')
    cuda1 = torch.device('cuda:1')
    cuda2 = torch.device('cuda:2')
    cuda3 = torch.device('cuda:3')
    # Setup Model
    rsnet = get_model('rsnet')
    rsnet = torch.nn.DataParallel(rsnet, device_ids=[0, 1])
    rsnet.cuda(cuda0)
    drnet = get_model('drnet')
    drnet = torch.nn.DataParallel(drnet, device_ids=[2, 3])
    drnet.cuda(cuda2)
    parameters = list(rsnet.parameters()) + list(drnet.parameters())
    # Check if model has custom optimizer / loss
    if hasattr(drnet.module, 'optimizer'):
        optimizer = drnet.module.optimizer
    else:
        optimizer = torch.optim.SGD(parameters, lr=args.l_rate,
                                    momentum=0.99, weight_decay=5e-4)
    if hasattr(rsnet.module, 'loss'):
        print('Using custom loss')
        loss_fn = rsnet.module.loss
    else:
        loss_fn = l1_r
    trained = 0
    scale = 100
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            # the original loaded into an undefined `model`; assume the resumed
            # checkpoint holds the rsnet weights (drnet is restored separately)
            rsnet.load_state_dict(checkpoint['model_state'])
            #optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            trained = checkpoint['epoch']
            best_error = checkpoint['error']
            loss_rec = np.load('/home/lidong/Documents/RSDEN/RSDEN/loss.npy')
            loss_rec = list(loss_rec)
            loss_rec = loss_rec[:1632 * trained]
            for l in range(int(len(loss_rec) / 1632)):
                if args.visdom:
                    vis.line(X=torch.ones(1).cpu() * loss_rec[l * 1632][0],
                             Y=np.mean(np.array(loss_rec[l * 1632:(l + 1) * 1632])[:, 1]) * torch.ones(1).cpu(),
                             win=old_window, update='append')
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            print('Initialize separately!')
            checkpoint = torch.load('/home/lidong/Documents/RSDEN/RSDEN/rsnet_nyu_best_model.pkl')
            rsnet.load_state_dict(checkpoint['model_state'])
            trained = checkpoint['epoch']
            print('load success from rsnet %d' % trained)
            best_error = checkpoint['error']
            checkpoint = torch.load('/home/lidong/Documents/RSDEN/RSDEN/drnet_nyu_best_model.pkl')
            drnet.load_state_dict(checkpoint['model_state'])
            #optimizer.load_state_dict(checkpoint['optimizer_state'])
            trained = checkpoint['epoch']
            print('load success from drnet %d' % trained)
            trained = 0
    # it should be range(checkpoint['epoch'], args.n_epoch)
    for epoch in range(trained, args.n_epoch):
        if epoch % 1 == 0:
            print('testing!')
            # evaluation mode; the original left both nets in train()
            rsnet.eval()
            drnet.eval()
            error_lin = []
            error_log = []
            error_va = []
            error_rate = []
            error_absrd = []
            error_squrd = []
            thre1 = []
            thre2 = []
            thre3 = []
            for i_val, (images, labels, segments) in tqdm(enumerate(valloader)):
                images = images.cuda()
                labels = labels.cuda()
                optimizer.zero_grad()
                print(i_val)
                with torch.no_grad():
                    region_support = rsnet(images)
                    coarse_depth = torch.cat([images, region_support], 1)
                    coarse_depth = torch.cat([coarse_depth, region_support], 1)
                    outputs = drnet(coarse_depth)
                pred = outputs[2].data.cpu().numpy()
                gt = labels.data.cpu().numpy()
                ones = np.ones(gt.shape)
                zeros = np.zeros(gt.shape)
                pred = np.reshape(pred, gt.shape)
                dis = np.square(gt - pred)
                error_lin.append(np.sqrt(np.mean(dis)))
                dis = np.square(np.log(gt) - np.log(pred))
                error_log.append(np.sqrt(np.mean(dis)))
                alpha = np.mean(np.log(gt) - np.log(pred))
                dis = np.square(np.log(pred) - np.log(gt) + alpha)
                error_va.append(np.mean(dis) / 2)
                # abs-rel: divide per pixel before averaging
                dis = np.abs(gt - pred) / gt
                error_absrd.append(np.mean(dis))
                dis = np.square(gt - pred) / gt
                error_squrd.append(np.mean(dis))
                thelt = np.where(pred / gt > gt / pred, pred / gt, gt / pred)
                thres1 = 1.25
                thre1.append(np.mean(np.where(thelt < thres1, ones, zeros)))
                thre2.append(np.mean(np.where(thelt < thres1 * thres1, ones, zeros)))
                thre3.append(np.mean(np.where(thelt < thres1 * thres1 * thres1, ones, zeros)))
                print("error_lin=%.4f,error_log=%.4f,error_va=%.4f,error_absrd=%.4f,error_squrd=%.4f,thre1=%.4f,thre2=%.4f,thre3=%.4f"
                      % (error_lin[i_val], error_log[i_val], error_va[i_val],
                         error_absrd[i_val], error_squrd[i_val],
                         thre1[i_val], thre2[i_val], thre3[i_val]))
                np.save('/home/lidong/Documents/RSDEN/RSDEN/error_train.npy', [
                    error_lin[i_val], error_log[i_val], error_va[i_val],
                    error_absrd[i_val], error_squrd[i_val],
                    thre1[i_val], thre2[i_val], thre3[i_val]
                ])
            error_lin = np.mean(error_lin)
            error_log = np.mean(error_log)
            error_va = np.mean(error_va)
            error_absrd = np.mean(error_absrd)
            error_squrd = np.mean(error_squrd)
            thre1 = np.mean(thre1)
            thre2 = np.mean(thre2)
            thre3 = np.mean(thre3)
            print('Final Result!')
            print("error_lin=%.4f,error_log=%.4f,error_va=%.4f,error_absrd=%.4f,error_squrd=%.4f,thre1=%.4f,thre2=%.4f,thre3=%.4f"
                  % (error_lin, error_log, error_va, error_absrd, error_squrd,
                     thre1, thre2, thre3))
        # this script only evaluates, so stop after the first pass
        break
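# ---------------------------------------------------------------------------
# Both two-stage variants above share the same forward cascade: rsnet predicts
# a region-support map from the RGB image, and drnet refines depth from the
# image concatenated with two copies of that map. A sketch of the shared
# pattern (rsnet/drnet are the repository's models; a 1-channel support map
# is assumed):
import torch

def cascade_forward(rsnet, drnet, images):
    rsnet.eval()
    drnet.eval()
    with torch.no_grad():
        region_support = rsnet(images)                   # [N, 1, H, W]
        coarse_depth = torch.cat([images, region_support], 1)
        coarse_depth = torch.cat([coarse_depth, region_support], 1)
        return drnet(coarse_depth)                       # tuple of predictions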
def train(args): # Setup Augmentations data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()]) loss_rec = [] best_error = 2 # Setup Dataloader data_loader = get_loader(args.dataset) data_path = get_data_path(args.dataset) t_loader = data_loader(data_path, is_transform=True, split='train', img_size=(args.img_rows, args.img_cols), task='all') v_loader = data_loader(data_path, is_transform=True, split='eval', img_size=(args.img_rows, args.img_cols), task='all') trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=2, shuffle=True) valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=2) # Setup visdom for visualization if args.visdom: vis = visdom.Visdom() loss_window1 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss1', legend=['Loss1'])) loss_window2 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss2', legend=['Loss'])) loss_window3 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss3', legend=['Loss3'])) pre_window1 = vis.image( np.random.rand(args.img_rows, args.img_cols), opts=dict(title='predict1!', caption='predict1.'), ) pre_window2 = vis.image( np.random.rand(args.img_rows, args.img_cols), opts=dict(title='predict2!', caption='predict2.'), ) pre_window3 = vis.image( np.random.rand(args.img_rows, args.img_cols), opts=dict(title='predict3!', caption='predict3.'), ) ground_window = vis.image(np.random.rand(args.img_rows, args.img_cols), opts=dict(title='ground!', caption='ground.')), region_window = vis.image( np.random.rand(args.img_rows, args.img_cols), opts=dict(title='region!', caption='region.'), ) cuda0 = torch.device('cuda:0') cuda1 = torch.device('cuda:1') cuda2 = torch.device('cuda:2') cuda3 = torch.device('cuda:3') # Setup Model rsnet = get_model('rsnet') rsnet = torch.nn.DataParallel(rsnet, device_ids=[0, 1]) rsnet.cuda(cuda0) drnet = get_model('drnet') drnet = torch.nn.DataParallel(drnet, device_ids=[2, 3]) drnet.cuda(cuda2) parameters = list(rsnet.parameters()) + list(drnet.parameters()) if hasattr(drnet.module, 'optimizer'): optimizer = drnet.module.optimizer else: optimizer = torch.optim.Adam(rsnet.parameters(), lr=args.l_rate, weight_decay=5e-4, betas=(0.9, 0.999)) # optimizer = torch.optim.SGD( # rsnet.parameters(), lr=args.l_rate,momentum=0.99, weight_decay=5e-4) if hasattr(rsnet.module, 'loss'): print('Using custom loss') loss_fn = rsnet.module.loss else: loss_fn = log_r_kitti #loss_fn = region_r trained = 0 scale = 100 if args.resume is not None: if os.path.isfile(args.resume): print("Loading model and optimizer from checkpoint '{}'".format( args.resume)) checkpoint = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/rsnet_nyu_best_model.pkl') rsnet.load_state_dict(checkpoint['model_state']) #optimizer.load_state_dict(checkpoint['optimizer_state']) trained = checkpoint['epoch'] best_error = checkpoint['error'] print('load success from rsnet %.d' % trained) checkpoint = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/drnet_nyu_best_model.pkl') drnet.load_state_dict(checkpoint['model_state']) #optimizer.load_state_dict(checkpoint['optimizer_state']) trained = checkpoint['epoch'] print('load success from drnet %.d' % trained) #print('load success!') loss_rec = np.load( '/home/lidong/Documents/RSDEN/RSDEN/kitti/loss.npy') loss_rec = list(loss_rec) loss_rec = 
loss_rec[:85898 * trained] # for i in range(300): # loss_rec[i][1]=loss_rec[i+300][1] for l in range(int(len(loss_rec) / 85898)): if args.visdom: vis.line(X=torch.ones(1).cpu() * loss_rec[l * 85898][0], Y=np.mean( np.array(loss_rec[l * 85898:(l + 1) * 85898])[:, 1]) * torch.ones(1).cpu(), win=old_window, update='append') else: print('Initialize seperately!') checkpoint = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/kitti/rsnet_kitti_0_27000_model.pkl' ) rsnet.load_state_dict(checkpoint['model_state']) trained = 0 best_error = 100 print(best_error) print('load success from rsnet %.d' % trained) checkpoint = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/kitti/drnet_kitti_0_27000_model.pkl' ) drnet.load_state_dict(checkpoint['model_state']) optimizer.load_state_dict(checkpoint['optimizer_state']) trained = 0 print('load success from drnet %.d' % trained) #trained=27000 loss_rec = [] for epoch in range(trained, args.n_epoch): #trained print('training!') rsnet.train() drnet.train() for i, (images, labels) in enumerate(trainloader): images = images.cuda() optimizer.zero_grad() region_support = rsnet(images) coarse_depth = torch.cat([images, region_support], 1) coarse_depth = torch.cat([coarse_depth, region_support], 1) outputs = drnet(coarse_depth) labels = labels.cuda(cuda2) #linear_error=torch.where(target>0,target-pre[0]) loss = loss_fn(input=outputs, target=labels) out = 0.2 * loss[0] + 0.3 * loss[1] + 0.5 * loss[2] out.backward() optimizer.step() if args.visdom: vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 85898, Y=loss[0].item() * torch.ones(1).cpu(), win=loss_window1, update='append') vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 85898, Y=loss[1].item() * torch.ones(1).cpu(), win=loss_window2, update='append') vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 85898, Y=loss[2].item() * torch.ones(1).cpu(), win=loss_window3, update='append') pre = outputs[0].data.cpu().numpy().astype('float32') pre = pre[0, :, :] pre = (np.reshape( pre, [args.img_rows, args.img_cols]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre)) vis.image( pre, opts=dict(title='predict1!', caption='predict1.'), win=pre_window1, ) pre = outputs[1].data.cpu().numpy().astype('float32') pre = pre[0, :, :] pre = (np.reshape( pre, [args.img_rows, args.img_cols]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre)) vis.image( pre, opts=dict(title='predict2!', caption='predict2.'), win=pre_window2, ) pre = outputs[2].data.cpu().numpy().astype('float32') pre = pre[0, :, :] pre = (np.reshape( pre, [args.img_rows, args.img_cols]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre)) vis.image( pre, opts=dict(title='predict3!', caption='predict3.'), win=pre_window3, ) ground = labels.data.cpu().numpy().astype('float32') #print(ground.shape) ground = ground[0, :, :] ground = (np.reshape( ground, [args.img_rows, args.img_cols]).astype('float32') - np.min(ground)) / (np.max(ground) - np.min(ground)) vis.image( ground, opts=dict(title='ground!', caption='ground.'), win=ground_window, ) region_vis = region_support.data.cpu().numpy().astype( 'float32') #print(ground.shape) region_vis = region_vis[0, :, :] region_vis = (np.reshape( region_vis, [args.img_rows, args.img_cols ]).astype('float32') - np.min(region_vis)) / ( np.max(region_vis) - np.min(region_vis)) vis.image( region_vis, opts=dict(title='region_vis!', caption='region_vis.'), win=region_window, ) loss_rec.append([ i + epoch * 85898, 
torch.Tensor([loss[0].item()]).unsqueeze(0).cpu(),
                             torch.Tensor([loss[1].item()]).unsqueeze(0).cpu(),
                             torch.Tensor([loss[2].item()]).unsqueeze(0).cpu()])
            print("data [%d/85898/%d/%d] Loss1: %.4f Loss2: %.4f Loss3: %.4f out:%.4f "
                  % (i + 27001, epoch, args.n_epoch, loss[0].item(),
                     loss[1].item(), loss[2].item(), out.item()))
            if i % 1000 == 0:
                # keep the loop index intact; the original mutated `i` here
                save_step = i + 27001
                state = {'epoch': epoch + 1,
                         'model_state': rsnet.state_dict(),
                         'optimizer_state': optimizer.state_dict(),
                         'error': out.item()}
                torch.save(state, "{}_{}_{}_{}_model.pkl".format('rsnet', args.dataset, str(epoch), str(save_step)))
                state = {'epoch': epoch + 1,
                         'model_state': drnet.state_dict(),
                         'optimizer_state': optimizer.state_dict(),
                         'error': out.item()}
                torch.save(state, "{}_{}_{}_{}_model.pkl".format('drnet', args.dataset, str(epoch), str(save_step)))
                print('save success')
        check = 1
        if epoch % check == 0:
            print('testing!')
            # nn.Module has no .test(); switch to eval mode instead
            rsnet.eval()
            drnet.eval()
            rmse = []
            silog = []
            log_rmse = []
            for i_val, (images, labels) in tqdm(enumerate(valloader)):
                images = images.cuda()
                labels = labels.cuda()
                optimizer.zero_grad()
                print(i_val)
                with torch.no_grad():
                    region_support = rsnet(images)
                    coarse_depth = torch.cat([images, region_support], 1)
                    coarse_depth = torch.cat([coarse_depth, region_support], 1)
                    outputs = drnet(coarse_depth)
                    gt = labels
                    pred = torch.reshape(outputs[2], gt.shape)
                    zeros = torch.zeros_like(gt)
                    # KITTI ground truth is sparse: only gt > 0 pixels count
                    mask = (gt > 0).float()
                    num = torch.sum(mask)
                    rmse.append((torch.sqrt(torch.sum(mask * torch.pow(gt - pred, 2)) / num)).item())
                    log_gt = torch.where(gt > 0, torch.log(gt + 1e-6), zeros)
                    log_pred = torch.where(gt > 0, torch.log(pred + 1e-6), zeros)
                    diff = mask * (log_gt - log_pred)
                    silog.append((torch.sum(torch.pow(diff, 2)) / num
                                  - torch.pow(torch.sum(diff), 2) / num / num).item())
                    log_rmse.append((torch.sqrt(torch.sum(torch.pow(diff, 2)) / num)).item())
                print("rmse=%.4f,silog=%.4f,log_rmse=%.4f"
                      % (rmse[i_val], silog[i_val], log_rmse[i_val]))
            rmse = np.mean(rmse)
            silog = np.mean(silog)
            log_rmse = np.mean(log_rmse)
            print("rmse=%.4f,silog=%.4f,log_rmse=%.4f" % (rmse, silog, log_rmse))
            if epoch % 1 == 0:
                state = {'epoch': epoch + 1,
                         'model_state': rsnet.state_dict(),
                         'optimizer_state': optimizer.state_dict(),
                         # the original referenced an undefined `error`
                         'error': rmse}
                torch.save(state, "{}_{}_{}_model.pkl".format('rsnet', args.dataset, str(epoch)))
                state = {'epoch': epoch + 1,
                         'model_state': drnet.state_dict(),
                         'optimizer_state': optimizer.state_dict(),
                         'error': rmse}
                torch.save(state, "{}_{}_{}_model.pkl".format('drnet', args.dataset, str(epoch)))
                print('save success')
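# ---------------------------------------------------------------------------
# KITTI ground truth is sparse, so the test loop above restricts every
# statistic to pixels with gt > 0. A self-contained sketch of the three
# metrics it tracks (masked RMSE, SILog, masked log-RMSE), mirroring the
# corrected loop:
import torch

def sparse_depth_metrics(gt, pred, eps=1e-6):
    mask = (gt > 0).float()
    num = mask.sum().clamp(min=1.0)
    rmse = torch.sqrt((mask * (gt - pred).pow(2)).sum() / num)
    diff = mask * (torch.log(gt.clamp(min=eps)) - torch.log(pred.clamp(min=eps)))
    log_rmse = torch.sqrt(diff.pow(2).sum() / num)
    # scale-invariant log error: mean of squares minus square of mean
    silog = diff.pow(2).sum() / num - diff.sum().pow(2) / (num * num)
    return rmse.item(), silog.item(), log_rmse.item()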
def train(args): # Setup Augmentations data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()]) loss_rec = [] best_error = 2 # Setup Dataloader data_loader = get_loader(args.dataset) data_path = get_data_path(args.dataset) t_loader = data_loader(data_path, is_transform=True, split='train', img_size=(args.img_rows, args.img_cols), task='region') v_loader = data_loader(data_path, is_transform=True, split='test', img_size=(args.img_rows, args.img_cols), task='region') n_classes = t_loader.n_classes trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=4, shuffle=True) valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=4) # Setup Metrics running_metrics = runningScore(n_classes) # Setup visdom for visualization if args.visdom: vis = visdom.Visdom() depth_window = vis.image( np.random.rand(480, 640), opts=dict(title='depth!', caption='depth.'), ) mask_window = vis.image( np.random.rand(480, 640), opts=dict(title='mask!', caption='mask.'), ) region_window = vis.image( np.random.rand(480, 640), opts=dict(title='region!', caption='region.'), ) ground_window = vis.image( np.random.rand(480, 640), opts=dict(title='ground!', caption='ground.'), ) loss_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Training Loss', legend=['Loss'])) old_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='minibatches', ylabel='Loss', title='Trained Loss', legend=['Loss'])) # Setup Model model = get_model(args.arch) model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count())) #model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count())) model.cuda() # Check if model has custom optimizer / loss # modify to adam, modify the learning rate if hasattr(model.module, 'optimizer'): optimizer = model.module.optimizer else: optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate, weight_decay=5e-4, betas=(0.9, 0.999)) # optimizer = torch.optim.SGD( # model.parameters(), lr=args.l_rate,momentum=0.90, weight_decay=5e-4) if hasattr(model.module, 'loss'): print('Using custom loss') loss_fn = model.module.loss else: loss_fn = log_loss trained = 0 scale = 100 if args.resume is not None: if os.path.isfile(args.resume): print("Loading model and optimizer from checkpoint '{}'".format( args.resume)) checkpoint = torch.load(args.resume, map_location='cpu') #model_dict=model.state_dict() #opt=torch.load('/home/lidong/Documents/RSDEN/RSDEN/exp1/l2/sgd/log/83/rsnet_nyu_best_model.pkl') model.load_state_dict(checkpoint['model_state']) optimizer.load_state_dict(checkpoint['optimizer_state']) #opt=None print("Loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) trained = checkpoint['epoch'] best_error = checkpoint['error'] print(best_error) loss_rec = np.load('/home/lidong/Documents/RSDEN/RSDEN/loss.npy') loss_rec = list(loss_rec) loss_rec = loss_rec[:179 * trained] # for i in range(300): # loss_rec[i][1]=loss_rec[i+300][1] for l in range(int(len(loss_rec) / 179)): if args.visdom: vis.line( X=torch.ones(1).cpu() * loss_rec[l * 179][0], Y=np.mean( np.array(loss_rec[l * 179:(l + 1) * 179])[:, 1]) * torch.ones(1).cpu(), win=old_window, update='append') else: print("No checkpoint found at '{}'".format(args.resume)) print('Initialize from rsn!') rsn = torch.load( '/home/lidong/Documents/RSDEN/RSDEN/rsn_mask_nyu2_best_model.pkl', map_location='cpu') model_dict = model.state_dict() #print(model_dict) 
pre_dict = { k: v for k, v in rsn['model_state'].items() if k in model_dict and rsn['model_state'].items() } key = [] for k, v in pre_dict.items(): if v.shape != model_dict[k].shape: key.append(k) for k in key: pre_dict.pop(k) model_dict.update(pre_dict) model.load_state_dict(model_dict) print('load success!') best_error = 100 trained = 0 del rsn # it should be range(checkpoint[''epoch],args.n_epoch) for epoch in range(trained, args.n_epoch): #for epoch in range(0, args.n_epoch): #trained print('training!') model.train() for i, (images, labels, regions, segments) in enumerate(trainloader): #break images = Variable(images.cuda()) labels = Variable(labels.cuda()) segments = Variable(segments.cuda()) regions = Variable(regions.cuda()) #break optimizer.zero_grad() #outputs,mask = model(images) mask = model(images) outputs = regions #loss_d = region_log(outputs,labels,segments) segments = torch.reshape( segments, [mask.shape[0], mask.shape[2], mask.shape[3]]) #loss_m = mask_loss(input=mask,target=segments) loss_m = mask_loss_region(mask, segments) #region=segments #print(loss_m) #mask_map=torch.argmax(mask) #loss_r,region= region_loss(outputs,mask,regions,segments) #loss_c=loss_d # print('training:'+str(i)+':learning_rate'+str(loss.data.cpu().numpy())) #loss=0.5*loss_d+0.5*(loss_m+loss_r) #break #loss_d=loss_r #loss=0.25*loss_r+0.5*loss_m+0.25*loss_d loss_d = loss_m loss_r = loss_m region = segments loss = loss_m loss.backward() optimizer.step() # print(torch.Tensor([loss.data[0]]).unsqueeze(0).cpu()) #print(loss.item()*torch.ones(1).cpu()) #nyu2_train:246,nyu2_all:179 if args.visdom: vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 179, Y=loss.item() * torch.ones(1).cpu(), win=loss_window, update='append') depth = outputs.data.cpu().numpy().astype('float32') depth = depth[0, :, :, :] depth = (np.reshape(depth, [480, 640]).astype('float32') - np.min(depth)) / (np.max(depth) - np.min(depth) + 1) vis.image( depth, opts=dict(title='depth!', caption='depth.'), win=depth_window, ) mask = torch.argmax(mask, dim=1).data.cpu().numpy().astype('float32') mask = mask[0, ...] mask = (np.reshape(mask, [480, 640]).astype('float32') - np.min(mask)) / (np.max(mask) - np.min(mask) + 1) vis.image( mask, opts=dict(title='mask!', caption='mask.'), win=mask_window, ) region = region.data.cpu().numpy().astype('float32') region = region[0, ...] 
region = (np.reshape(region, [480, 640]).astype('float32') - np.min(region)) / (np.max(region) - np.min(region) + 1) vis.image( region, opts=dict(title='region!', caption='region.'), win=region_window, ) ground = regions.data.cpu().numpy().astype('float32') ground = ground[0, :, :] ground = (np.reshape(ground, [480, 640]).astype('float32') - np.min(ground)) / (np.max(ground) - np.min(ground) + 1) vis.image( ground, opts=dict(title='ground!', caption='ground.'), win=ground_window, ) loss_rec.append([ i + epoch * 179, torch.Tensor([loss.item()]).unsqueeze(0).cpu() ]) print( "data [%d/179/%d/%d] Loss: %.4f Lossd: %.4f Lossm: %.4f Lossr: %.4f" % (i, epoch, args.n_epoch, loss.item(), loss_d.item(), loss_m.item(), loss_r.item())) if epoch > 30: check = 5 else: check = 10 if epoch > 50: check = 3 if epoch > 70: check = 1 #epoch=3 if epoch % check == 0: print('testing!') model.eval() loss_ave = [] for i_val, (images_val, labels_val, regions, segments) in tqdm(enumerate(valloader)): #print(r'\n') images_val = Variable(images_val.cuda(), requires_grad=False) labels_val = Variable(labels_val.cuda(), requires_grad=False) segments_val = Variable(segments.cuda(), requires_grad=False) regions_val = Variable(regions.cuda(), requires_grad=False) with torch.no_grad(): #outputs,mask = model(images_val) mask = model(images_val) outputs = regions #region= region_generation(outputs,mask,regions_val,segments_val) #loss_d = l2(input=region, target=regions_val) segments_val = torch.reshape( segments_val, [mask.shape[0], mask.shape[2], mask.shape[3]]) #loss_r,region= region_loss(outputs,mask,regions_val,segments_val) loss_r = mask_loss_region(mask, segments_val) loss_ave.append(loss_r.data.cpu().numpy()) print(loss_ave[-1]) #exit() error = np.mean(loss_ave) #error_rate=np.mean(error_rate) print("error=%.4f" % (error)) if error <= best_error: best_error = error state = { 'epoch': epoch + 1, 'model_state': model.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error, } torch.save( state, "{}_{}_best_model.pkl".format(args.arch, args.dataset)) print('save success') np.save('/home/lidong/Documents/RSDEN/RSDEN/loss.npy', loss_rec) if epoch % 15 == 0: #best_error = error state = { 'epoch': epoch + 1, 'model_state': model.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error, } torch.save( state, "{}_{}_{}_model.pkl".format(args.arch, args.dataset, str(epoch))) print('save success')
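# ---------------------------------------------------------------------------
# Several of the scripts above warm-start from a checkpoint whose keys only
# partially match the current model: entries are filtered first by name, then
# by tensor shape, before updating the state dict. A reusable sketch of that
# pattern:
import torch

def load_partial_state(model, checkpoint_path):
    saved = torch.load(checkpoint_path, map_location='cpu')
    saved_state = saved.get('model_state', saved)
    model_dict = model.state_dict()
    # keep only entries that exist in the model and have matching shapes
    pre_dict = {k: v for k, v in saved_state.items()
                if k in model_dict and v.shape == model_dict[k].shape}
    model_dict.update(pre_dict)
    model.load_state_dict(model_dict)
    return len(pre_dict)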
def train(args):
    scale = 2
    cuda_id = 0
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()])
    loss_rec = []
    best_error = 2
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train',
                           img_size=(args.img_rows, args.img_cols), task='region')
    v_loader = data_loader(data_path, is_transform=True, split='test',
                           img_size=(args.img_rows, args.img_cols), task='region')
    # number of minibatches per epoch (integer division)
    train_len = t_loader.length // args.batch_size
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=args.batch_size, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=args.batch_size, shuffle=False)
    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env='nyu_proup_refine')
        proup_refine_window = vis.image(np.random.rand(228, 304),
                                        opts=dict(title='depth!', caption='depth.'))
        accurate_window = vis.image(np.random.rand(228, 304),
                                    opts=dict(title='accurate!', caption='accurate.'))
        ground_window = vis.image(np.random.rand(228, 304),
                                  opts=dict(title='ground!', caption='ground.'))
        image_window = vis.image(np.random.rand(228, 304),
                                 opts=dict(title='img!', caption='img.'))
        loss_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches', ylabel='Loss',
                                         title='Training Loss', legend=['Loss']))
        lin_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                              opts=dict(xlabel='minibatches', ylabel='error',
                                        title='linear Loss', legend=['linear error']))
        error_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='error',
                                          title='error', legend=['Error']))
    # Setup Model
    model = get_model(args.arch)
    # the original called get_model(memory) with an undefined name; the
    # memory network's architecture key is assumed to be 'memory'
    memory = get_model('memory')
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    model.cuda()
    memory = torch.nn.DataParallel(memory, device_ids=[0, 1, 2, 3])
    memory.cuda()
    # Check if model has custom optimizer / loss
    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate,
                                     betas=(0.9, 0.999), amsgrad=True)
    optimizer2 = torch.optim.Adam(memory.parameters(), lr=args.l_rate,
                                  betas=(0.9, 0.999), amsgrad=True)
    if hasattr(model.module, 'loss'):
        print('Using custom loss')
        loss_fn = model.module.loss
    else:
        loss_fn = log_loss
    trained = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            model.load_state_dict(checkpoint['model_state'])
            #optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            trained = checkpoint['epoch']
            best_error = checkpoint['error'] + 0.1
            mean_loss = best_error / 2
            print(best_error)
            print(trained)
            test = 0
        else:
            best_error = 100
            best_error_r = 100
            trained = 0
            mean_loss = 1.0
            print('random initialize')
            print("No checkpoint found at '{}'".format(args.resume))
            print('Initialize from rsn!')
            rsn = torch.load(
                '/home/lidong/Documents/RSCFN/proup_refine_rsn_cluster_nyu_0_0.59483826_coarse_best_model.pkl',
                map_location='cpu')
            model_dict = model.state_dict()
            # keep checkpoint entries whose names and shapes match the model
            pre_dict = {k: v for k, v in rsn['model_state'].items()
                        if k in model_dict and v.shape == model_dict[k].shape}
            model_dict.update(pre_dict)
            model.load_state_dict(model_dict)
            #optimizer.load_state_dict(rsn['optimizer_state'])
            trained = rsn['epoch']
            best_error = rsn['error'] + 0.5
            mean_loss = best_error / 2
            print('load success!')
            print(best_error)
            test = 0
            trained = 0
    zero = torch.zeros(1).cuda()
    one = torch.ones(1).cuda()
    # alpha/beta (the valid depth range, in metres) and the memory bank are
    # used below but never defined in this excerpt; assumed values:
    alpha = 0.7
    beta = 10.0
    memory_bank = None
    # it should be range(checkpoint['epoch'], args.n_epoch)
    for epoch in range(trained, args.n_epoch):
        print('training!')
        model.train()
        loss_error = 0
        loss_error_d = 0
        mean_loss_ave = []
        for i, (images, labels, regions, segments, image, index) in enumerate(trainloader):
            images = Variable(images.cuda(0))
            labels = Variable(labels.cuda(0))
            segments = Variable(segments.cuda(0))
            regions = Variable(regions.cuda(0))
            index = Variable(index.cuda(0))
            iterative_count = 0
            if epoch == trained:
                # warm-up pass: populate the memory bank without gradients
                with torch.no_grad():
                    optimizer.zero_grad()
                    optimizer2.zero_grad()
                    feature, accurate = model(images, regions, labels, 0, 'train')
                    feature = feature.detach()
                    representation = memory(feature)
                    labels = labels.view_as(accurate)
                    segments = segments.view_as(accurate)
                    regions = regions.view_as(accurate)
                    mask = (labels > alpha) & (labels < beta)
                    mask = mask.float().detach()
                    loss_a = berhu(accurate, labels, mask)
                    if memory_bank is None:
                        memory_bank = representation
                    else:
                        memory_bank = torch.cat([memory_bank, representation], dim=0)
                    loss = loss_a
                    accurate = torch.where(accurate > beta, beta * one, accurate)
                    accurate = torch.where(accurate < alpha, alpha * one, accurate)
                    lin = torch.mean(
                        torch.sqrt(
                            torch.sum(torch.where(mask > 0,
                                                  torch.pow(accurate - labels, 2),
                                                  mask).view(labels.shape[0], -1),
                                      dim=-1) /
                            (torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1)))
                    log_d = torch.mean(
                        torch.sum(torch.where(mask > 0,
                                              torch.abs(torch.log10(accurate) -
                                                        torch.log10(labels)),
                                              mask).view(labels.shape[0], -1),
                                  dim=-1) /
                        (torch.sum(mask.view(labels.shape[0], -1), dim=-1) + 1))
                if loss.requires_grad:
                    # a no-op after the no_grad warm-up pass above
                    loss.backward()
                    optimizer.step()
                    optimizer2.step()
                loss_rec.append([i + epoch * train_len,
                                 torch.Tensor([loss.item()]).unsqueeze(0).cpu()])
                loss_error += lin.item()
                loss_error_d += log_d.item()
                mean_loss_ave.append(loss.item())
                # the original print referenced quantities that are undefined
                # in this branch; trimmed to what is actually computed
                print("data [%d/%d/%d/%d] Loss: %.4f lin: %.4f log_d: %.4f loss_a: %.4f"
                      % (i, train_len, epoch, args.n_epoch,
                         loss.item(), lin.item(), log_d.item(), loss_a.item()))
        mean_loss = np.mean(mean_loss_ave)
        print("mean_loss:%.4f" % (mean_loss))
        if epoch > 50:
            check = 3
#scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=30,gamma=0.5) else: check = 5 #scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=15,gamma=1) if epoch > 70: check = 2 #scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=15,gamma=0.25) if epoch > 90: check = 1 #scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=30,gamma=0.1) check = 1 #epoch=10 if epoch % check == 0: print('testing!') model.eval() loss_ave = [] loss_d_ave = [] loss_lin_ave = [] loss_log_ave = [] loss_r_ave = [] error_sum = 0 for i_val, (images_val, labels_val, regions, segments, images) in tqdm(enumerate(valloader)): #print(r'\n') images_val = Variable(images_val.cuda(0), requires_grad=False) labels_val = Variable(labels_val.cuda(0), requires_grad=False) segments_val = Variable(segments.cuda(0), requires_grad=False) regions_val = Variable(regions.cuda(0), requires_grad=False) with torch.no_grad(): depth, accurate, loss_var, loss_dis, loss_reg = model( images_val, regions_val, labels_val, 0, 'eval') accurate = torch.where(accurate > beta, beta * one, accurate) accurate = torch.where(accurate < alpha, alpha * one, accurate) depth = torch.where(depth > beta, beta * one, depth) depth = torch.where(depth < alpha, alpha * one, depth) depth = F.interpolate(depth, scale_factor=scale, mode='nearest').squeeze() accurate = F.interpolate(accurate, scale_factor=scale, mode='nearest').squeeze() labels_val = (labels_val[..., 6 * scale:-6 * scale, 8 * scale:-8 * scale]).view_as(depth) mask = (labels_val > alpha) & (labels_val < beta) mask = mask.float().detach() lin = torch.mean( torch.sqrt( torch.sum(torch.where( mask > 0, torch.pow(accurate - labels_val, 2), mask).view(labels_val.shape[0], -1), dim=-1) / torch.sum(mask.view(labels_val.shape[0], -1), dim=-1))) lin_d = torch.mean( torch.sqrt( torch.sum(torch.where( mask > 0, torch.pow(depth - labels_val, 2), mask).view(labels_val.shape[0], -1), dim=-1) / torch.sum(mask.view(labels_val.shape[0], -1), dim=-1))) error_sum += torch.sum( torch.sqrt( torch.sum(torch.where( mask > 0, torch.pow(accurate - labels_val, 2), mask).view(labels_val.shape[0], -1), dim=-1) / torch.sum(mask.view(labels_val.shape[0], -1), dim=-1))) log_d = torch.mean( torch.sum(torch.where( mask > 0, torch.abs( torch.log10(accurate) - torch.log10(labels_val)), mask).view( labels_val.shape[0], -1), dim=-1) / torch.sum(mask.view(labels_val.shape[0], -1), dim=-1)) loss_ave.append(lin.data.cpu().numpy()) loss_d_ave.append(lin_d.data.cpu().numpy()) loss_log_ave.append(log_d.data.cpu().numpy()) print("error=%.4f,error_d=%.4f,error_log=%.4f" % (lin.item(), lin_d.item(), log_d.item())) error = np.mean(loss_ave) print("error_r=%.4f,error_d=%.4f,error_log=%.4f" % (error, np.mean(loss_d_ave), np.mean(loss_log_ave))) test += 1 print(error_sum / 654) if error <= best_error: best_error = error state = { 'epoch': epoch + 1, 'model_state': model.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error, } torch.save( state, "proup_refine_{}_{}_{}_{}_coarse_best_model.pkl".format( args.arch, args.dataset, str(epoch), str(error))) print('save success') np.save('/home/lidong/Documents/RSCFN/loss.npy', loss_rec) #exit() if epoch % 3 == 0: #best_error = error state = { 'epoch': epoch + 1, 'model_state': model.state_dict(), 'optimizer_state': optimizer.state_dict(), 'error': error, } torch.save( state, "proup_refine_{}_{}_{}_ceoarse_model.pkl".format( args.arch, args.dataset, str(epoch))) print('save success')
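# ---------------------------------------------------------------------------
# The refinement script above optimises a masked berHu (reverse Huber) loss
# between predicted and ground-truth depth. `berhu` itself is defined
# elsewhere in the repository; a common formulation, sketched under the
# assumption that `mask` marks valid pixels, is |x| below a threshold c and
# (x^2 + c^2) / (2c) above it, with c tied to the largest residual:
import torch

def berhu_loss(pred, target, mask):
    diff = torch.abs(target - pred) * mask      # residuals on valid pixels only
    c = 0.2 * diff.max().clamp(min=1e-6)        # adaptive threshold
    quad = (diff.pow(2) + c * c) / (2 * c)
    loss = torch.where(diff <= c, diff, quad)
    return loss.sum() / mask.sum().clamp(min=1.0)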
def train(args):
    # Setup Augmentations
    data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()])
    loss_rec = []
    best_error = 2
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train_region',
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path, is_transform=True, split='test_region',
                           img_size=(args.img_rows, args.img_cols))
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=2, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=2)
    # Setup Metrics
    running_metrics = runningScore(n_classes)
    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom()
        # needed by the checkpoint-replay branch below (was commented out)
        old_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                              opts=dict(xlabel='minibatches', ylabel='Loss',
                                        title='Trained Loss', legend=['Loss']))
        loss_window1 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='Loss',
                                          title='Training Loss1', legend=['Loss1']))
        loss_window2 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='Loss',
                                          title='Training Loss2', legend=['Loss']))
        loss_window3 = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='Loss',
                                          title='Training Loss3', legend=['Loss3']))
        pre_window1 = vis.image(np.random.rand(480, 640),
                                opts=dict(title='predict1!', caption='predict1.'))
        pre_window2 = vis.image(np.random.rand(480, 640),
                                opts=dict(title='predict2!', caption='predict2.'))
        pre_window3 = vis.image(np.random.rand(480, 640),
                                opts=dict(title='predict3!', caption='predict3.'))
        ground_window = vis.image(np.random.rand(480, 640),
                                  opts=dict(title='ground!', caption='ground.'))
    cuda0 = torch.device('cuda:0')
    cuda1 = torch.device('cuda:1')
    cuda2 = torch.device('cuda:2')
    cuda3 = torch.device('cuda:3')
    # Setup Model
    rsnet = get_model('rsnet')
    rsnet = torch.nn.DataParallel(rsnet, device_ids=[0])
    rsnet.to(cuda0)
    drnet = get_model('drnet')
    drnet = torch.nn.DataParallel(drnet, device_ids=[1])
    drnet.to(cuda1)
    parameters = list(rsnet.parameters()) + list(drnet.parameters())
    # Check if model has custom optimizer / loss
    if hasattr(drnet.module, 'optimizer'):
        optimizer = drnet.module.optimizer
    else:
        optimizer = torch.optim.SGD(parameters, lr=args.l_rate,
                                    momentum=0.99, weight_decay=5e-4)
    if hasattr(rsnet.module, 'loss'):
        print('Using custom loss')
        loss_fn = rsnet.module.loss
    else:
        loss_fn = l1_r
    trained = 0
    scale = 100
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            # the original loaded into an undefined `model`; assume the resumed
            # checkpoint holds the rsnet weights (drnet is restored separately)
            rsnet.load_state_dict(checkpoint['model_state'])
            #optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            trained = checkpoint['epoch']
            best_error = checkpoint['error']
            loss_rec = np.load('/home/lidong/Documents/RSDEN/RSDEN/loss.npy')
            loss_rec = list(loss_rec)
            loss_rec = loss_rec[:3265 * trained]
            for l in range(int(len(loss_rec) / 3265)):
                if args.visdom:
                    vis.line(X=torch.ones(1).cpu() * loss_rec[l * 3265][0],
                             Y=np.mean(np.array(loss_rec[l * 3265:(l + 1) * 3265])[:, 1]) * torch.ones(1).cpu(),
                             win=old_window, update='append')
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            print('Initialize separately!')
            checkpoint = torch.load('/home/lidong/Documents/RSDEN/RSDEN/rsnet_nyu_best_model.pkl')
            rsnet.load_state_dict(checkpoint['model_state'])
            trained = checkpoint['epoch']
            print('load success from rsnet %d' % trained)
            checkpoint = torch.load('/home/lidong/Documents/RSDEN/RSDEN/drnet_nyu_best_model.pkl')
            drnet.load_state_dict(checkpoint['model_state'])
            #optimizer.load_state_dict(checkpoint['optimizer_state'])
            trained = checkpoint['epoch']
            print('load success from drnet %d' % trained)
            trained = 0
            best_error = 1
    # it should be range(checkpoint['epoch'], args.n_epoch)
    for epoch in range(trained, args.n_epoch):
        print('training!')
        rsnet.train()
        drnet.train()
        for i, (images, labels, segments) in enumerate(trainloader):
            images = images.to(cuda0)
            labels = labels.to(cuda1)
            optimizer.zero_grad()
            region_support = rsnet(images)
            coarse_depth = torch.cat([images, region_support], 1)
            outputs = drnet(coarse_depth)
            loss = loss_fn(input=outputs, target=labels)
            out = loss[0] + loss[1] + loss[2]
            out.backward()
            optimizer.step()
            #nyu2_train:246,nyu2_all:3265
            if args.visdom:
                vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 3265,
                         Y=loss[0].item() * torch.ones(1).cpu(),
                         win=loss_window1, update='append')
                vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 3265,
                         Y=loss[1].item() * torch.ones(1).cpu(),
                         win=loss_window2, update='append')
                vis.line(X=torch.ones(1).cpu() * i + torch.ones(1).cpu() * (epoch - trained) * 3265,
                         Y=loss[2].item() * torch.ones(1).cpu(),
                         win=loss_window3, update='append')
                pre = outputs[0].data.cpu().numpy().astype('float32')
                pre = pre[0, :, :]
                pre = (np.reshape(pre, [480, 640]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre))
                vis.image(pre, opts=dict(title='predict1!', caption='predict1.'), win=pre_window1)
                pre = outputs[1].data.cpu().numpy().astype('float32')
                pre = pre[0, :, :]
                pre = (np.reshape(pre, [480, 640]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre))
                vis.image(pre, opts=dict(title='predict2!', caption='predict2.'), win=pre_window2)
                pre = outputs[2].data.cpu().numpy().astype('float32')
                pre = pre[0, :, :]
                pre = (np.reshape(pre, [480, 640]).astype('float32') - np.min(pre)) / (np.max(pre) - np.min(pre))
                vis.image(pre, opts=dict(title='predict3!', caption='predict3.'), win=pre_window3)
                ground = labels.data.cpu().numpy().astype('float32')
                ground = ground[0, :, :]
                ground = (np.reshape(ground, [480, 640]).astype('float32') - np.min(ground)) / (np.max(ground) - np.min(ground))
                vis.image(ground, opts=dict(title='ground!', caption='ground.'), win=ground_window)
            loss_rec.append([i + epoch * 3265,
                             torch.Tensor([loss[0].item()]).unsqueeze(0).cpu(),
                             torch.Tensor([loss[1].item()]).unsqueeze(0).cpu(),
                             torch.Tensor([loss[2].item()]).unsqueeze(0).cpu()])
            print("data [%d/3265/%d/%d] Loss1: %.4f Loss2: %.4f Loss3: %.4f"
                  % (i, epoch, args.n_epoch, loss[0].item(), loss[1].item(), loss[2].item()))
        if epoch % 3 == 0:
            print('testing!')
            # evaluation mode; the original left both nets in train()
            rsnet.eval()
            drnet.eval()
            error_lin = []
            error_log = []
            error_va = []
            error_rate = []
            error_absrd = []
            error_squrd = []
            thre1 = []
            thre2 = []
            thre3 = []
            for i_val, (images, labels, segments) in tqdm(enumerate(valloader)):
                images = images.to(cuda0)
                labels = labels.to(cuda1)
                optimizer.zero_grad()
                with torch.no_grad():
                    region_support = rsnet(images)
                    coarse_depth = torch.cat([images, region_support], 1).to(cuda1)
                    outputs = drnet(coarse_depth)
                pred = outputs[2].data.cpu().numpy()
                gt = labels.data.cpu().numpy()
                ones = np.ones(gt.shape)
                zeros = np.zeros(gt.shape)
                pred = np.reshape(pred, gt.shape)
                dis = np.square(gt - pred)
                error_lin.append(np.sqrt(np.mean(dis)))
                dis = np.square(np.log(gt) - np.log(pred))
                error_log.append(np.sqrt(np.mean(dis)))
                alpha = np.mean(np.log(gt) - np.log(pred))
                dis = np.square(np.log(pred) - np.log(gt) + alpha)
                error_va.append(np.mean(dis) / 2)
                # abs-rel: divide per pixel before averaging
                dis = np.abs(gt - pred) / gt
                error_absrd.append(np.mean(dis))
                dis = np.square(gt - pred) / gt
                error_squrd.append(np.mean(dis))
                thelt = np.where(pred / gt > gt / pred, pred / gt, gt / pred)
                thres1 = 1.25
                thre1.append(np.mean(np.where(thelt < thres1, ones, zeros)))
                thre2.append(np.mean(np.where(thelt < thres1 * thres1, ones, zeros)))
                thre3.append(np.mean(np.where(thelt < thres1 * thres1 * thres1, ones, zeros)))
                print("error_lin=%.4f,error_log=%.4f,error_va=%.4f,error_absrd=%.4f,error_squrd=%.4f,thre1=%.4f,thre2=%.4f,thre3=%.4f"
                      % (error_lin[i_val], error_log[i_val], error_va[i_val],
                         error_absrd[i_val], error_squrd[i_val],
                         thre1[i_val], thre2[i_val], thre3[i_val]))
            error = np.mean(error_lin)
            print("error=%.4f" % (error))
            if error <= best_error:
                best_error = error
                # the original saved an undefined `model`; save both nets, as
                # the sibling scripts in this file do
                state = {'epoch': epoch + 1,
                         'model_state': rsnet.state_dict(),
                         'optimizer_state': optimizer.state_dict(),
                         'error': error}
                torch.save(state, "{}_{}_best_model.pkl".format('rsnet', args.dataset))
                state = {'epoch': epoch + 1,
                         'model_state': drnet.state_dict(),
                         'optimizer_state': optimizer.state_dict(),
                         'error': error}
                torch.save(state, "{}_{}_best_model.pkl".format('drnet', args.dataset))
                print('save success')
                np.save('/home/lidong/Documents/RSDEN/RSDEN/loss.npy', loss_rec)
            if epoch % 15 == 0:
                state = {'epoch': epoch + 1,
                         'model_state': rsnet.state_dict(),
                         'optimizer_state': optimizer.state_dict(),
                         'error': error}
                torch.save(state, "{}_{}_{}_model.pkl".format('rsnet', args.dataset, str(epoch)))
                state = {'epoch': epoch + 1,
                         'model_state': drnet.state_dict(),
                         'optimizer_state': optimizer.state_dict(),
                         'error': error}
                torch.save(state, "{}_{}_{}_model.pkl".format('drnet', args.dataset, str(epoch)))
                print('save success')
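# ---------------------------------------------------------------------------
# Every training loop above repeats the same min-max normalisation before
# pushing an image to visdom. A small helper for that pattern; the +1e-6
# denominator guard is an assumption (some loops above use +1, others
# nothing) to avoid division by zero on constant maps:
import numpy as np

def to_vis_image(tensor, rows, cols):
    # take the first sample of the batch and scale it to [0, 1] for vis.image
    img = tensor.data.cpu().numpy().astype('float32')[0, ...]
    img = np.reshape(img, [rows, cols])
    return (img - np.min(img)) / (np.max(img) - np.min(img) + 1e-6)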