def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    loss_rec = [0]
    best_error = 2

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train',
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path, is_transform=True, split='eval',
                           img_size=(args.img_rows, args.img_cols))
    train_length = t_loader.length // 2
    test_length = v_loader.length // 2
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=2, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=2, shuffle=False)

    # Setup Model
    model = get_model(args.arch)
    model = torch.nn.DataParallel(model, device_ids=[2, 3])
    model.cuda(2)

    # Evaluate every checkpoint found in the test directory, oldest first.
    saved_model_path = r'/home/lidong/Documents/CMF/trained/test/'
    saved_model_dir = os.listdir(saved_model_path)
    saved_model_dir.sort()
    for s in range(len(saved_model_dir)):
        print("Loading model and optimizer from checkpoint '{}'".format(
            os.path.join(saved_model_path, saved_model_dir[s])))
        checkpoint = torch.load(os.path.join(saved_model_path, saved_model_dir[s]))
        model.load_state_dict(checkpoint['model_state'])
        print("Loaded checkpoint '{}' (epoch {})".format(
            os.path.join(saved_model_path, saved_model_dir[s]), checkpoint['epoch']))
        epoch = checkpoint['epoch']

        error = 10
        error_rec = []
        error_rec_non = []
        error_rec_true = []
        error_rec_3 = []

        print('testing!')
        model.eval()
        ones = torch.ones(1).cuda(2)
        zeros = torch.zeros(1).cuda(2)
        for i, (left, right, disparity, image) in enumerate(valloader):
            with torch.no_grad():
                start_time = time.time()
                left = left.cuda(2)
                right = right.cuda(2)
                disparity = disparity.cuda(2)
                # x coordinate of every pixel; used to reject matches whose
                # target x - d falls outside the left image border.
                local = torch.arange(disparity.shape[-1]).repeat(
                    disparity.shape[0], disparity.shape[1],
                    1).view_as(disparity).float().cuda(2)
                # mask_non and mask_true coincide in this script.
                mask_non = (disparity < 192) & (disparity > 0) & ((local - disparity) >= 0)
                mask_true = (disparity < 192) & (disparity > 0) & ((local - disparity) >= 0)
                mask = (disparity < 192) & (disparity > 0)
                mask.detach_()
                mask_non.detach_()
                mask_true.detach_()

                output1, output2, output3 = model(left, right)
                # Only the final output is evaluated here.
                output1 = output3
                output1 = torch.squeeze(output1, 1)
                loss = torch.mean(torch.abs(output1[mask] - disparity[mask]))
                loss_non = torch.mean(torch.abs(output1[mask_non] - disparity[mask_non]))
                loss_true = torch.mean(torch.abs(output1[mask_true] - disparity[mask_true]))
                # 3-pixel error: a pixel counts as correct if its error is
                # under 3 px or under 5% of the ground-truth disparity.
                error_map = torch.where(
                    (torch.abs(output1[mask] - disparity[mask]) < 3) |
                    (torch.abs(output1[mask] - disparity[mask]) < 0.05 * disparity[mask]),
                    ones, zeros)
                total = torch.where(disparity[mask] > 0, ones, zeros)
                loss_3 = 100 - torch.sum(error_map) / torch.sum(total) * 100

                error_rec.append(loss.item())
                error_rec_non.append(loss_non.item())
                error_rec_3.append(loss_3.item())
                print(time.time() - start_time)
                print(np.mean(error_rec_3))
                print("data [%d/%d/%d/%d] Loss: %.4f, Loss_non: %.4f, loss_3: %.4f" %
                      (i, test_length, epoch, args.n_epoch,
                       loss.item(), loss_non.item(), loss_3.item()))

        error = np.mean(error_rec)
        error_non = np.mean(error_rec_non)
        error_3 = np.mean(error_rec_3)
        np.save('/home/lidong/Documents/CMF/test/kitti_sub4/epoch:%d_error%.4f_non%.4f_error_3_%.4f.npy'
                % (epoch - 1, error, error_non, error_3),
                [error_rec, error_rec_non, error_rec_3])
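
# The 3-pixel error above is computed inline in each script. A minimal,
# self-contained sketch of the same metric as a reusable helper; the name
# `three_pixel_error` and its signature are illustrative, not part of the
# original code (`torch` is assumed imported as elsewhere in this file):
def three_pixel_error(pred, gt, mask):
    """Percentage of masked pixels whose disparity error exceeds both
    3 px and 5% of the ground truth (the KITTI D1 convention)."""
    diff = torch.abs(pred[mask] - gt[mask])
    correct = (diff < 3) | (diff < 0.05 * gt[mask])
    return 100.0 * (1.0 - correct.float().mean()).item()

# usage sketch: error_rec_3.append(three_pixel_error(output1, disparity, mask))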
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    loss_rec = [0]
    best_error = 2

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train',
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path, is_transform=True, split='test',
                           img_size=(args.img_rows, args.img_cols))
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=4, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=4)

    # Setup visdom for visualization
    # (an older 'Trained Loss' line window is commented out here)
    if args.visdom:
        vis = visdom.Visdom()
        loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches', ylabel='Loss',
                                         title='Training Loss', legend=['Loss']))
        pre_window = vis.image(np.random.rand(256, 512),
                               opts=dict(title='predict!', caption='predict.'))
        ground_window = vis.image(np.random.rand(256, 512),
                                  opts=dict(title='ground!', caption='ground.'))
        image_window = vis.image(np.random.rand(256, 512),
                                 opts=dict(title='image!', caption='image.'))

    # Setup Model
    model = get_model(args.arch)
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    model.cuda()

    # Check if model has custom optimizer / loss.
    # Adam here; SGD and AMSGrad variants were tried and are commented out.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate, betas=(0.9, 0.999))
    loss_fn = l1
    trained = 0
    scale = 100

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            trained = checkpoint['epoch']
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            print('Initialize from resnet34!')
            resnet34 = torch.load(
                '/home/lidong/Documents/CMF/20_bilinear_cmf_flying3d_best_model.pkl')
            # Copy over only the weights whose names also exist in this model.
            model_dict = model.state_dict()
            pre_dict = {k: v for k, v in resnet34['model_state'].items() if k in model_dict}
            model_dict.update(pre_dict)
            model.load_state_dict(model_dict)
            # (Restoring the pretrained optimizer state is commented out.)
            print('load success!')
            trained = 0

    # it should be range(checkpoint['epoch'], args.n_epoch)
    for epoch in range(trained, args.n_epoch):
        print('training!')
        model.train()
        for i, (left, right, disparity, image) in enumerate(trainloader):
            start_time = time.time()
            left = left.cuda()
            right = right.cuda()
            disparity = disparity.cuda()
            mask = (disparity < 192) & (disparity >= 0)
            mask.detach_()

            optimizer.zero_grad()
            output1, output2, output3 = model(left, right)
            output1 = torch.squeeze(output1, 1)
            output2 = torch.squeeze(output2, 1)
            output3 = torch.squeeze(output3, 1)
            # Deep supervision over all three outputs; the final output gets
            # the largest weight (total weight 0.5 + 0.7 + 1.0 = 2.2).
            loss = 0.5 * F.smooth_l1_loss(output1[mask], disparity[mask], reduction='mean') \
                 + 0.7 * F.smooth_l1_loss(output2[mask], disparity[mask], reduction='mean') \
                 + F.smooth_l1_loss(output3[mask], disparity[mask], reduction='mean')
            loss.backward()
            optimizer.step()

            if args.visdom:
                # 5457 is the hard-coded number of training batches; the
                # plotted loss is normalized by the total weight 2.2.
                vis.line(X=torch.ones(1).cpu() * i +
                           torch.ones(1).cpu() * (epoch - trained) * 5457,
                         Y=loss.item() * torch.ones(1).cpu() / 2.2,
                         win=loss_window,
                         update='append')
                if i % 15 == 0:
                    pre = output3.data.cpu().numpy().astype('float32')
                    pre = np.reshape(pre[0, :, :], [256, 512]).astype('float32')
                    vis.image(pre, opts=dict(title='predict!', caption='predict.'),
                              win=pre_window)
                    ground = disparity.data.cpu().numpy().astype('float32')
                    ground = np.reshape(ground[0, :, :], [256, 512]).astype('float32')
                    vis.image(ground, opts=dict(title='ground!', caption='ground.'),
                              win=ground_window)
                    image = image.data.cpu().numpy().astype('float32')
                    image = np.reshape(image[0, ...], [3, 256, 512]).astype('float32')
                    vis.image(image, opts=dict(title='image!', caption='image.'),
                              win=image_window)

            loss_rec.append(loss.item())
            print(time.time() - start_time)
            print("data [%d/5457/%d/%d] Loss: %.4f" %
                  (i, epoch, args.n_epoch, loss.item() / 2.2))

        state = {
            'epoch': epoch + 1,
            'model_state': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
        }
        np.save('loss.npy', loss_rec)
        torch.save(state, "{}_{}_{}_best_model.pkl".format(epoch, args.arch, args.dataset))
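
# The resume branch above keeps only the checkpoint tensors whose names
# exist in the current model; the later scripts additionally drop tensors
# whose shapes disagree. A minimal sketch of that combined pattern; the
# helper name `load_matching_weights` is illustrative, not part of the
# original code (`torch` as imported elsewhere in this file):
def load_matching_weights(model, checkpoint_state):
    """Copy into `model` every checkpoint tensor whose name and shape
    match; leave all other parameters at their current values."""
    model_dict = model.state_dict()
    pre_dict = {k: v for k, v in checkpoint_state.items()
                if k in model_dict and v.shape == model_dict[k].shape}
    model_dict.update(pre_dict)
    model.load_state_dict(model_dict)
    return len(pre_dict)  # number of tensors actually transferred

# usage sketch: load_matching_weights(model, torch.load(path)['model_state'])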
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    loss_rec = [0]
    best_error = 2

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train',
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path, is_transform=True, split='eval',
                           img_size=(args.img_rows, args.img_cols))
    train_length = t_loader.length // 2
    test_length = v_loader.length // 2
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=2, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=2, shuffle=False)

    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env='kitti_sub_4')
        error_window = vis.line(X=torch.zeros((1,)).cpu(),
                                Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='error',
                                          title='test error', legend=['Error']))
        loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches', ylabel='Loss',
                                         title='Training Loss', legend=['Loss']))
        pre_window = vis.image(np.random.rand(256, 512),
                               opts=dict(title='predict!', caption='predict.'))
        ground_window = vis.image(np.random.rand(256, 512),
                                  opts=dict(title='ground!', caption='ground.'))
        image_window = vis.image(np.random.rand(256, 512),
                                 opts=dict(title='image!', caption='image.'))

    # Setup Model
    model = get_model(args.arch)
    model = torch.nn.DataParallel(model, device_ids=[2, 3])
    model.cuda(2)

    # Check if model has custom optimizer / loss.
    # SGD here; the Adam variants (plain and AMSGrad) are commented out.
    optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate,
                                momentum=0.90, weight_decay=5e-5)
    loss_fn = l1
    trained = 0
    scale = 100

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            trained = checkpoint['epoch']
            best_error = 10
            # (Reloading loss_rec from loss_8.npy is commented out here.)
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            print('Initialize from resnet34!')
            resnet34 = torch.load('/home/lidong/Documents/CMF/9_cm_sub_4_flying3d_best_model.pkl')
            # Keep only the pretrained tensors whose name and shape both
            # match the current model.
            model_dict = model.state_dict()
            pre_dict = {k: v for k, v in resnet34['model_state'].items() if k in model_dict}
            key = []
            for k, v in pre_dict.items():
                if v.shape != model_dict[k].shape:
                    key.append(k)
            for k in key:
                pre_dict.pop(k)
            model_dict.update(pre_dict)
            model.load_state_dict(model_dict)
            # (Restoring the pretrained optimizer state is commented out.)
            print('load success!')
            trained = 0

    # it should be range(checkpoint['epoch'], args.n_epoch)
    for epoch in range(trained, args.n_epoch):
        # The training loop that used to live here (smooth-L1 deep
        # supervision plus visdom plots, as in the training script above)
        # is commented out: this variant only evaluates and checkpoints.
        error = 10
        error_rec = []
        error_rec_non = []
        error_rec_true = []

        print('testing!')
        model.eval()
        for i, (left, right, disparity, image) in enumerate(valloader):
            with torch.no_grad():
                start_time = time.time()
                left = left.cuda(2)
                right = right.cuda(2)
                disparity = disparity.cuda(2)
                local = torch.arange(disparity.shape[-1]).repeat(
                    disparity.shape[0], disparity.shape[1],
                    1).view_as(disparity).float().cuda(2)
                # mask_non and mask_true coincide in this script.
                mask_non = (disparity < 192) & (disparity > 0) & ((local - disparity) >= 0)
                mask_true = (disparity < 192) & (disparity > 0) & ((local - disparity) >= 0)
                mask = (disparity < 192) & (disparity > 0)
                mask.detach_()
                mask_non.detach_()
                mask_true.detach_()

                output1, output2, output3 = model(left, right)
                output1 = output3
                output1 = torch.squeeze(output1, 1)
                loss = torch.mean(torch.abs(output1[mask] - disparity[mask]))
                loss_non = torch.mean(torch.abs(output1[mask_non] - disparity[mask_non]))
                loss_true = torch.mean(torch.abs(output1[mask_true] - disparity[mask_true]))
                error_rec.append(loss.item())
                error_rec_non.append(loss_non.item())
                error_rec_true.append(loss_true.item())
                if args.visdom:
                    vis.line(X=torch.ones(1).cpu() * i +
                               torch.ones(1).cpu() * (epoch - trained) * train_length,
                             Y=loss.item() * torch.ones(1).cpu(),
                             win=loss_window,
                             update='append')
                print(time.time() - start_time)
                print("data [%d/%d/%d/%d] Loss: %.4f, Loss_non: %.4f, Loss_true: %.4f" %
                      (i, test_length, epoch, args.n_epoch,
                       loss.item(), loss_non.item(), loss_true.item()))

        error = np.mean(error_rec)
        error_non = np.mean(error_rec_non)
        error_true = np.mean(error_rec_true)
        if error < best_error:
            best_error = error
            state = {'epoch': epoch + 1,
                     'model_state': model.state_dict(),
                     'optimizer_state': optimizer.state_dict(),
                     'error': best_error}
            np.save('loss_4.npy', loss_rec)
            torch.save(state, "{}_{}_{}_{}_best_model.pkl".format(
                epoch, args.arch, args.dataset, best_error))
        if epoch % 15 == 0:
            state = {'epoch': epoch + 1,
                     'model_state': model.state_dict(),
                     'optimizer_state': optimizer.state_dict(),
                     'error': best_error}
            np.save('loss_4.npy', loss_rec)
            torch.save(state, "{}_{}_{}_{}_best_model.pkl".format(
                epoch, args.arch, args.dataset, best_error))
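
# The best-model and periodic saves above duplicate their bodies. A minimal
# sketch of the same policy factored into a helper; `save_checkpoint` is an
# illustrative name, not part of the original scripts (`torch` as imported
# elsewhere in this file):
def save_checkpoint(model, optimizer, epoch, error, path):
    """Bundle model/optimizer state with metadata, as the scripts above do."""
    state = {'epoch': epoch + 1,
             'model_state': model.state_dict(),
             'optimizer_state': optimizer.state_dict(),
             'error': error}
    torch.save(state, path)

# usage sketch: save on improvement, and unconditionally every 15 epochs
# if error < best_error or epoch % 15 == 0:
#     save_checkpoint(model, optimizer, epoch, best_error, "...pkl")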
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    loss_rec = [0]
    best_error = 2

    # Setup Dataloader
    data_path = get_data_path(args.dataset)
    data_loader = get_loader(args.dataset)
    v_loader = data_loader(data_path, is_transform=True, split='test',
                           img_size=(args.img_rows, args.img_cols))
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=2, shuffle=False)

    # Setup Model
    model = get_model(args.arch)
    model = torch.nn.DataParallel(model, device_ids=range(2))
    test_length = len(v_loader) // 2
    model.cuda()

    # Evaluate every checkpoint found in the test directory, oldest first.
    saved_model_path = r'/home/lidong/Documents/CMF/all_data/test/'
    saved_model_dir = os.listdir(saved_model_path)
    saved_model_dir.sort()
    for s in range(len(saved_model_dir)):
        print("Loading model and optimizer from checkpoint '{}'".format(
            os.path.join(saved_model_path, saved_model_dir[s])))
        checkpoint = torch.load(os.path.join(saved_model_path, saved_model_dir[s]))
        model.load_state_dict(checkpoint['model_state'])
        print("Loaded checkpoint '{}' (epoch {})".format(
            os.path.join(saved_model_path, saved_model_dir[s]), checkpoint['epoch']))
        epoch = checkpoint['epoch']

        error = 0
        error_rec = []
        error_rec_non = []
        error_rec_true = []

        print('testing!')
        model.eval()
        for i, (left, right, disparity, image) in enumerate(valloader):
            with torch.no_grad():
                start_time = time.time()
                left = left.cuda()
                right = right.cuda()
                # Crop back to the valid 540x960 region.
                disparity = disparity.cuda()[:, :540, :960]
                local = torch.arange(disparity.shape[-1]).repeat(
                    disparity.shape[0], disparity.shape[1],
                    1).view_as(disparity).float().cuda()
                mask_non = (disparity < 192) & (disparity >= 0) & ((local - disparity) >= 0)
                mask_true = (disparity < 192) & (disparity > 0) & ((local - disparity) >= 0)
                mask = (disparity < 192) & (disparity >= 0)
                mask.detach_()
                mask_non.detach_()
                mask_true.detach_()

                output1, output2, output3 = model(left, right)
                output1 = output3
                output1 = torch.squeeze(output1, 1)[:, :540, :960]
                # (Commented-out diagnostics counting pixels with d == 0,
                # d <= 1, d <= 2, d <= 3 are omitted here.)
                loss = torch.mean(torch.abs(output1[mask] - disparity[mask]))
                loss_non = torch.mean(torch.abs(output1[mask_non] - disparity[mask_non]))
                loss_true = torch.mean(torch.abs(output1[mask_true] - disparity[mask_true]))
                error_rec.append(loss.item())
                error_rec_non.append(loss_non.item())
                error_rec_true.append(loss_true.item())
                print(time.time() - start_time)
                print("data [%d/%d/%d/%d] Loss: %.4f, Loss_non: %.4f, Loss_true: %.4f" %
                      (i, test_length, epoch, args.n_epoch,
                       loss.item(), loss_non.item(), loss_true.item()))

        error = np.mean(error_rec)
        error_non = np.mean(error_rec_non)
        error_true = np.mean(error_rec_true)
        np.save('/home/lidong/Documents/CMF/all_data_test/4_sub/epoch:%d_error%.4f_non%.4f_true%.4f.npy'
                % (epoch - 1, error, error_non, error_true),
                [error_rec, error_rec_non, error_rec_true])
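
# The `local` tensor above marks pixels whose match x - d would fall left of
# the image border. A compact sketch of that occlusion-aware mask; the name
# `valid_disparity_mask` is illustrative, and `expand_as` replaces the
# original repeat/view construction (`torch` as imported elsewhere):
def valid_disparity_mask(disparity, max_disp=192, check_border=True):
    """True where 0 < d < max_disp and, optionally, where the matched
    pixel x - d stays inside the left image."""
    mask = (disparity < max_disp) & (disparity > 0)
    if check_border:
        x = torch.arange(disparity.shape[-1], device=disparity.device,
                         dtype=disparity.dtype).expand_as(disparity)
        mask = mask & ((x - disparity) >= 0)
    return mask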
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    loss_rec = [0]
    best_error = 2

    # Setup Dataloader
    data_path = get_data_path(args.dataset)
    data_loader = get_loader(args.dataset)
    v_loader = data_loader(data_path, is_transform=True, split='test',
                           img_size=(args.img_rows, args.img_cols))
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=4, shuffle=False)

    # Setup Model
    model = get_model(args.arch)
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            # (Restoring the optimizer state is commented out; this script
            # only evaluates.)
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            epoch = checkpoint['epoch']

    error = 0
    error_rec = []
    print('testing!')
    model.eval()
    for i, (left, right, disparity, image) in enumerate(valloader):
        with torch.no_grad():
            start_time = time.time()
            left = left.cuda()
            right = right.cuda()
            # Crop the padded height back to 540 rows (the original sliced
            # the batch dimension here, which was a bug).
            disparity = disparity.cuda()[:, :540, ...]
            mask = (disparity < 192) & (disparity >= 0)
            mask.detach_()
            output1, output2, output3 = model(left, right)
            output3 = torch.squeeze(output3, 1)[:, :540, ...]
            loss = F.l1_loss(output3[mask], disparity[mask], reduction='mean')
            # Store a Python float, not a CUDA tensor, so np.mean works below.
            error_rec.append(loss.item())
            print(time.time() - start_time)
            print("data [%d/1062/%d/%d] Loss: %.4f" % (i, epoch, args.n_epoch, loss.item()))
            break  # NOTE: evaluates only the first batch.

    error = np.mean(error_rec)
    np.save('/home/lidong/Documents/CMF/test/cmf/error:%.4f,epoch:%d.npy'
            % (error, epoch), error_rec)
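
# The scripts above recompute the masked end-point error inline each time.
# A tiny sketch of it as a helper returning a Python float; the name
# `masked_epe` is illustrative (`torch` as imported elsewhere in this file):
def masked_epe(pred, gt, mask):
    """Mean absolute disparity error over valid pixels, as a float."""
    return torch.mean(torch.abs(pred[mask] - gt[mask])).item()

# usage sketch: error_rec.append(masked_epe(output3, disparity, mask))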
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    loss_rec = [0]
    best_error = 2

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='test',
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path, is_transform=True, split='eval',
                           img_size=(args.img_rows, args.img_cols))
    train_length = t_loader.length
    test_length = v_loader.length
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=1, shuffle=False)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=1, shuffle=False)

    with torch.no_grad():
        # Setup Model
        model = get_model(args.arch)
        model = torch.nn.DataParallel(model, device_ids=[0])
        model.cuda()

        # Check if model has custom optimizer / loss.
        # SGD here; the Adam variants are commented out. The optimizer is
        # unused for inference but kept so resume checkpoints load cleanly.
        optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate,
                                    momentum=0.90, weight_decay=5e-5)
        loss_fn = l1
        trained = 0
        scale = 100

        if args.resume is not None:
            if os.path.isfile(args.resume):
                print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(args.resume)
                model.load_state_dict(checkpoint['model_state'])
                optimizer.load_state_dict(checkpoint['optimizer_state'])
                print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
                trained = checkpoint['epoch']
                trained = 0  # restart the epoch count regardless
                best_error = 5
            else:
                print("No checkpoint found at '{}'".format(args.resume))
                print('Initialize from resnet34!')
                resnet34 = torch.load('/home/lidong/Documents/CMF/9_cm_sub_4_flying3d_best_model.pkl')
                # Keep only the pretrained tensors whose name and shape
                # both match the current model.
                model_dict = model.state_dict()
                pre_dict = {k: v for k, v in resnet34['model_state'].items() if k in model_dict}
                key = []
                for k, v in pre_dict.items():
                    if v.shape != model_dict[k].shape:
                        key.append(k)
                for k in key:
                    pre_dict.pop(k)
                model_dict.update(pre_dict)
                model.load_state_dict(model_dict)
                print('load success!')
                trained = 0

        print('generating submission disparities!')
        model.eval()
        loss_3_rec = []
        ones = torch.ones(1).cuda()
        zeros = torch.zeros(1).cuda()
        for i, (left, right, disparity, image, name, h, w) in enumerate(trainloader):
            with torch.no_grad():
                print(name[0])
                h = h.data.cpu().numpy().astype('int32')
                w = w.data.cpu().numpy().astype('int32')
                start_time = time.time()
                left = left.cuda()
                right = right.cuda()
                disparity = disparity.cuda()
                mask = (disparity < 192) & (disparity > 0)
                mask.detach_()
                optimizer.zero_grad()
                output1, output2, output3 = model(left, right)
                output1 = torch.squeeze(output1, 1)
                output2 = torch.squeeze(output2, 1)
                output3 = torch.squeeze(output3, 1)
                print(torch.max(output3), torch.min(output3))
                # KITTI stores disparity as a 16-bit PNG scaled by 256.
                output3 = output3 * 256
                pre = output3.data.cpu().numpy().astype('uint16')
                # The loader pads on the top and left; keep the
                # bottom-right h x w region of the prediction.
                pre = pre[0, -h[0]:, -w[0]:]
                pre = np.reshape(pre, [h[0], w[0]])
                cv2.imwrite(os.path.join(
                    '/home/lidong/Documents/datasets/kitti12/disp_occ',
                    name[0] + '.png'), pre)
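
# KITTI expects 16-bit PNGs in which pixel value / 256 is the disparity.
# A minimal sketch of the save step above as a helper; the name
# `save_kitti_disparity` is illustrative (`np`/`cv2` as imported elsewhere):
def save_kitti_disparity(disp, path):
    """disp: HxW float disparity in pixels; written as a uint16 PNG, x256."""
    disp_u16 = np.clip(disp * 256.0, 0, 65535).astype('uint16')
    cv2.imwrite(path, disp_u16)  # cv2 infers the 16-bit PNG from dtype/extension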
def train(args):
    torch.backends.cudnn.benchmark = True
    # Setup Augmentations
    loss_rec = [0]
    best_error = 5

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train',
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path, is_transform=True, split='eval',
                           img_size=(args.img_rows, args.img_cols))
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=args.batch_size, shuffle=True)
    evalloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                 num_workers=args.batch_size, shuffle=False)
    # Lengths in batches (these supersede the per-sample counts computed
    # from t_loader.length in the original).
    train_length = len(trainloader)
    test_length = len(evalloader)

    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env='kitti_sub_4')
        error_window = vis.line(X=torch.zeros((1,)).cpu(),
                                Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches', ylabel='error',
                                          title='test error', legend=['Error']))
        loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches', ylabel='Loss',
                                         title='Training Loss', legend=['Loss']))
        pre_window = vis.image(np.random.rand(256, 512),
                               opts=dict(title='predict!', caption='predict.'))
        ground_window = vis.image(np.random.rand(256, 512),
                                  opts=dict(title='ground!', caption='ground.'))
        image_window = vis.image(np.random.rand(256, 512),
                                 opts=dict(title='image!', caption='image.'))
        error3_window = vis.image(np.random.rand(256, 512),
                                  opts=dict(title='error!', caption='error.'))

    # Setup Model
    model = get_model(args.arch)
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    model.cuda(0)

    # Check if model has custom optimizer / loss.
    # Adam here; the SGD and AMSGrad variants are commented out.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate, betas=(0.9, 0.999))
    loss_fn = l1
    trained = 0
    scale = 100

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            # (Restoring the optimizer state and the checkpoint's recorded
            # error is commented out; both thresholds are reset instead.)
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            trained = checkpoint['epoch']
            best_error = 100
            mean_loss = 100
            print(mean_loss)
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            print('Initialize from resnet34!')
            resnet34 = torch.load(
                '/home/lidong/Documents/CMF/466_cmfsm_kitti_0.591322544373964_error3_1.8297886095548932_six_best_model.pkl')
            # Keep only the pretrained tensors whose name and shape both
            # match the current model.
            model_dict = model.state_dict()
            pre_dict = {k: v for k, v in resnet34['model_state'].items() if k in model_dict}
            key = []
            for k, v in pre_dict.items():
                if v.shape != model_dict[k].shape:
                    key.append(k)
            for k in key:
                pre_dict.pop(k)
            model_dict.update(pre_dict)
            model.load_state_dict(model_dict)
            print('load success!')
            trained = 0

    # it should be range(checkpoint['epoch'], args.n_epoch)
    for epoch in range(trained, args.n_epoch):
        ones = torch.ones(1).cuda(0)
        zeros = torch.zeros(1).cuda(0)
        print('training!')
        model.train()
        epe_rec = []
        loss_3_re = []
        for i, (left, right, disparity, image) in enumerate(trainloader):
            flag = 1
            count = 0
            start_time = time.time()
            left = left.cuda(0)
            right = right.cuda(0)
            disparity = disparity.cuda(0)
            mask = (disparity < 192) & (disparity > 0)
            mask.detach_()
            iterative_count = 0
            # Re-optimize this batch until its 3-pixel error is low enough
            # or the retry budget runs out; all exits are via break.
            while flag:
                optimizer.zero_grad()
                output1, output2, output3 = model(left, right)
                output1 = torch.squeeze(output1, 1)
                output2 = torch.squeeze(output2, 1)
                output3 = torch.squeeze(output3, 1)
                # Deep supervision with smooth L1. (An L2/soft-L1 variant,
                # meant to reduce error-3 by up-weighting pixels off by more
                # than 3 px, is commented out in the original.)
                loss = 0.5 * F.smooth_l1_loss(output1[mask], disparity[mask], reduction='mean') \
                     + 0.7 * F.smooth_l1_loss(output2[mask], disparity[mask], reduction='mean') \
                     + F.smooth_l1_loss(output3[mask], disparity[mask], reduction='mean')
                epe = torch.mean(torch.abs(output3[mask] - disparity[mask]))
                error_map = torch.where(
                    (torch.abs(output3[mask] - disparity[mask]) < 3) |
                    (torch.abs(output3[mask] - disparity[mask]) < 0.05 * disparity[mask]),
                    ones, zeros)
                loss_3 = 100 - torch.sum(error_map) / torch.sum(mask) * 100

                if args.visdom:
                    # Full-size map of pixels wrong by both >= 3 px and >= 5%.
                    error_show = torch.where(
                        (torch.abs(output3 - disparity) >= 3) |
                        (torch.abs(output3 - disparity) >= 0.05 * disparity),
                        ones, zeros) * mask.float()
                    pre = output3.data.cpu().numpy().astype('float32')
                    pre = np.reshape(pre[0, :, :], [256, 512]).astype('float32')
                    vis.image(pre, opts=dict(title='predict!', caption='predict.'),
                              win=pre_window)
                    if iterative_count == 0:
                        # Ground truth and input only change once per batch.
                        ground = disparity.data.cpu().numpy().astype('float32')
                        ground = np.reshape(ground[0, :, :], [256, 512]).astype('float32')
                        vis.image(ground, opts=dict(title='ground!', caption='ground.'),
                                  win=ground_window)
                        image = image.data.cpu().numpy().astype('float32')
                        image = np.reshape(image[0, ...], [3, 256, 512]).astype('float32')
                        vis.image(image, opts=dict(title='image!', caption='image.'),
                                  win=image_window)
                    error_show = error_show.data.cpu().numpy().astype('float32')
                    error_show = np.reshape(error_show[0, ...], [256, 512]).astype('float32')
                    vis.image(error_show, opts=dict(title='error!', caption='error.'),
                              win=error3_window)

                if iterative_count == 0:
                    min_loss3_t = loss_3
                    loss_bp = loss  # bind loss_bp on the first pass (was unbound
                                    # in the original when this branch fell through)
                    if epoch <= trained + 1000:
                        loss.backward()
                        epe_rec.append(epe.item())
                        optimizer.step()
                        break
                if loss_3 <= 1.25:
                    print('no back')
                    loss_bp.backward()
                    epe_rec.append(epe.item())
                    optimizer.step()
                    break
                else:
                    print('back')
                    # (Both branches of the original loss_3 > 2 test assigned
                    # the same value, so the test is dropped.)
                    loss_bp = loss
                    loss_bp.backward()
                    optimizer.step()
                    if loss_3 <= 1.25 or iterative_count > 8:
                        if loss_3 < min_loss3_t:
                            epe_rec.append(epe.item())
                            break
                        else:
                            min_loss3_t = torch.min(loss_3, min_loss3_t)
                            iterative_count += 1
                            print("repeat data [%d/%d/%d/%d] Loss: %.4f error_3: %.4f" %
                                  (i, train_length, epoch, args.n_epoch,
                                   epe.item(), loss_3.item()))
                    else:
                        min_loss3_t = torch.min(loss_3, min_loss3_t)
                        iterative_count += 1
                        print("repeat data [%d/%d/%d/%d] Loss: %.4f error_3: %.4f" %
                              (i, train_length, epoch, args.n_epoch,
                               epe.item(), loss_3.item()))

            if args.visdom:
                vis.line(X=torch.ones(1).cpu() * i +
                           torch.ones(1).cpu() * (epoch - trained) * train_length,
                         Y=epe.item() * torch.ones(1).cpu(),
                         win=loss_window,
                         update='append')
            # (A per-iteration visdom dump of prediction/ground/image/error
            # maps is commented out here; it duplicates the display above.)
            loss_rec.append(loss.item())
            print(time.time() - start_time)
            print("data [%d/%d/%d/%d] Loss: %.4f, loss_3: %.4f" %
                  (i, train_length, epoch, args.n_epoch, epe.item(), loss_3.item()))
            loss_3_re.append(loss_3.item())

        print('epe:', np.mean(epe_rec))
        print('loss_3:', np.mean(loss_3_re))
        mean_loss = np.mean(epe_rec)

        # eval
        print('testing!')
        model.eval()
        epe_rec = []
        loss_3_re = []
        for i, (left, right, disparity, image) in tqdm(enumerate(evalloader)):
            with torch.no_grad():
                count = 0
                start_time = time.time()
                left = left.cuda(0)
                right = right.cuda(0)
                disparity = disparity.cuda(0)
                mask = (disparity < 192) & (disparity > 0)
                mask.detach_()
                iterative_count = 0
                optimizer.zero_grad()
                output1, output2, output3 = model(left, right)
                # (A commented-out MSE deep-supervision loss is omitted here.)
                output3 = torch.squeeze(output3, 1)
                error_map = torch.where(
                    (torch.abs(output3[mask] - disparity[mask]) < 3) |
                    (torch.abs(output3[mask] - disparity[mask]) < 0.05 * disparity[mask]),
                    ones, zeros)
                loss_3 = 100 - torch.sum(error_map) / torch.sum(mask) * 100
                epe = torch.mean(torch.abs(output3[mask] - disparity[mask]))
                epe_rec.append(epe.item())
                loss_3_re.append(loss_3.item())
                if args.visdom:
                    vis.line(X=torch.ones(1).cpu() * i +
                               torch.ones(1).cpu() * (epoch - trained) * test_length,
                             Y=loss_3.item() * torch.ones(1).cpu(),
                             win=error_window,
                             update='append')
                print(time.time() - start_time)
                print("data [%d/%d/%d/%d] Loss: %.4f, loss_3: %.4f" %
                      (i, test_length, epoch, args.n_epoch, epe.item(), loss_3.item()))
                # Dump hard cases (3-pixel error above 10%) for inspection.
                # Note cv2.imwrite fails silently if visual/<i>/ is missing.
                if loss_3.item() > 10:
                    pre = output3.data.cpu().numpy().astype('float32')
                    pre = pre[0, :, :]
                    cv2.imwrite(os.path.join('/home/lidong/Documents/CMF/visual/',
                                             str(i), 'pre.png'), pre)
                    ground = disparity.data.cpu().numpy().astype('float32')
                    ground = ground[0, :, :]
                    cv2.imwrite(os.path.join('/home/lidong/Documents/CMF/visual/',
                                             str(i), 'ground.png'), ground)
                    image = image.data.cpu().numpy().astype('uint8')
                    image = image[0, ...]
                    print(image.shape)
                    # CHW RGB -> HWC BGR for cv2.
                    image = np.transpose(image, [1, 2, 0])[..., ::-1]
                    cv2.imwrite(os.path.join('/home/lidong/Documents/CMF/visual/',
                                             str(i), 'image.png'), image)

        print('epe:', np.mean(epe_rec))
        print('loss_3:', np.mean(loss_3_re))
        error = np.mean(loss_3_re)
        # (An Adam restart at lr/10 after epoch 400 is commented out here.)
        if error < best_error:
            best_error = error
            state = {'epoch': epoch + 1,
                     'model_state': model.state_dict(),
                     'optimizer_state': optimizer.state_dict(),
                     'error': np.mean(epe_rec),
                     'error3': np.mean(loss_3_re)}
            torch.save(state, "{}_{}_{}_{}_error3_{}_four_disparity_model.pkl".format(
                epoch, args.arch, args.dataset, np.mean(epe_rec), np.mean(loss_3_re)))
        if epoch % 50 == 0:
            state = {'epoch': epoch + 1,
                     'model_state': model.state_dict(),
                     'optimizer_state': optimizer.state_dict(),
                     'error': np.mean(epe_rec),
                     'error3': np.mean(loss_3_re)}
            torch.save(state, "{}_{}_{}_{}_error3_{}_four_disparity_model.pkl".format(
                epoch, args.arch, args.dataset, np.mean(epe_rec), np.mean(loss_3_re)))
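
# The while-loop in the training script above re-optimizes a batch until its
# 3-pixel error drops below a threshold or a retry budget is exhausted. A
# condensed sketch of that policy only, not the exact control flow above;
# `fit_batch` and `step_fn` are illustrative names:
def fit_batch(step_fn, threshold=1.25, max_retries=8):
    """step_fn() runs one forward/backward/optimizer step on the current
    batch and returns its 3-pixel error; repeat while it stays above
    `threshold`, up to `max_retries` extra passes."""
    loss_3 = step_fn()
    for _ in range(max_retries):
        if loss_3 <= threshold:
            break
        loss_3 = step_fn()
    return loss_3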