def create_refinement_inputs(root_dir, train_eval_dir, classes, intrinsic_matrix):
    correspondence_block = UNET.UNet(
        n_channels=3, out_channels_id=14, out_channels_uv=256, bilinear=True)
    correspondence_block.cuda()
    correspondence_block_filename = os.path.join(train_eval_dir,
                                                 'correspondence_block.pt')
    correspondence_block.load_state_dict(
        torch.load(correspondence_block_filename, map_location=torch.device('cpu')))

    train_data = LineMODDataset(root_dir, classes=classes,
                                transform=transforms.Compose([transforms.ToTensor()]))

    upsampled = nn.Upsample(size=[240, 320], mode='bilinear', align_corners=False)
    regex = re.compile(r'\d+')
    count = 0
    for i in range(len(train_data)):
        if i % 1000 == 0:
            print(str(i) + "/" + str(len(train_data)) + " finished!")

        img_adr, img, _, _, _ = train_data[i]
        label = os.path.split(os.path.split(os.path.dirname(img_adr))[0])[1]
        idx = regex.findall(os.path.split(img_adr)[1])[0]

        adr_rendered = root_dir + label + \
            "/pose_refinement/rendered/color" + str(idx) + ".png"
        adr_img = root_dir + label + \
            "/pose_refinement/real/color" + str(idx) + ".png"

        # find the object in the image using the idmask
        img = img.view(1, img.shape[0], img.shape[1], img.shape[2])
        idmask_pred, _, _ = correspondence_block(img.cuda())
        idmask = torch.argmax(idmask_pred, dim=1).squeeze().cpu()
        coord_2d = (idmask == classes[label]).nonzero(as_tuple=True)

        if coord_2d[0].nelement() != 0:
            coord_2d = torch.cat(
                (coord_2d[0].view(coord_2d[0].shape[0], 1),
                 coord_2d[1].view(coord_2d[1].shape[0], 1)), 1)
            min_x = coord_2d[:, 0].min()
            max_x = coord_2d[:, 0].max()
            min_y = coord_2d[:, 1].min()
            max_y = coord_2d[:, 1].max()
            img = img.squeeze().transpose(1, 2).transpose(0, 2)
            obj_img = img[min_x:max_x + 1, min_y:max_y + 1, :]

            # saving in the correct format using upsampling
            obj_img = obj_img.transpose(0, 1).transpose(0, 2).unsqueeze(dim=0)
            obj_img = upsampled(obj_img)
            obj_img = obj_img.squeeze().transpose(0, 2).transpose(0, 1)
            # It appears obj_img occasionally has values slightly larger than 1.0.
            # So I'm assuming that we're only clamping by a slight amount here.
            # If that's not the case, we need to scale.
            obj_img = torch.clamp(obj_img, 0.0, 1.0)
            mpimg.imsave(adr_img, obj_img.squeeze().numpy())

            # create rendering for an object
            cropped_rendered_image = create_rendering(root_dir, intrinsic_matrix,
                                                      label, idx)
            rendered_img = torch.from_numpy(cropped_rendered_image)
            rendered_img = rendered_img.unsqueeze(dim=0)
            rendered_img = rendered_img.transpose(1, 3).transpose(2, 3)
            rendered_img = upsampled(rendered_img)
            rendered_img = rendered_img.squeeze().transpose(0, 2).transpose(0, 1)
            mpimg.imsave(adr_rendered, rendered_img.numpy())
        else:  # object not present in idmask prediction
            count += 1
            mpimg.imsave(adr_rendered, np.zeros((240, 320)))
            mpimg.imsave(adr_img, np.zeros((240, 320)))
    print("Number of outliers: ", count)
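
# Output layout produced by create_refinement_inputs, as implied by the paths above
# (the later pose-refinement stage is assumed to consume these paired crops):
#   <root_dir>/<label>/pose_refinement/real/color<idx>.png      - 320x240 upsampled crop of the real image
#   <root_dir>/<label>/pose_refinement/rendered/color<idx>.png  - matching 320x240 crop of the rendering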
def initial_pose_estimation(root_dir, train_eval_dir, classes, intrinsic_matrix):
    for c in classes:
        class_pred_pose_fname = os.path.join(train_eval_dir, c, "predicted_pose")
        if not os.path.exists(class_pred_pose_fname):
            os.makedirs(class_pred_pose_fname)

    # LineMOD Dataset
    train_data = LineMODDataset(root_dir, train_eval_dir, classes=classes,
                                transform=transforms.Compose([transforms.ToTensor()]))

    # load the best correspondence block weights
    correspondence_block = UNET.UNet(
        n_channels=3, out_channels_id=14, out_channels_uv=256, bilinear=True)
    correspondence_block.cuda()
    correspondence_block_filename = os.path.join(train_eval_dir,
                                                 'correspondence_block.pt')
    correspondence_block.load_state_dict(
        torch.load(correspondence_block_filename, map_location=torch.device('cpu')))

    # initial 6D pose prediction
    regex = re.compile(r'\d+')
    outliers = 0
    for i in range(len(train_data)):
        if i % 100 == 0:
            print(str(i) + "/" + str(len(train_data)) + " finished!")

        img_adr, img, idmask, _, _ = train_data[i]
        label = os.path.split(os.path.split(os.path.dirname(img_adr))[0])[1]
        idx = regex.findall(os.path.split(img_adr)[1])[0]

        img = img.view(1, img.shape[0], img.shape[1], img.shape[2])
        idmask_pred, umask_pred, vmask_pred = correspondence_block(img.cuda())

        # convert the masks to 240,320 shape
        temp = torch.argmax(idmask_pred, dim=1).squeeze().cpu()
        upred = torch.argmax(umask_pred, dim=1).squeeze().cpu()
        vpred = torch.argmax(vmask_pred, dim=1).squeeze().cpu()
        coord_2d = (temp == classes[label]).nonzero(as_tuple=True)

        adr = os.path.join(train_eval_dir,
                           label + "/predicted_pose/" + "info_" + str(idx) + ".txt")

        coord_2d = torch.cat(
            (coord_2d[0].view(coord_2d[0].shape[0], 1),
             coord_2d[1].view(coord_2d[1].shape[0], 1)), 1)
        uvalues = upred[coord_2d[:, 0], coord_2d[:, 1]]
        vvalues = vpred[coord_2d[:, 0], coord_2d[:, 1]]
        dct_keys = torch.cat((uvalues.view(-1, 1), vvalues.view(-1, 1)), 1)
        dct_keys = tuple(dct_keys.numpy())

        dct = load_obj(os.path.join(root_dir, label + "/UV-XYZ_mapping"))
        mapping_2d = []
        mapping_3d = []
        for count, (u, v) in enumerate(dct_keys):
            if (u, v) in dct:
                mapping_2d.append(np.array(coord_2d[count]))
                mapping_3d.append(dct[(u, v)])

        # Recover the 6D pose (rotation and translation) via PnP
        # PnP with the P3P solver needs at least 4 unique 2D-3D correspondences to run
        if len(mapping_2d) >= 4 and len(mapping_3d) >= 4:
            _, rvecs, tvecs, inliers = cv2.solvePnPRansac(
                np.array(mapping_3d, dtype=np.float32),
                np.array(mapping_2d, dtype=np.float32),
                intrinsic_matrix,
                distCoeffs=None,
                iterationsCount=150,
                reprojectionError=1.0,
                flags=cv2.SOLVEPNP_P3P)
            rot, _ = cv2.Rodrigues(rvecs, jacobian=None)
            rot[np.isnan(rot)] = 1
            tvecs[np.isnan(tvecs)] = 1
            tvecs = np.where(-100 < tvecs, tvecs, np.array([-100.]))
            tvecs = np.where(tvecs < 100, tvecs, np.array([100.]))
            rot_tra = np.append(rot, tvecs, axis=1)
            # save the predicted pose
            np.savetxt(adr, rot_tra)
        else:
            # save a placeholder pose (ones for rotation, zero translation)
            outliers += 1
            rot_tra = np.ones((3, 4))
            rot_tra[:, 3] = 0
            np.savetxt(adr, rot_tra)
    print("Number of instances where PnP couldn't be used: ", outliers)
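
# The UV-XYZ_mapping file loaded above is assumed to be a pickled dict that maps
# quantized (u, v) pairs (0-255, matching the 256 UV output channels of the UNet) to
# 3D points on the object model, and each predicted pose is saved as a 3x4 [R|t]
# matrix. A hypothetical sketch of reading one of the saved poses back (the path and
# object name below are placeholders, not values from the repository):
#
#   rot_tra = np.loadtxt(os.path.join(train_eval_dir, 'ape', 'predicted_pose', 'info_0.txt'))
#   rot, tvec = rot_tra[:, :3], rot_tra[:, 3]  # 3x3 rotation, 3-vector translation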
def train_correspondence_block(root_dir, classes, epochs=10):
    train_data = LineMODDataset(root_dir, classes=classes,
                                transform=transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.ColorJitter(brightness=0, contrast=0,
                                                           saturation=0, hue=0)
                                ]))

    batch_size = 4
    num_workers = 0
    valid_size = 0.2

    # obtain training indices that will be used for validation
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # prepare data loaders (combine dataset and sampler)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=num_workers)

    # architecture for correspondence block - 13 objects + background = 14 channels for ID masks
    correspondence_block = UNET.UNet(
        n_channels=3, out_channels_id=14, out_channels_uv=256, bilinear=True)
    correspondence_block.cuda()

    # custom loss function and optimizer
    criterion_id = nn.CrossEntropyLoss()
    criterion_u = nn.CrossEntropyLoss()
    criterion_v = nn.CrossEntropyLoss()

    # specify optimizer
    optimizer = optim.Adam(correspondence_block.parameters(), lr=3e-4,
                           weight_decay=3e-5)

    # training loop
    # number of epochs to train the model
    n_epochs = epochs
    valid_loss_min = np.Inf  # track change in validation loss

    for epoch in range(1, n_epochs + 1):
        # keep track of training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        print("------ Epoch ", epoch, " ---------")

        ###################
        # train the model #
        ###################
        correspondence_block.train()
        for _, image, idmask, umask, vmask in train_loader:
            # move tensors to GPU if CUDA is available
            image, idmask, umask, vmask = image.cuda(), idmask.cuda(), umask.cuda(), vmask.cuda()
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            idmask_pred, umask_pred, vmask_pred = correspondence_block(image)
            # calculate the batch loss
            loss_id = criterion_id(idmask_pred, idmask)
            loss_u = criterion_u(umask_pred, umask)
            loss_v = criterion_v(vmask_pred, vmask)
            loss = loss_id + loss_u + loss_v
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # update training loss
            train_loss += loss.item()

        ######################
        # validate the model #
        ######################
        correspondence_block.eval()
        for _, image, idmask, umask, vmask in valid_loader:
            # move tensors to GPU if CUDA is available
            image, idmask, umask, vmask = image.cuda(), idmask.cuda(), umask.cuda(), vmask.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            idmask_pred, umask_pred, vmask_pred = correspondence_block(image)
            # calculate the batch loss
            loss_id = criterion_id(idmask_pred, idmask)
            loss_u = criterion_u(umask_pred, umask)
            loss_v = criterion_v(vmask_pred, vmask)
            loss = loss_id + loss_u + loss_v
            # update average validation loss
            valid_loss += loss.item()

        # calculate average losses
        train_loss = train_loss / len(train_loader.sampler)
        valid_loss = valid_loss / len(valid_loader.sampler)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.
              format(epoch, train_loss, valid_loss))

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'
                  .format(valid_loss_min, valid_loss))
            torch.save(correspondence_block.state_dict(), 'correspondence_block.pt')
            valid_loss_min = valid_loss
def train_correspondence_block(root_dir, train_eval_dir, classes, epochs=10, batch_size=4,
                               accum_grad_batch_size=1, out_path_and_name=None,
                               corr_transfer=None):
    train_data = LineMODDataset(root_dir, train_eval_dir, classes=classes,
                                transform=transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.ColorJitter(brightness=0, contrast=0,
                                                           saturation=0, hue=0)
                                ]))

    num_workers = 0
    valid_size = 0.2

    # obtain training indices that will be used for validation
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # prepare data loaders (combine dataset and sampler)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=num_workers)

    # architecture for correspondence block - 13 objects + background = 14 channels for ID masks
    correspondence_block = UNET.UNet(
        n_channels=3, out_channels_id=14, out_channels_uv=256, bilinear=True)
    if corr_transfer:
        print("Initializing correspondence block from: %s" % corr_transfer)
        correspondence_block.load_state_dict(
            torch.load(corr_transfer, map_location=torch.device('cpu')))
    correspondence_block.cuda()

    if 0:  # debugging aid: print a model summary and exit
        from torchsummary import summary
        summary(correspondence_block, input_size=(3, 240, 320))
        sys.exit(1)

    # custom loss function and optimizer
    weight_classes = False
    if weight_classes:
        # Using weighted version for class mask as mentioned in the paper
        # However, not sure what the weighting is, so taking a guess
        # Note we don't need to normalize when using the default 'reduction' arg
        class_weights = np.ones(len(classes) + 1)  # +1 for background
        class_weights[0] = 0.1
        criterion_id = nn.CrossEntropyLoss(
            torch.tensor(class_weights, dtype=torch.float32).cuda())
    else:
        criterion_id = nn.CrossEntropyLoss()
    criterion_u = nn.CrossEntropyLoss()
    criterion_v = nn.CrossEntropyLoss()

    # specify optimizer
    optimizer = optim.Adam(correspondence_block.parameters(), lr=3e-4,
                           weight_decay=3e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2,
                                                     verbose=True)

    # number of epochs to train the model
    n_epochs = epochs
    # track change in validation loss
    valid_loss_min = np.Inf

    if accum_grad_batch_size != 1:
        print("Gradient accumulator batch size: %i" % accum_grad_batch_size)

    # training loop
    for epoch in range(1, n_epochs + 1):
        # keep track of training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        train_idmask_loss = train_umask_loss = train_vmask_loss = 0.0
        valid_idmask_loss = valid_umask_loss = valid_vmask_loss = 0.0
        print("------ Epoch ", epoch, " ---------")
        print("Training...")

        ###################
        # train the model #
        ###################
        batch_cnt = 0
        correspondence_block.train()
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        for img_adr, image, idmask, umask, vmask in train_loader:
            assert image.shape[1] == correspondence_block.n_channels, \
                f'Network has been defined with {correspondence_block.n_channels} input channels, ' \
                f'but loaded images have {image.shape[1]} channels. Please check that ' \
                'the images are loaded correctly.'
            if batch_cnt % 100 == 0:
                print("Batch %i/%i finished!"
                      % (batch_cnt, len(train_idx) // batch_size))

            # move tensors to GPU if CUDA is available
            image, idmask, umask, vmask = image.cuda(), idmask.cuda(), umask.cuda(), vmask.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            idmask_pred, umask_pred, vmask_pred = correspondence_block(image)
            # Calculate the batch loss
            # Need to divide by accum_grad_batch_size to account for accumulated gradients
            loss_id = criterion_id(idmask_pred, idmask) / accum_grad_batch_size
            loss_u = criterion_u(umask_pred, umask) / accum_grad_batch_size
            loss_v = criterion_v(vmask_pred, vmask) / accum_grad_batch_size
            total_loss = loss_id + loss_u + loss_v
            # backward pass: compute gradient of the loss with respect to model parameters
            total_loss.backward()
            # update training loss
            # Note that .item() also releases the memory. DON'T accumulate the criterion obj
            train_idmask_loss += loss_id.item()
            train_umask_loss += loss_u.item()
            train_vmask_loss += loss_v.item()
            train_loss += total_loss.item()

            # Only update once every accum_grad_batch_size
            if (batch_cnt + 1) % accum_grad_batch_size == 0:
                # perform a single optimization step (parameter update)
                optimizer.step()
                # Reset gradients, for next accumulated batch
                optimizer.zero_grad()
            batch_cnt += 1

        ######################
        # validate the model #
        ######################
        print("Validating...")
        correspondence_block.eval()
        optimizer.zero_grad()
        batch_cnt = 0
        with torch.no_grad():  # This is critical to limit GPU memory use
            for img_adr, image, idmask, umask, vmask in valid_loader:
                if batch_cnt % 100 == 0:
                    print("Batch %i/%i finished!"
                          % (batch_cnt, len(valid_idx) // batch_size))
                # move tensors to GPU if CUDA is available
                image, idmask, umask, vmask = image.cuda(), idmask.cuda(), umask.cuda(), vmask.cuda()
                # forward pass: compute predicted outputs by passing inputs to the model
                idmask_pred, umask_pred, vmask_pred = correspondence_block(image)
                # calculate the batch loss
                loss_id = criterion_id(idmask_pred, idmask)
                loss_u = criterion_u(umask_pred, umask)
                loss_v = criterion_v(vmask_pred, vmask)
                total_loss = loss_id + loss_u + loss_v
                # update average validation loss
                valid_idmask_loss += loss_id.item()
                valid_umask_loss += loss_u.item()
                valid_vmask_loss += loss_v.item()
                valid_loss += total_loss.item()
                batch_cnt += 1

        # Calculate average losses
        train_loss = train_loss / len(train_loader.sampler)
        train_idmask_loss = train_idmask_loss / len(train_loader.sampler)
        train_umask_loss = train_umask_loss / len(train_loader.sampler)
        train_vmask_loss = train_vmask_loss / len(train_loader.sampler)
        valid_loss = valid_loss / len(valid_loader.sampler)
        valid_idmask_loss = valid_idmask_loss / len(valid_loader.sampler)
        valid_umask_loss = valid_umask_loss / len(valid_loader.sampler)
        valid_vmask_loss = valid_vmask_loss / len(valid_loader.sampler)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.
              format(epoch, train_loss, valid_loss))
        print('Train IDMask loss: %.6f \tUMask loss: %.6f \tVMask loss: %.6f' %
              (train_idmask_loss, train_umask_loss, train_vmask_loss))
        print('Valid IDMask loss: %.6f \tUMask loss: %.6f \tVMask loss: %.6f' %
              (valid_idmask_loss, valid_umask_loss, valid_vmask_loss))

        scheduler.step(valid_loss)
        # TODO - monitor for train/val divergence and stop

        # save model if validation loss has decreased
        # Don't save the model on the first epoch in case we're transfer learning and
        # valid_loss_min is still initialized to Inf
        if valid_loss <= valid_loss_min and (epoch > 1 or epochs == 1):
            print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'
                  .format(valid_loss_min, valid_loss))
            if not out_path_and_name:
                correspondence_block_filename = os.path.join(
                    train_eval_dir, 'correspondence_block.pt')
            else:
                correspondence_block_filename = out_path_and_name
            torch.save(correspondence_block.state_dict(),
                       correspondence_block_filename)
            valid_loss_min = valid_loss
        if epoch == 1:
            valid_loss_min = valid_loss
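
# A minimal end-to-end sketch of how these functions might be invoked. The directory
# names, class-id mapping, and intrinsics below are hypothetical placeholders, not
# values taken from the repository; the assumed order (train the correspondence block,
# estimate initial poses with PnP, then generate the real/rendered crops for pose
# refinement) follows the data dependencies in the functions above.
if __name__ == '__main__':
    example_root_dir = 'LineMOD_Dataset/'          # hypothetical dataset location
    example_train_eval_dir = 'train_eval_output/'  # hypothetical output location
    example_classes = {'ape': 1, 'can': 2, 'cat': 3}  # hypothetical subset of the 13 objects
    example_intrinsics = np.array([[572.4114, 0.0, 325.2611],
                                   [0.0, 573.57043, 242.04899],
                                   [0.0, 0.0, 1.0]])  # intrinsics commonly used for LineMOD; verify against your data

    train_correspondence_block(example_root_dir, example_train_eval_dir,
                               example_classes, epochs=10, batch_size=4)
    initial_pose_estimation(example_root_dir, example_train_eval_dir,
                            example_classes, example_intrinsics)
    create_refinement_inputs(example_root_dir, example_train_eval_dir,
                             example_classes, example_intrinsics)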