class TransitivityLoss(nn.Module):
    """Transitivity consistency loss for chained affine transformations.

    A set of points is warped by ``theta_forward_1`` and then by
    ``theta_forward_2``; the result is compared (via ``torch.dist``) with the
    same points warped directly by ``theta_equiv``.

    Args:
        transform: geometric model; only ``'affine'`` is implemented.
        use_cuda: forwarded to ``PointTnf`` and used to decide whether the
            intermediate tensors are moved to the GPU.
        dist_metric: ``'L2'`` (p=2) or ``'L1'`` (p=1) norm of the difference.
    """

    def __init__(self, transform='affine', use_cuda=True, dist_metric='L2'):
        super(TransitivityLoss, self).__init__()
        self.pointTnf = PointTnf(use_cuda=use_cuda)
        self.dist_metric = dist_metric
        self.transform = transform
        # Remembered so that forward() does not force CUDA on CPU-only runs.
        self.use_cuda = use_cuda

    def forward(self, coord, theta_forward_1, theta_forward_2, theta_equiv):
        """Return the distance between the chained and the direct warp.

        Args:
            coord: 3-D tensor of point coordinates, expandable along dim 0 to
                the batch size of ``theta_forward_1``.
            theta_forward_1: parameters of the first transformation.
            theta_forward_2: parameters of the second transformation.
            theta_equiv: parameters of the transformation that should be
                equivalent to applying the two above in sequence.

        Raises:
            NotImplementedError: if ``transform`` is not ``'affine'``.
            ValueError: if ``dist_metric`` is neither ``'L1'`` nor ``'L2'``.
        """
        # Fail early with a clear message instead of an unbound-variable error.
        if self.transform != 'affine':
            raise NotImplementedError(
                "TransitivityLoss only supports transform='affine', got %r"
                % (self.transform,))
        if self.dist_metric == 'L2':
            norm_order = 2
        elif self.dist_metric == 'L1':
            norm_order = 1
        else:
            raise ValueError(
                "dist_metric must be 'L1' or 'L2', got %r"
                % (self.dist_metric,))

        batch = theta_forward_1.size()[0]
        _, h, w = coord.size()
        coord = Variable(coord.expand(batch, h, w))

        # NOTE(review): the 15x15 reference size is hard-coded; presumably the
        # resolution the incoming coordinates are expressed in -- confirm.
        im_size = Variable(torch.FloatTensor([[15, 15, 1]]))
        target_norm = PointsToUnitCoords(coord, im_size)
        if self.use_cuda:
            target_norm = target_norm.cuda()
            im_size = im_size.cuda()

        # Chained warp: theta_forward_1 followed by theta_forward_2.
        forward_norm_1 = self.pointTnf.affPointTnf(theta_forward_1, target_norm)
        forward_norm_2 = self.pointTnf.affPointTnf(theta_forward_2, forward_norm_1)
        forward_coord_2 = PointsToPixelCoords(forward_norm_2, im_size)

        # Direct warp with the transformation that should be equivalent.
        equiv_norm = self.pointTnf.affPointTnf(theta_equiv, target_norm)
        equiv_coord = PointsToPixelCoords(equiv_norm, im_size)

        return torch.dist(forward_coord_2, equiv_coord, p=norm_order)
class TransformedGridLoss(nn.Module):
    """Grid loss between an estimated and a ground-truth transformation.

    A fixed ``grid_size x grid_size`` lattice of points spanning [-1, 1] in
    both axes is warped with both transformations; the loss is the squared
    distance between corresponding warped points, summed over the coordinate
    axis and averaged over points and batch.

    Args:
        geometric_model: ``'affine'`` or ``'tps'``.
        use_cuda: move the grid to the GPU and forward the flag to ``PointTnf``.
        grid_size: number of grid points per axis.
    """

    def __init__(self, geometric_model='affine', use_cuda=True, grid_size=20):
        super(TransformedGridLoss, self).__init__()
        self.geometric_model = geometric_model
        # define virtual grid of points to be transformed
        axis_coords = np.linspace(-1, 1, grid_size)
        self.N = grid_size * grid_size
        X, Y = np.meshgrid(axis_coords, axis_coords)
        X = np.reshape(X, (1, 1, self.N))
        Y = np.reshape(Y, (1, 1, self.N))
        # P has shape (1, 2, N): row 0 holds x, row 1 holds y.
        P = np.concatenate((X, Y), 1)
        self.P = Variable(torch.FloatTensor(P), requires_grad=False)
        self.pointTnf = PointTnf(use_cuda=use_cuda)
        if use_cuda:
            self.P = self.P.cuda()

    def forward(self, theta, theta_GT):
        """Return the scalar grid loss for ``theta`` against ``theta_GT``.

        Raises:
            ValueError: if ``geometric_model`` is not ``'affine'`` or ``'tps'``.
        """
        # expand grid according to batch size
        batch_size = theta.size()[0]
        P = self.P.expand(batch_size, 2, self.N)

        # compute transformed grid points using estimated and GT tnfs
        if self.geometric_model == 'affine':
            P_prime = self.pointTnf.affPointTnf(theta, P)
            P_prime_GT = self.pointTnf.affPointTnf(theta_GT, P)
        elif self.geometric_model == 'tps':
            # NOTE(review): only the estimate is unsqueezed to 4-D here;
            # theta_GT is presumably already in that layout -- confirm.
            P_prime = self.pointTnf.tpsPointTnf(
                theta.unsqueeze(2).unsqueeze(3), P)
            P_prime_GT = self.pointTnf.tpsPointTnf(theta_GT, P)
        else:
            # Previously this fell through to an UnboundLocalError below.
            raise ValueError(
                "Unsupported geometric_model %r (expected 'affine' or 'tps')"
                % (self.geometric_model,))

        # compute MSE loss on transformed grid points
        loss = torch.sum(torch.pow(P_prime - P_prime_GT, 2), 1)
        return torch.mean(loss)
class TransformedGridLoss:
    """TensorFlow grid loss between an estimated and a ground-truth affine tnf.

    A fixed ``grid_size x grid_size`` lattice of points spanning [-1, 1] is
    warped with both transformations; the loss is the squared distance
    between corresponding warped points, summed over the coordinate axis and
    averaged over everything else.

    Args:
        geometric_model: only ``"affine"`` is implemented.
        grid_size: number of grid points per axis.
    """

    def __init__(self, geometric_model='affine', grid_size=20):
        self.geometric_model = geometric_model
        axis_coords = np.linspace(-1, 1, grid_size)
        self.N = grid_size * grid_size
        X, Y = np.meshgrid(axis_coords, axis_coords)
        X = np.reshape(X, [1, 1, self.N])
        Y = np.reshape(Y, [1, 1, self.N])
        # P is a NumPy array of shape (1, 2, N); it is tiled per call.
        self.P = np.concatenate((X, Y), 1)
        self.pointTnf = PointTnf()

    def __call__(self, theta, theta_GT, batch_size):
        """Return the scalar grid loss for ``theta`` against ``theta_GT``.

        Raises:
            NotImplementedError: if ``geometric_model`` is not ``"affine"``.
        """
        # The original printed a message here and then crashed on an unbound
        # ``P_prime``; raise a meaningful error instead.
        if self.geometric_model != "affine":
            raise NotImplementedError(
                "Geometric model %r is not supported yet; only 'affine' is "
                "implemented (TPS is not available)" % (self.geometric_model,))

        P = tf.cast(tf.tile(self.P, [batch_size, 1, 1]), 'float32')
        P_prime = self.pointTnf.affPointTnf(theta, P)
        P_prime_GT = self.pointTnf.affPointTnf(theta_GT, P)

        # Squared distance (MSE loss)
        loss = tf.reduce_sum(tf.pow(P_prime - P_prime_GT, 2), 1)
        return tf.reduce_mean(loss)
class TransformedGridLoss(nn.Module):
    """Grid loss for affine, homography and TPS transformations.

    A fixed ``grid_size x grid_size`` lattice of points spanning [-1, 1] is
    warped with the estimated and the ground-truth parameters; the loss is
    the squared distance between corresponding warped points, summed over the
    coordinate axis and averaged over points and batch.

    Args:
        geometric_model: ``'affine'``, ``'hom'`` or ``'tps'``.
        use_cuda: move the grid to the GPU and forward the flag to ``PointTnf``.
        grid_size: number of grid points per axis.
    """

    def __init__(self, geometric_model='affine', use_cuda=True, grid_size=20):
        super(TransformedGridLoss, self).__init__()
        self.geometric_model = geometric_model
        # define virtual grid of points to be transformed
        axis_coords = np.linspace(-1, 1, grid_size)
        self.N = grid_size * grid_size
        X, Y = np.meshgrid(axis_coords, axis_coords)
        X = np.reshape(X, (1, 1, self.N))
        Y = np.reshape(Y, (1, 1, self.N))
        # P has shape (1, 2, N): row 0 holds x, row 1 holds y.
        P = np.concatenate((X, Y), 1)
        self.P = Variable(torch.FloatTensor(P), requires_grad=False)
        self.pointTnf = PointTnf(use_cuda=use_cuda)
        if use_cuda:
            self.P = self.P.cuda()

    def forward(self, theta, theta_GT):
        """Return the scalar grid loss for ``theta`` against ``theta_GT``.

        Raises:
            ValueError: if ``geometric_model`` is not one of the three
                supported models.
        """
        # expand grid according to batch size
        batch_size = theta.size()[0]
        P = self.P.expand(batch_size, 2, self.N)

        # compute transformed grid points using estimated and GT tnfs
        model = self.geometric_model
        if model == 'affine':
            P_prime = self.pointTnf.affPointTnf(theta, P)
            P_prime_GT = self.pointTnf.affPointTnf(theta_GT, P)
        elif model == 'hom':
            P_prime = self.pointTnf.homPointTnf(theta, P)
            P_prime_GT = self.pointTnf.homPointTnf(theta_GT, P)
        elif model == 'tps':
            # NOTE(review): only the estimate is unsqueezed to 4-D here;
            # theta_GT is presumably already in that layout -- confirm.
            P_prime = self.pointTnf.tpsPointTnf(
                theta.unsqueeze(2).unsqueeze(3), P)
            P_prime_GT = self.pointTnf.tpsPointTnf(theta_GT, P)
        else:
            # Previously this fell through to an UnboundLocalError below.
            raise ValueError(
                "Unsupported geometric_model %r (expected 'affine', 'hom' "
                "or 'tps')" % (model,))

        # compute MSE loss on transformed grid points
        loss = torch.sum(torch.pow(P_prime - P_prime_GT, 2), 1)
        return torch.mean(loss)
def pck_metric(batch, batch_start_idx, theta_aff, theta_tps, theta_aff_tps, stats, args, use_cuda=True):
    """Compute PCK for one batch and store it into ``stats``.

    Target key-points are warped into the source image with each
    transformation that was supplied (``None`` disables a variant) and scored
    with ``pck`` against the source key-points.

    Args:
        batch: dict providing ``source_im_size``, ``target_im_size``,
            ``source_points``, ``target_points`` and ``L_pck``.
        batch_start_idx: row in the ``stats`` arrays of the first sample.
        theta_aff: affine parameters or ``None``.
        theta_tps: TPS parameters or ``None``.
        theta_aff_tps: TPS parameters applied before ``theta_aff``, or ``None``.
        stats: nested dict; ``stats[name]['pck']`` is written for each active
            variant ``name`` in ``'aff'``, ``'tps'``, ``'aff_tps'``.
        args: namespace providing ``pck_alpha`` and ``tps_reg_factor``.
        use_cuda: forwarded to ``PointTnf``.

    Returns:
        The same ``stats`` object.
    """
    alpha = args.pck_alpha
    src_size = batch['source_im_size']
    tgt_size = batch['target_im_size']
    src_points = batch['source_points']
    tgt_points = batch['target_points']

    # Instantiate point transformer
    point_tnf = PointTnf(use_cuda=use_cuda, tps_reg_factor=args.tps_reg_factor)

    # warp points with estimated transformations
    tgt_points_norm = PointsToUnitCoords(tgt_points, tgt_size)

    warped = []  # (stats key, warped points in source pixel coordinates)
    if theta_aff is not None:
        # affine only
        aff_norm = point_tnf.affPointTnf(theta_aff, tgt_points_norm)
        warped.append(('aff', PointsToPixelCoords(aff_norm, src_size)))
    if theta_tps is not None:
        # tps only
        tps_norm = point_tnf.tpsPointTnf(theta_tps, tgt_points_norm)
        warped.append(('tps', PointsToPixelCoords(tps_norm, src_size)))
    if theta_aff_tps is not None:
        # tps followed by affine
        # NOTE(review): this branch uses theta_aff without checking that it
        # was supplied -- confirm callers always pass both.
        aff_tps_norm = point_tnf.tpsPointTnf(theta_aff_tps, tgt_points_norm)
        aff_tps_norm = point_tnf.affPointTnf(theta_aff, aff_tps_norm)
        warped.append(('aff_tps', PointsToPixelCoords(aff_tps_norm, src_size)))

    L_pck = batch['L_pck'].data
    current_batch_size = batch['source_im_size'].size(0)
    indices = range(batch_start_idx, batch_start_idx + current_batch_size)

    # Score every active variant first, then write all results.
    scores = [(name, pck(src_points.data, points.data, L_pck, alpha))
              for name, points in warped]
    for name, score in scores:
        stats[name]['pck'][indices] = score.unsqueeze(1).cpu().numpy()

    return stats
def pck_metric(batch, batch_start_idx, theta_aff, theta_tps, theta_aff_tps, model_tps, stats, args, use_cuda=True):
    """Compute PCK for one batch using a model-driven deformable warp.

    Same procedure as a plain affine/TPS PCK evaluation, except that the
    non-affine warp goes through ``PointTnf.defPointTnf`` with ``model_tps``.

    Args:
        batch: dict providing ``source_im_size``, ``target_im_size``,
            ``source_points``, ``target_points`` and ``L_pck``.
        batch_start_idx: row in the ``stats`` arrays of the first sample.
        theta_aff: affine parameters or ``None``.
        theta_tps: deformable parameters or ``None``.
        theta_aff_tps: deformable parameters applied before ``theta_aff``,
            or ``None``.
        model_tps: object handed to ``defPointTnf`` as its third argument.
        stats: nested dict; ``stats[name]['pck']`` is written for each active
            variant.
        args: namespace providing ``pck_alpha`` and ``tps_reg_factor``.
        use_cuda: forwarded to ``PointTnf``.

    Returns:
        The same ``stats`` object.
    """
    alpha = args.pck_alpha
    run_aff = theta_aff is not None
    run_tps = theta_tps is not None
    run_aff_tps = theta_aff_tps is not None

    source_im_size = batch['source_im_size']
    target_im_size = batch['target_im_size']
    source_points = batch['source_points']
    target_points = batch['target_points']

    # Instantiate point transformer
    pt = PointTnf(use_cuda=use_cuda, tps_reg_factor=args.tps_reg_factor)

    def to_source_pixels(points_norm):
        # Normalised coordinates -> pixel coordinates of the source image.
        return PointsToPixelCoords(points_norm, source_im_size)

    # warp points with estimated transformations
    target_points_norm = PointsToUnitCoords(target_points, target_im_size)

    warped_aff = warped_tps = warped_aff_tps = None
    if run_aff:
        warped_aff = to_source_pixels(
            pt.affPointTnf(theta_aff, target_points_norm))
    if run_tps:
        warped_tps = to_source_pixels(
            pt.defPointTnf(theta_tps, target_points_norm, model_tps))
    if run_aff_tps:
        # NOTE(review): theta_aff is used here without a None check --
        # confirm callers always pass it together with theta_aff_tps.
        deformed = pt.defPointTnf(theta_aff_tps, target_points_norm, model_tps)
        warped_aff_tps = to_source_pixels(pt.affPointTnf(theta_aff, deformed))

    L_pck = batch['L_pck'].data
    current_batch_size = batch['source_im_size'].size(0)
    indices = range(batch_start_idx, batch_start_idx + current_batch_size)

    active = [
        (name, points)
        for name, enabled, points in (
            ('aff', run_aff, warped_aff),
            ('tps', run_tps, warped_tps),
            ('aff_tps', run_aff_tps, warped_aff_tps),
        )
        if enabled
    ]

    # Score every active variant first, then write all results.
    results = [(name, pck(source_points.data, points.data, L_pck, alpha))
               for name, points in active]
    for name, value in results:
        stats[name]['pck'][indices] = value.unsqueeze(1).cpu().numpy()

    return stats
def flow_metrics(batch, batch_start_idx, theta_aff, theta_tps, theta_aff_tps, model_tps, stats, args, use_cuda=True):
    """Write dense flow files for every sample of a batch.

    For each sample a regular grid covering the target image in [-1, 1]
    coordinates is warped with every transformation that was supplied
    (``None`` disables a variant), converted to a flow field and written
    below ``args.flow_output_dir/<variant>/<batch['flow_path'][b]>``.

    Args:
        batch: dict providing ``source_im_size``, ``target_im_size`` and
            ``flow_path``.
        batch_start_idx: unused here; kept in the signature.
        theta_aff: affine parameters or ``None``.
        theta_tps: deformable parameters or ``None``.
        theta_aff_tps: deformable parameters applied before ``theta_aff``,
            or ``None``.
        model_tps: object handed to ``defPointTnf`` as its third argument.
        stats: returned unchanged.
        args: namespace providing ``flow_output_dir``.
        use_cuda: place the sampling grid on the GPU and forward the flag to
            ``PointTnf``.

    Returns:
        ``stats``, untouched.
    """
    result_path = args.flow_output_dir

    do_aff = theta_aff is not None
    do_tps = theta_tps is not None
    do_aff_tps = theta_aff_tps is not None

    pt = PointTnf(use_cuda=use_cuda)

    def save_flow(grid, variant, rel_path, h_src, w_src):
        # Convert a sampling grid to a flow field and write it to disk.
        flow = th_sampling_grid_to_np_flow(source_grid=grid, h_src=h_src, w_src=w_src)
        flow_path = os.path.join(result_path, variant, rel_path)
        create_file_path(flow_path)
        write_flo_file(flow, flow_path)

    batch_size = batch['source_im_size'].size(0)
    for b in range(batch_size):
        h_src = int(batch['source_im_size'][b, 0].data.cpu().numpy())
        w_src = int(batch['source_im_size'][b, 1].data.cpu().numpy())
        h_tgt = int(batch['target_im_size'][b, 0].data.cpu().numpy())
        w_tgt = int(batch['target_im_size'][b, 1].data.cpu().numpy())

        # Regular grid over the target image, one point per pixel.
        grid_X, grid_Y = np.meshgrid(np.linspace(-1, 1, w_tgt), np.linspace(-1, 1, h_tgt))
        grid_X = torch.FloatTensor(grid_X).unsqueeze(0).unsqueeze(3)
        grid_Y = torch.FloatTensor(grid_Y).unsqueeze(0).unsqueeze(3)
        grid_X = Variable(grid_X, requires_grad=False)
        grid_Y = Variable(grid_Y, requires_grad=False)
        if use_cuda:
            grid_X = grid_X.cuda()
            grid_Y = grid_Y.cuda()

        # Flatten to a (1, 2, h_tgt * w_tgt) point list for PointTnf.
        grid_X_vec = grid_X.view(1, 1, -1)
        grid_Y_vec = grid_Y.view(1, 1, -1)
        grid_XY_vec = torch.cat((grid_X_vec, grid_Y_vec), 1)

        def pointsToGrid(x, h_tgt=h_tgt, w_tgt=w_tgt):
            # (1, 2, h*w) point list -> (1, h, w, 2) sampling grid.
            return x.contiguous().view(1, 2, h_tgt, w_tgt).transpose(1, 2).transpose(2, 3)

        rel_path = batch['flow_path'][b]

        if do_aff:
            grid_aff = pointsToGrid(
                pt.affPointTnf(theta_aff[b, :].unsqueeze(0), grid_XY_vec))
            save_flow(grid_aff, 'aff', rel_path, h_src, w_src)

        if do_tps:
            grid_tps = pointsToGrid(
                pt.defPointTnf(theta_tps[b, :].unsqueeze(0), grid_XY_vec, model_tps))
            save_flow(grid_tps, 'tps', rel_path, h_src, w_src)

        if do_aff_tps:
            # Deformable warp first, then the affine one on its output.
            deformed = pt.defPointTnf(
                theta_aff_tps[b, :].unsqueeze(0), grid_XY_vec, model_tps)
            grid_aff_tps = pointsToGrid(
                pt.affPointTnf(theta_aff[b, :].unsqueeze(0), deformed))
            save_flow(grid_aff_tps, 'aff_tps', rel_path, h_src, w_src)

    return stats
class TransformedGridLoss(nn.Module):
    """Grid loss for parametric models that are first converted to affine.

    The predicted and ground-truth parameters are turned into affine matrices
    by a model-specific function, a fixed ``grid_size x grid_size`` lattice in
    [-1, 1] is warped with both, and the squared point distance is summed
    over the coordinate axis and averaged over points and batch.

    Supported ``geometric_model`` values: ``'vqmt3d'``, ``'affine_simple'``,
    ``'affine_simple_4'``, ``'rotate'``, ``'scale'`` and ``'shift_y'``.

    NOTE(review): the default value ``'affine'`` is not in the supported
    list, so constructing the loss without an explicit model raises
    ``NotImplementedError`` -- confirm whether that is intended.
    """

    def __init__(self, geometric_model='affine', use_cuda=True, grid_size=20):
        super(TransformedGridLoss, self).__init__()
        self.geometric_model = geometric_model

        # Pick the parameters -> affine matrix converter; None marks vqmt3d,
        # which is converted inline in forward().
        model = self.geometric_model
        if model == 'vqmt3d':
            self.get_mat_fun = None
        elif model in ('affine_simple', 'affine_simple_4'):
            self.get_mat_fun = affine_mat_from_simple
        elif model == 'rotate':
            self.get_mat_fun = get_rotate_matrix
        elif model == 'scale':
            self.get_mat_fun = get_scale_matrix
        elif model == 'shift_y':
            self.get_mat_fun = get_shift_y_matrix
        else:
            raise NotImplementedError(
                'Specified geometric model is unsupported')

        # Virtual grid of points to be transformed, stored as (1, 2, N).
        side = np.linspace(-1, 1, grid_size)
        self.N = grid_size * grid_size
        grid_x, grid_y = np.meshgrid(side, side)
        points = np.concatenate(
            (np.reshape(grid_x, (1, 1, self.N)),
             np.reshape(grid_y, (1, 1, self.N))), 1)
        self.P = Variable(torch.FloatTensor(points), requires_grad=False)
        self.pointTnf = PointTnf(use_cuda=use_cuda)
        if use_cuda:
            self.P = self.P.cuda()

    def forward(self, theta, theta_GT):
        """Return the scalar grid loss for ``theta`` against ``theta_GT``."""
        # One copy of the grid per batch element.
        grid = self.P.expand(theta.size(0), 2, self.N)

        to_matrix = self.get_mat_fun
        if not to_matrix:
            # vqmt3d: the matrix is built from the first two parameter columns.
            theta_aff = get_vqmt3d_matrix(theta[:, 0], theta[:, 1])
            theta_aff_GT = get_vqmt3d_matrix(theta_GT[:, 0], theta_GT[:, 1])
        else:
            theta_aff = to_matrix(theta)
            theta_aff_GT = to_matrix(theta_GT)

        warped = self.pointTnf.affPointTnf(theta_aff, grid)
        warped_GT = self.pointTnf.affPointTnf(theta_aff_GT, grid)

        # Squared distance per point, then mean over points and batch.
        squared_dist = torch.sum(torch.pow(warped - warped_GT, 2), 1)
        return torch.mean(squared_dist)
class TransLoss(nn.Module):
    """Three-step cycle loss: points warped by A, B and C should return home.

    A ``NUM_OF_COORD x NUM_OF_COORD`` lattice of integer coordinates is
    pushed through ``theta_A``, ``theta_B`` and ``theta_C`` in sequence; the
    loss is the squared L2 distance between the starting and the final
    coordinates, divided by the number of points and the batch size.

    Args:
        transform: stored on the instance; only affine warps are applied.
        use_cuda: keep the lattice and the reference image size on the GPU
            and forward the flag to ``PointTnf``.
    """

    def __init__(self, transform='affine', use_cuda=True):
        super(TransLoss, self).__init__()
        self.pointTnf = PointTnf(use_cuda=use_cuda)
        self.transform = transform
        # Remembered so that forward() does not force CUDA on CPU-only runs.
        self.use_cuda = use_cuda

        # Lattice of (i, j) pairs, stored as a (1, 2, NUM_OF_COORD**2) tensor.
        lattice = [[float(i), float(j)]
                   for i in range(config.NUM_OF_COORD)
                   for j in range(config.NUM_OF_COORD)]
        coord = np.expand_dims(np.array(lattice).transpose(), axis=0)
        self.coord = torch.from_numpy(coord).float()
        if use_cuda:
            self.coord = self.coord.cuda()

    def forward(self, theta_A, theta_B, theta_C):
        """Return the cycle loss for the chain ``theta_A -> theta_B -> theta_C``."""
        batch = theta_A.size()[0]
        _, h, w = self.coord.size()
        # Keep the expansion local: overwriting self.coord (as before) left a
        # batch-sized tensor behind that could not be expanded again when the
        # next batch had a different size.
        coord = Variable(self.coord.expand(batch, h, w))

        img_size = Variable(torch.FloatTensor([[240, 240, 1]]))
        if self.use_cuda:
            img_size = img_size.cuda()

        A_norm = PointsToUnitCoords(coord, img_size)
        A_norm = self.pointTnf.affPointTnf(theta_A, A_norm)
        A_coord = PointsToPixelCoords(A_norm, img_size)

        B_norm = PointsToUnitCoords(A_coord, img_size)
        B_norm = self.pointTnf.affPointTnf(theta_B, B_norm)
        B_coord = PointsToPixelCoords(B_norm, img_size)

        C_norm = PointsToUnitCoords(B_coord, img_size)
        C_norm = self.pointTnf.affPointTnf(theta_C, C_norm)
        C_coord = PointsToPixelCoords(C_norm, img_size)

        num_points = config.NUM_OF_COORD * config.NUM_OF_COORD
        loss = (torch.dist(coord, C_coord, p=2) ** 2) / num_points / batch
        return loss
class SequentialGridLoss(nn.Module):
    """Weighted grid loss over a rotate -> scale -> shift-y decomposition.

    The three parameter columns of ``theta`` are converted to matrices with
    ``get_rotate_matrix``, ``get_scale_matrix`` and ``get_shift_y_matrix``.
    Each stage warps the points produced by the previous stage (predicted and
    ground-truth chains are tracked separately) and contributes its own grid
    loss, weighted by ``self.weights``.

    Args:
        use_cuda: move the grid to the GPU and forward the flag to ``PointTnf``.
        grid_size: number of grid points per axis.
    """

    def __init__(self, use_cuda=True, grid_size=20):
        super(SequentialGridLoss, self).__init__()
        self.N = grid_size * grid_size
        # define virtual grid of points to be transformed, shape (1, 2, N)
        axis_coords = np.linspace(-1, 1, grid_size)
        X, Y = np.meshgrid(axis_coords, axis_coords)
        X = X.ravel()[None, None, ...]
        Y = Y.ravel()[None, None, ...]
        P = np.concatenate((X, Y), axis=1)
        self.P = torch.tensor(P, dtype=torch.float32, requires_grad=False)
        self.pointTnf = PointTnf(use_cuda=use_cuda)
        # Per-stage weights: rotate, scale, shift.
        self.weights = [5000.0, 3000.0, 3000.0]
        if use_cuda:
            self.P = self.P.cuda()

    def warp_and_mse(self, mat, mat_GT, P, P_GT):
        """Warp ``P`` with ``mat`` and ``P_GT`` with ``mat_GT``; return the loss.

        Returns:
            ``(loss, P_warp, P_warp_GT)`` where ``loss`` is the squared point
            distance summed over dim 1 and averaged over the rest.
        """
        P_warp = self.pointTnf.affPointTnf(mat, P)
        P_warp_GT = self.pointTnf.affPointTnf(mat_GT, P_GT)
        loss = torch.sum(torch.pow(P_warp - P_warp_GT, 2), 1)
        loss = torch.mean(loss)
        return loss, P_warp, P_warp_GT

    def forward(self, theta, theta_GT):
        """Return the weighted sum of the three per-stage grid losses."""
        # expand grid according to batch size
        batch_size = theta.size(0)
        P = self.P.expand(batch_size, 2, self.N)

        rotate_mat = get_rotate_matrix(theta[:, 0])
        rotate_mat_GT = get_rotate_matrix(theta_GT[:, 0])
        loss_rotate, P_rotate, P_rotate_GT = self.warp_and_mse(
            rotate_mat, rotate_mat_GT, P, P)

        scale_mat = get_scale_matrix(theta[:, 1])
        scale_mat_GT = get_scale_matrix(theta_GT[:, 1])
        loss_scale, P_scale, P_scale_GT = self.warp_and_mse(
            scale_mat, scale_mat_GT, P_rotate, P_rotate_GT)

        shift_mat = get_shift_y_matrix(theta[:, 2])
        shift_mat_GT = get_shift_y_matrix(theta_GT[:, 2])
        # The warped points of the final stage are not needed any further.
        loss_shift, _, _ = self.warp_and_mse(
            shift_mat, shift_mat_GT, P_scale, P_scale_GT)

        return (self.weights[0] * loss_rotate
                + self.weights[1] * loss_scale
                + self.weights[2] * loss_shift)
class GridLossWithMSE(nn.Module):
    """Blend of the transformed-grid loss and a plain MSE on the parameters.

    ``alpha * grid_loss + (1 - alpha) * mse_loss`` where ``grid_loss`` is the
    squared distance between a lattice warped by the estimate and by the
    ground truth, and ``mse_loss`` is the mean squared difference between
    ``theta`` viewed as ``(-1, 2, 3)`` and ``theta_GT``.

    Args:
        geometric_model: ``'affine'`` or ``'tps'``.
        use_cuda: move the grid to the GPU and forward the flag to ``PointTnf``.
        grid_size: number of grid points per axis.
        alpha: weight of the grid term; the MSE term gets ``1 - alpha``.
    """

    def __init__(self, geometric_model='affine', use_cuda=True, grid_size=20, alpha=0.5):
        super(GridLossWithMSE, self).__init__()
        self.alpha = alpha
        self.geometric_model = geometric_model
        # define virtual grid of points to be transformed, shape (1, 2, N)
        axis_coords = np.linspace(-1, 1, grid_size)
        self.N = grid_size * grid_size
        X, Y = np.meshgrid(axis_coords, axis_coords)
        X = np.reshape(X, (1, 1, self.N))
        Y = np.reshape(Y, (1, 1, self.N))
        P = np.concatenate((X, Y), 1)
        self.P = Variable(torch.FloatTensor(P), requires_grad=False)
        self.pointTnf = PointTnf(use_cuda)
        if use_cuda:
            self.P = self.P.cuda()

    def forward(self, theta, theta_GT, tb_writer=None, step=None):
        """Return the blended loss; optionally log both terms.

        Args:
            theta: estimated parameters.
            theta_GT: ground-truth parameters.
            tb_writer: optional writer exposing ``add_scalar``; used only
                when ``step`` is given as well.
            step: global step passed to ``add_scalar``.

        Raises:
            ValueError: if ``geometric_model`` is not ``'affine'`` or ``'tps'``.
        """
        # expand grid according to batch size
        batch_size = theta.size()[0]
        P = self.P.expand(batch_size, 2, self.N)

        # compute transformed grid points using estimated and GT tnfs
        if self.geometric_model == 'affine':
            P_prime = self.pointTnf.affPointTnf(theta, P)
            P_prime_GT = self.pointTnf.affPointTnf(theta_GT, P)
        elif self.geometric_model == 'tps':
            P_prime = self.pointTnf.tpsPointTnf(
                theta.unsqueeze(2).unsqueeze(3), P)
            P_prime_GT = self.pointTnf.tpsPointTnf(theta_GT, P)
        else:
            # Previously this fell through to an UnboundLocalError below.
            raise ValueError(
                "Unsupported geometric_model %r (expected 'affine' or 'tps')"
                % (self.geometric_model,))

        # compute MSE loss on transformed grid points
        grid_loss = torch.sum(torch.pow(P_prime - P_prime_GT, 2), 1)
        grid_loss = torch.mean(grid_loss)

        # compute MSE on affinity matrices
        # NOTE(review): the (-1, 2, 3) view assumes six affine parameters per
        # sample; it looks incompatible with the 'tps' branch -- confirm.
        mse_loss = ((theta.view([-1, 2, 3]) - theta_GT) ** 2).mean()

        if tb_writer is not None and step is not None:
            tb_writer.add_scalar('grid loss', grid_loss.data.item(), step)
            tb_writer.add_scalar('MSE loss', mse_loss.data.item(), step)

        return self.alpha * grid_loss + (1 - self.alpha) * mse_loss
class CycleLoss(nn.Module):
    """Forward/backward cycle-consistency loss for affine transformations.

    A ``NUM_OF_COORD x NUM_OF_COORD`` lattice of pixel coordinates covering
    an ``image_size x image_size`` image is warped by ``theta_forward`` and
    then by ``theta_backward``; the loss is the squared L2 distance between
    the starting and the final coordinates, divided by the number of points
    and the batch size.

    Args:
        image_size: side length (pixels) used both to place the lattice and
            to normalise coordinates.
        transform: stored on the instance; only affine warps are applied.
        use_cuda: keep the lattice and the reference image size on the GPU
            and forward the flag to ``PointTnf``.
    """

    def __init__(self, image_size=240, transform='affine', use_cuda=True):
        super(CycleLoss, self).__init__()
        self.pointTnf = PointTnf(use_cuda=use_cuda)
        self.transform = transform
        # Remembered so forward() normalises with the same size the lattice
        # was built for, and does not force CUDA on CPU-only runs.
        self.image_size = image_size
        self.use_cuda = use_cuda

        # Lattice of pixel positions, stored as (1, 2, NUM_OF_COORD**2).
        lattice = [[float(i) * image_size / config.NUM_OF_COORD,
                    float(j) * image_size / config.NUM_OF_COORD]
                   for i in range(config.NUM_OF_COORD)
                   for j in range(config.NUM_OF_COORD)]
        coord = np.expand_dims(np.array(lattice).transpose(), axis=0)
        self.coord = torch.from_numpy(coord).float()
        if use_cuda:
            self.coord = self.coord.cuda()

    def forward(self, theta_forward, theta_backward):
        """Return the cycle loss for ``theta_forward`` then ``theta_backward``."""
        batch = theta_forward.size()[0]
        _, h, w = self.coord.size()
        coord = Variable(self.coord.expand(batch, h, w))

        # Was hard-coded to 240, which disagreed with the lattice whenever a
        # different image_size was passed to the constructor.
        img_size = Variable(
            torch.FloatTensor([[self.image_size, self.image_size, 1]]))
        if self.use_cuda:
            img_size = img_size.cuda()

        forward_norm = PointsToUnitCoords(coord, img_size)
        forward_norm = self.pointTnf.affPointTnf(theta_forward, forward_norm)
        forward_coord = PointsToPixelCoords(forward_norm, img_size)

        backward_norm = PointsToUnitCoords(forward_coord, img_size)
        backward_norm = self.pointTnf.affPointTnf(theta_backward, backward_norm)
        backward_coord = PointsToPixelCoords(backward_norm, img_size)

        num_points = config.NUM_OF_COORD * config.NUM_OF_COORD
        loss = (torch.dist(coord, backward_coord, p=2) ** 2) / num_points / batch
        return loss
def pck_metric(batch, batch_start_idx, theta_aff, theta_aff_tps, stats, args, use_cuda=True):
    """Compute PCK of the combined TPS + affine warp for one batch.

    Target key-points are normalised, warped with ``theta_aff_tps`` (TPS) and
    then ``theta_aff`` (affine), converted to source pixel coordinates and
    scored with ``pck`` against the source key-points.

    Args:
        batch: dict providing ``source_im_size``, ``target_im_size``,
            ``source_points``, ``target_points`` and ``L_pck``.
        batch_start_idx: row in ``stats['aff_tps']['pck']`` of the first sample.
        theta_aff: affine parameters.
        theta_aff_tps: TPS parameters applied before the affine ones.
        stats: nested dict; ``stats['aff_tps']['pck']`` is written.
        args: namespace providing ``pck_alpha`` and ``tps_reg_factor``.
        use_cuda: forwarded to ``PointTnf``.

    Returns:
        The same ``stats`` object.
    """
    alpha = args.pck_alpha
    src_size, tgt_size = batch['source_im_size'], batch['target_im_size']
    src_points, tgt_points = batch['source_points'], batch['target_points']

    # Instantiate point transformer
    point_tnf = PointTnf(use_cuda=use_cuda, tps_reg_factor=args.tps_reg_factor)

    # Warp: normalise -> TPS -> affine -> source pixel coordinates.
    points_norm = PointsToUnitCoords(tgt_points, tgt_size)
    points_norm = point_tnf.tpsPointTnf(theta_aff_tps, points_norm)
    points_norm = point_tnf.affPointTnf(theta_aff, points_norm)
    warped_points = PointsToPixelCoords(points_norm, src_size)

    L_pck = batch['L_pck'].data
    first = batch_start_idx
    last = batch_start_idx + batch['source_im_size'].size(0)
    indices = range(first, last)

    score = pck(src_points.data, warped_points.data, L_pck, alpha)
    stats['aff_tps']['pck'][indices] = score.unsqueeze(1).cpu().numpy()
    return stats
def area_metrics(batch, batch_start_idx, theta_aff, theta_tps, theta_aff_tps, stats, args, use_cuda=True):
    """Compute mask-based metrics for every sample of a batch.

    For each sample the source mask is warped onto the target with every
    transformation that was supplied (``None`` disables a variant) and three
    metrics are stored in ``stats[variant]`` at row ``batch_start_idx + b``:
    ``intersection_over_union``, ``label_transfer_accuracy`` and
    ``localization_error``.

    Args:
        batch: dict providing ``source_im_size``, ``target_im_size``,
            ``source_polygon`` and ``target_polygon``.
        batch_start_idx: row in the ``stats`` arrays of the first sample.
        theta_aff: affine parameters or ``None``.
        theta_tps: TPS parameters or ``None``.
        theta_aff_tps: TPS parameters applied before ``theta_aff``, or ``None``.
        stats: nested dict that receives the metrics.
        args: not read by this function.
        use_cuda: place masks and grid on the GPU and forward the flag to
            ``PointTnf``.

    Returns:
        The same ``stats`` object.
    """
    do_aff = theta_aff is not None
    do_tps = theta_tps is not None
    do_aff_tps = theta_aff_tps is not None

    batch_size = batch['source_im_size'].size(0)
    pt = PointTnf(use_cuda=use_cuda)

    for b in range(batch_size):
        h_src = int(batch['source_im_size'][b, 0].data.cpu().numpy())
        w_src = int(batch['source_im_size'][b, 1].data.cpu().numpy())
        h_tgt = int(batch['target_im_size'][b, 0].data.cpu().numpy())
        w_tgt = int(batch['target_im_size'][b, 1].data.cpu().numpy())

        target_mask_np, target_mask = poly_str_to_mask(
            batch['target_polygon'][0][b], batch['target_polygon'][1][b],
            h_tgt, w_tgt, use_cuda=use_cuda)
        source_mask_np, source_mask = poly_str_to_mask(
            batch['source_polygon'][0][b], batch['source_polygon'][1][b],
            h_src, w_src, use_cuda=use_cuda)

        # Regular grid over the target image, flattened to (1, 2, h*w).
        mesh_x, mesh_y = np.meshgrid(np.linspace(-1, 1, w_tgt),
                                     np.linspace(-1, 1, h_tgt))
        grid_X = Variable(torch.FloatTensor(mesh_x).unsqueeze(0).unsqueeze(3),
                          requires_grad=False)
        grid_Y = Variable(torch.FloatTensor(mesh_y).unsqueeze(0).unsqueeze(3),
                          requires_grad=False)
        if use_cuda:
            grid_X = grid_X.cuda()
            grid_Y = grid_Y.cuda()
        grid_XY_vec = torch.cat((grid_X.view(1, 1, -1), grid_Y.view(1, 1, -1)), 1)

        def pointsToGrid(x, h_tgt=h_tgt, w_tgt=w_tgt):
            # (1, 2, h*w) point list -> (1, h, w, 2) sampling grid.
            return x.contiguous().view(1, 2, h_tgt, w_tgt).transpose(1, 2).transpose(2, 3)

        idx = batch_start_idx + b

        def record(name, grid):
            # Warp the source mask with `grid` and store the three metrics.
            warped_mask = F.grid_sample(source_mask, grid, align_corners=True)
            flow = th_sampling_grid_to_np_flow(source_grid=grid,
                                               h_src=h_src, w_src=w_src)
            stats[name]['intersection_over_union'][idx] = \
                intersection_over_union(warped_mask, target_mask)
            stats[name]['label_transfer_accuracy'][idx] = \
                label_transfer_accuracy(warped_mask, target_mask)
            stats[name]['localization_error'][idx] = localization_error(
                source_mask_np, target_mask_np, flow)

        if do_aff:
            record('aff', pointsToGrid(
                pt.affPointTnf(theta_aff[b, :].unsqueeze(0), grid_XY_vec)))

        if do_tps:
            record('tps', pointsToGrid(
                pt.tpsPointTnf(theta_tps[b, :].unsqueeze(0), grid_XY_vec)))

        if do_aff_tps:
            # TPS first, then the affine warp on its output.
            record('aff_tps', pointsToGrid(
                pt.affPointTnf(
                    theta_aff[b, :].unsqueeze(0),
                    pt.tpsPointTnf(theta_aff_tps[b, :].unsqueeze(0),
                                   grid_XY_vec))))

    return stats
class TransformedGridLoss(nn.Module):
    """Bidirectional grid loss over an original and a jittered estimate.

    A fixed lattice is warped by the ground truth A->B transformation and by
    its inverse, and compared with the warps produced by two estimate pairs:
    ``(theta_AB, theta_BA)`` ("original") and ``(theta_AC, theta_CA)``
    ("jittered").  Three terms are combined with fixed weights 0.5 / 0.3 /
    0.2: original vs GT, jittered vs GT, and original vs jittered.

    Args:
        geometric_model: only ``'affine'`` is implemented.
        use_cuda: move the grid to the GPU and forward the flag to ``PointTnf``.
        grid_size: number of grid points per axis.
    """

    def __init__(self, geometric_model='affine', use_cuda=True, grid_size=20):
        super(TransformedGridLoss, self).__init__()
        self.geometric_model = geometric_model
        # define virtual grid of points to be transformed (grid_size x grid_size)
        axis_coords = np.linspace(-1, 1, grid_size)
        self.N = grid_size * grid_size
        X, Y = np.meshgrid(axis_coords, axis_coords)
        X = np.reshape(X, (1, 1, self.N))
        Y = np.reshape(Y, (1, 1, self.N))
        # P has shape (1, 2, N): row 0 holds x, row 1 holds y.
        P = np.concatenate((X, Y), 1)
        self.P = Variable(torch.FloatTensor(P), requires_grad=False)
        self.pointTnf = PointTnf(use_cuda)
        if use_cuda:
            self.P = self.P.cuda()

    def forward(self, theta_AB, theta_BA, theta_AC, theta_CA, theta_GT_AB):
        """Return the combined scalar loss.

        Args:
            theta_AB, theta_BA: estimated forward / inverse affine parameters.
            theta_AC, theta_CA: the same for the jittered pair.
            theta_GT_AB: ground-truth forward affine parameters, viewable as
                ``(-1, 2, 3)``.

        Raises:
            NotImplementedError: if ``geometric_model`` is not ``'affine'``.
        """
        if self.geometric_model != 'affine':
            raise NotImplementedError(
                "TransformedGridLoss only supports geometric_model='affine', "
                "got %r" % (self.geometric_model,))

        # expand grid according to batch size
        batch_size = theta_AB.size()[0]
        P = self.P.expand(batch_size, 2, self.N)

        # Build the 3x3 homogeneous GT matrices and invert them to obtain the
        # ground truth for the B->A direction.  The bottom row is created on
        # the same device/dtype as the GT parameters instead of forcing CUDA.
        theta_GT_mat_AB = theta_GT_AB.view(-1, 2, 3)
        bottom_row = theta_GT_mat_AB.new_tensor([0, 0, 1])
        bottom_row = bottom_row.unsqueeze(0).unsqueeze(1).expand(batch_size, 1, 3)
        theta_GT_hom = torch.cat((theta_GT_mat_AB, bottom_row), 1)
        theta_GT_inv = torch.stack([mat.inverse() for mat in theta_GT_hom])
        # Drop the homogeneous row again: first six entries of each 3x3.
        theta_GT_BA = theta_GT_inv.view(-1, 9)[:, :6]

        # compute transformed grid points using estimated and GT tnfs
        P_prime_GT = self.pointTnf.affPointTnf(theta_GT_AB, P)
        P_prime_GT_inv = self.pointTnf.affPointTnf(theta_GT_BA, P)
        P_prime_original = self.pointTnf.affPointTnf(theta_AB, P)
        P_prime_original_inv = self.pointTnf.affPointTnf(theta_BA, P)
        P_prime_jittered = self.pointTnf.affPointTnf(theta_AC, P)
        P_prime_jittered_inv = self.pointTnf.affPointTnf(theta_CA, P)

        def squared_dist(a, b):
            # Squared point distance, summed over the coordinate axis.
            return torch.sum(torch.pow(a - b, 2), 1)

        # compute MSE loss on transformed grid points
        alpha = 0.5
        beta = 0.3
        gamma = 0.2
        l_original = (squared_dist(P_prime_original, P_prime_GT)
                      + squared_dist(P_prime_original_inv, P_prime_GT_inv))
        l_jittered = (squared_dist(P_prime_jittered, P_prime_GT)
                      + squared_dist(P_prime_jittered_inv, P_prime_GT_inv))
        l_identity = (squared_dist(P_prime_original, P_prime_jittered)
                      + squared_dist(P_prime_original_inv, P_prime_jittered_inv))

        Loss = (alpha * l_original) + (beta * l_jittered) + (gamma * l_identity)
        return torch.mean(Loss)
class PFWillowDataset(Dataset):
    """Proposal Flow (PF-Willow) image pair dataset.

    Each CSV row describes one pair: columns 0/1 hold the two image paths
    (relative to ``dataset_path``), column 2 the category, columns 3-4 and
    5-6 the ``;``-separated X/Y key-point coordinates of image A and B, and
    column 7 a flip code (1: flip B, 2: flip A, 3: flip both).

    Args:
        csv_file (string): Path to the csv file with image names and annotations.
        dataset_path (string): Directory with the images.
        output_size (2-tuple): Desired output size (height, width).
        transform (callable): Transformation for post-processing the training
            pair (eg. image normalization).
        category (number, optional): keep only the pairs whose category
            column equals this value.
    """

    # Key-point arrays are padded with -1 up to this many entries.
    _NUM_POINTS = 20

    def __init__(self, csv_file, dataset_path, output_size=(240, 240), transform=None, category=None):
        self.category_names = ['car(G)', 'car(M)', 'car(S)', 'duck(S)',
                               'motorbike(G)', 'motorbike(M)', 'motorbike(S)',
                               'winebottle(M)', 'winebottle(wC)', 'winebottle(woC)']
        self.out_h, self.out_w = output_size
        self.pairs = pd.read_csv(csv_file)
        # `.values` replaces `.as_matrix()`, which was removed in pandas 1.0.
        self.category = self.pairs.iloc[:, 2].values.astype('float')
        if category is not None:
            cat_idx = np.nonzero(self.category == category)[0]
            self.category = self.category[cat_idx]
            self.pairs = self.pairs.iloc[cat_idx, :]
        self.img_A_names = self.pairs.iloc[:, 0]
        self.img_B_names = self.pairs.iloc[:, 1]
        self.point_A_coords = self.pairs.iloc[:, 3:5]
        self.point_B_coords = self.pairs.iloc[:, 5:7]
        self.flip = self.pairs.iloc[:, 7].values.astype('int')
        self.dataset_path = dataset_path
        self.transform = transform
        # no cuda as dataset is called from CPU threads in dataloader and
        # produces conflict
        self.affineTnf = GeometricTnf(out_h=self.out_h, out_w=self.out_w, use_cuda=False)
        # Identity affine transformation and point transformer used to map
        # key-points into the resized image.
        self.theta_identity = torch.Tensor(
            np.expand_dims(np.array([[1, 0, 0], [0, 1, 0]]), 0).astype(np.float32))
        self.pointTnf = PointTnf(use_cuda=False)

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        """Return the sample dict for pair ``idx`` (after ``transform``)."""
        # Decode the flip code: 1 -> B, 2 -> A, 3 -> both.
        flip_code = self.flip[idx]
        flipA = flip_code in (2, 3)
        flipB = flip_code in (1, 3)

        # get pre-processed images
        image_A, im_size_A = self.get_image(self.img_A_names, idx, flip=flipA)
        image_B, im_size_B = self.get_image(self.img_B_names, idx, flip=flipB)

        # category value read from the CSV for this pair
        image_category = self.category[idx]

        # get pre-processed point coords
        # NOTE(review): the warped image size is hard-coded to 240x240 rather
        # than taken from output_size -- confirm.
        point_A_coords, warped_point_A_coords = self.get_points(
            self.point_A_coords, idx, flipA, im_size_A, (240, 240, 3))
        point_B_coords, warped_point_B_coords = self.get_points(
            self.point_B_coords, idx, flipB, im_size_B, (240, 240, 3))
        correspondence = self.pack_corr(
            point_A_coords, warped_point_A_coords, warped_point_B_coords)

        # compute PCK reference length L_pck (equal to max bounding box side
        # in image_A).  Only the real key-points are used: the -1 padding
        # appended by get_points would otherwise stretch the box.
        n_valid = int((point_A_coords[0] >= 0).sum())
        if n_valid > 0:
            valid_A = point_A_coords[:, :n_valid]
            L_pck = torch.FloatTensor(
                [torch.max(valid_A.max(1)[0] - valid_A.min(1)[0])])
        else:
            L_pck = torch.FloatTensor([0.0])

        sample = {'source_image': image_A, 'target_image': image_B,
                  'source_im_size': im_size_A, 'target_im_size': im_size_B,
                  'source_points': point_A_coords, 'target_points': point_B_coords,
                  'warped_source_points': warped_point_A_coords,
                  'warped_target_points': warped_point_B_coords,
                  'correspondence': correspondence,
                  'category': image_category, 'L_pck': L_pck}

        if self.transform:
            sample = self.transform(sample)

        return sample

    def get_image(self, img_name_list, idx, flip):
        """Load, optionally flip and resize image ``idx``.

        Returns:
            ``(image, im_size)``: the resized image tensor and the original
            image shape as a float tensor.
        """
        img_name = os.path.join(self.dataset_path, img_name_list.iloc[idx])
        image = io.imread(img_name)

        # if gray scale, convert to 3-channel image
        if image.ndim == 2:
            # Was `np.expand_dim`, which does not exist and crashed here.
            image = np.repeat(np.expand_dims(image, 2), axis=2, repeats=3)

        if flip:
            image = np.flip(image, 1)

        # get image size
        im_size = np.asarray(image.shape)

        # convert to torch Variable
        image = np.expand_dims(image.transpose((2, 0, 1)), 0)
        image = torch.Tensor(image.astype(np.float32))
        image_var = Variable(image, requires_grad=False)

        # Resize image using bilinear sampling with identity affine tnf
        image = self.affineTnf(image_var).data.squeeze(0)

        im_size = torch.Tensor(im_size.astype(np.float32))

        return (image, im_size)

    def get_points(self, point_coords_list, idx, flip, im_size, warped_im_size):
        """Parse the key-points of pair ``idx`` and map them to the resized image.

        Returns:
            ``(point_coords, warped_points_aff)``: the padded ``(2, 20)``
            key-points in original pixel coordinates, and the same points
            passed through the identity affine warp and converted to pixel
            coordinates of ``warped_im_size``.
        """
        n_pad = self._NUM_POINTS
        X = np.fromstring(point_coords_list.iloc[idx, 0], sep=';')
        Y = np.fromstring(point_coords_list.iloc[idx, 1], sep=';')
        if flip:
            # Mirror horizontally around the image width.
            X = float(im_size[1]) - X

        # Pad with -1 so every sample has the same number of entries.
        Xpad = -np.ones(n_pad)
        Xpad[:len(X)] = X
        Ypad = -np.ones(n_pad)
        Ypad[:len(Y)] = Y
        point_coords = np.concatenate(
            (Xpad.reshape(1, n_pad), Ypad.reshape(1, n_pad)), axis=0)

        h, w, c = (float(v) for v in im_size)
        im_size = torch.FloatTensor([[h, w, c]])
        coordinate = torch.FloatTensor(point_coords).view(1, 2, n_pad)
        target_points_norm = PointsToUnitCoords(coordinate, im_size)

        h, w, c = warped_im_size
        warped_im_size = torch.FloatTensor([[h, w, c]])

        warped_points_aff_norm = self.pointTnf.affPointTnf(
            self.theta_identity, target_points_norm)
        warped_points_aff = PointsToPixelCoords(warped_points_aff_norm, warped_im_size)

        # make arrays float tensor for subsequent processing
        point_coords = torch.Tensor(point_coords.astype(np.float32))
        return point_coords, warped_points_aff

    def pack_corr(self, a, warp_a, warp_b):
        """Pack correspondences into a ``(20, 5)`` tensor.

        Row ``i`` is ``[xA, yA, xB, yB, 1]`` (warped coordinates) when the
        i-th point of ``a`` is valid (x >= 0) and all zeros otherwise.
        """
        a_x = a.numpy()[0]
        warp_a_np = warp_a.numpy()[0]
        warp_b_np = warp_b.numpy()[0]

        corr = np.zeros((self._NUM_POINTS, 5))
        for i, x in enumerate(a_x):
            if x >= 0:
                corr[i] = (warp_a_np[0][i], warp_a_np[1][i],
                           warp_b_np[0][i], warp_b_np[1][i], 1)

        return torch.FloatTensor(corr).view(self._NUM_POINTS, 5)
class PFPascalDataset(Dataset):
    """Proposal Flow (PF-Pascal) image pair dataset with keypoint annotations.

    Each row of the csv file describes one image pair. Columns are read by
    position: 0/1 are the image A/B file names, 2 is the category, 3:5 are
    the ';'-separated X and Y keypoint strings of image A, 5:7 those of
    image B, and 7 is the flip code (1: flip B, 2: flip A, 3: flip both).

    Args:
        csv_file (string): Path to the csv file with image names and points.
        dataset_path (string): Directory with the images.
        dataset_size (int, optional): Keep only the first `dataset_size` rows.
        output_size (2-tuple): Desired output size as (height, width).
        transform (callable, optional): Post-processing applied to the sample
            dict (eg. image normalization).
        category (optional): If given, keep only the pairs of this category.
        random_crop (bool): Randomly crop each image before resizing; points
            that fall outside the crop are marked as invalid (-1).
    """

    # Keypoint arrays are padded with -1 up to this fixed length so that
    # samples can be stacked into batches.
    max_points = 20

    def __init__(self,
                 csv_file,
                 dataset_path,
                 dataset_size=None,
                 output_size=(240, 240),
                 transform=None,
                 category=None,
                 random_crop=True):
        self.category_names = [
            'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
            'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
            'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
        ]
        self.random_crop = random_crop
        self.out_h, self.out_w = output_size
        self.pairs = pd.read_csv(csv_file)
        if dataset_size is not None:
            dataset_size = min(dataset_size, len(self.pairs))
            self.pairs = self.pairs.iloc[0:dataset_size, :]
        # `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
        self.category = self.pairs.iloc[:, 2].values.astype('float')
        if category is not None:
            cat_idx = np.nonzero(self.category == category)[0]
            self.category = self.category[cat_idx]
            self.pairs = self.pairs.iloc[cat_idx, :]
        self.img_A_names = self.pairs.iloc[:, 0]
        self.img_B_names = self.pairs.iloc[:, 1]
        self.point_A_coords = self.pairs.iloc[:, 3:5]
        self.point_B_coords = self.pairs.iloc[:, 5:7]
        self.flip = self.pairs.iloc[:, 7].values.astype('int')
        self.dataset_path = dataset_path
        self.transform = transform
        # no cuda as dataset is called from CPU threads in dataloader and
        # produces conflict
        self.affineTnf = GeometricTnf(
            out_h=self.out_h, out_w=self.out_w, use_cuda=False)
        # Identity affine transform used to map points into the resized image.
        self.theta_identity = torch.Tensor(
            np.expand_dims(np.array([[1, 0, 0], [0, 1, 0]]),
                           0).astype(np.float32))
        self.pointTnf = PointTnf(use_cuda=False)

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.pairs)

    def __getitem__(self, idx):
        """Load pair `idx` and return the sample dict (after `transform`)."""
        # flip code: 1 -> flip B only, 2 -> flip A only, 3 -> flip both
        flip_code = self.flip[idx]
        flipA = flip_code in (2, 3)
        flipB = flip_code in (1, 3)

        image_A, im_size_A, boundary_A = self.get_image(
            self.img_A_names, idx, flip=flipA)
        image_B, im_size_B, boundary_B = self.get_image(
            self.img_B_names, idx, flip=flipB)

        # get pre-processed point coords
        warped_size = (240, 240, 3)
        point_A_coords, warped_point_A_coords = self.get_points(
            self.point_A_coords, idx, flipA, im_size_A, warped_size,
            boundary_A)
        point_B_coords, warped_point_B_coords = self.get_points(
            self.point_B_coords, idx, flipB, im_size_B, warped_size,
            boundary_B)
        correspondence = self.pack_corr(point_A_coords, point_B_coords,
                                        warped_point_A_coords,
                                        warped_point_B_coords)

        # category: class of pf-pascal, will be the index of class list plus 1
        image_category = self.category[idx]

        # compute PCK reference length L_pck (equal to max bounding box side
        # in image_A); float() keeps this working whether torch.max returns
        # a Python number or a 0-dim tensor
        bbox_sides = point_A_coords.max(1)[0] - point_A_coords.min(1)[0]
        L_pck = torch.FloatTensor([float(torch.max(bbox_sides))])

        sample = {
            'source_image': image_A,
            'target_image': image_B,
            'source_im_size': im_size_A,
            'target_im_size': im_size_B,
            'source_points': point_A_coords,
            'target_points': point_B_coords,
            'warped_source_points': warped_point_A_coords,
            'warped_target_points': warped_point_B_coords,
            'correspondence': correspondence,
            'category': image_category,
            'L_pck': L_pck
        }

        if self.transform:
            sample = self.transform(sample)

        return sample

    def get_image(self, img_name_list, idx, flip):
        """Read, optionally crop and flip, then resize one image.

        Returns:
            tuple: (image, im_size, boundary) where `image` is the resized
            image tensor, `im_size` is a float tensor holding the shape of
            the image after cropping (before resizing), and `boundary` is
            the (top, bottom, left, right) crop window in the original
            image; it spans the whole image when `random_crop` is disabled.
        """
        img_name = os.path.join(self.dataset_path, img_name_list.iloc[idx])
        image = io.imread(img_name)

        # if gray scale, convert to 3-channel image
        if image.ndim == 2:
            image = np.repeat(np.expand_dims(image, 2), axis=2, repeats=3)

        h, w = image.shape[:2]
        if self.random_crop:
            # Each border moves inwards by up to a quarter of the side.
            # max(1, ...) keeps randint's upper bound valid for tiny images.
            jitter_h = max(1, h // 4)
            jitter_w = max(1, w // 4)
            top = np.random.randint(jitter_h)
            bottom = int(3 * h / 4 + np.random.randint(jitter_h))
            left = np.random.randint(jitter_w)
            right = int(3 * w / 4 + np.random.randint(jitter_w))
            image = image[top:bottom, left:right, :]
        else:
            # No crop: the boundary is the full image so callers can always
            # unpack it.
            top, bottom, left, right = 0, h, 0, w
        boundary = (top, bottom, left, right)

        if flip:
            image = np.flip(image, 1)

        # get image size
        im_size = np.asarray(image.shape)

        # convert to torch Variable
        image = np.expand_dims(image.transpose((2, 0, 1)), 0)
        image = torch.Tensor(image.astype(np.float32))
        image_var = Variable(image, requires_grad=False)

        # Resize image using bilinear sampling with identity affine tnf
        image = self.affineTnf(image_var).data.squeeze(0)

        im_size = torch.Tensor(im_size.astype(np.float32))

        return (image, im_size, boundary)

    def get_points(self, point_coords_list, idx, flip, im_size,
                   warped_im_size, boundary=None):
        """Parse the keypoints of row `idx` and map them to the resized image.

        Args:
            point_coords_list: Two-column frame with ';'-separated X and Y
                coordinate strings.
            idx (int): Row index.
            flip (bool): Mirror the X coordinates horizontally.
            im_size: (h, w, c) of the (cropped) source image.
            warped_im_size: (h, w, c) of the resized image.
            boundary: (top, bottom, left, right) crop window; only used when
                `random_crop` is enabled.

        Returns:
            tuple: (point_coords, warped_points_aff). `point_coords` is a
            2 x max_points float tensor in source-image pixels, padded with
            -1; `warped_points_aff` holds the same points in the pixel
            coordinates of the resized image.

        Raises:
            ValueError: If X and Y differ in length or hold more than
                `max_points` entries.
        """
        X = np.fromstring(point_coords_list.iloc[idx, 0], sep=';')
        Y = np.fromstring(point_coords_list.iloc[idx, 1], sep=';')
        n_pad = self.max_points
        if len(X) != len(Y) or len(X) > n_pad:
            raise ValueError(
                'expected matching X/Y keypoint lists with at most '
                '{} entries, got {} and {}'.format(n_pad, len(X), len(Y)))

        # Points falling outside the crop window are flagged and later
        # overwritten with the -1 padding value.
        outside = np.zeros(len(X), dtype=bool)
        if self.random_crop and boundary is not None:
            top, bottom, left, right = boundary
            X = X - left
            Y = Y - top
            outside = ((X < 0) | (X >= (right - left)) |
                       (Y < 0) | (Y >= (bottom - top)))

        if flip:
            # float() so the arithmetic stays in numpy even when im_size is
            # a torch tensor
            X = float(im_size[1]) - X

        Xpad = -np.ones(n_pad)
        Ypad = -np.ones(n_pad)
        Xpad[:len(X)] = np.where(outside, -1, X)
        Ypad[:len(Y)] = np.where(outside, -1, Y)
        point_coords = np.stack((Xpad, Ypad), axis=0)

        im_size = torch.FloatTensor([[float(v) for v in im_size]])
        coordinate = torch.FloatTensor(point_coords).view(1, 2, n_pad)
        target_points_norm = PointsToUnitCoords(coordinate, im_size)

        warped_im_size = torch.FloatTensor(
            [[float(v) for v in warped_im_size]])
        warped_points_aff_norm = self.pointTnf.affPointTnf(
            self.theta_identity, target_points_norm)
        warped_points_aff = PointsToPixelCoords(warped_points_aff_norm,
                                                warped_im_size)

        # make arrays float tensor for subsequent processing
        point_coords = torch.Tensor(point_coords.astype(np.float32))
        return point_coords, warped_points_aff

    def pack_corr(self, a, b, warp_a, warp_b):
        """Pack valid correspondences into an N x 5 float tensor.

        Row i is (warp_a_x, warp_a_y, warp_b_x, warp_b_y, 1) when point i of
        both `a` and `b` lies inside [0, out_w) x [0, out_h); otherwise the
        row is all zeros.

        Args:
            a, b: 2 x N tensors of source / target points.
            warp_a, warp_b: 1 x 2 x N tensors of the corresponding points in
                the resized images.
        """
        # NOTE(review): validity is tested on `a`/`b` (which `__getitem__`
        # passes in source-image pixels) against the *output* size -- confirm
        # this is intended rather than testing the warped coordinates.
        a_np = a.numpy()
        b_np = b.numpy()
        wa = warp_a.numpy()[0]
        wb = warp_b.numpy()[0]

        valid = ((a_np[0] >= 0) & (a_np[0] < self.out_w) &
                 (a_np[1] >= 0) & (a_np[1] < self.out_h) &
                 (b_np[0] >= 0) & (b_np[0] < self.out_w) &
                 (b_np[1] >= 0) & (b_np[1] < self.out_h))

        n_points = a_np.shape[1]
        corr = np.zeros((n_points, 5))
        corr[valid, 0] = wa[0][valid]
        corr[valid, 1] = wa[1][valid]
        corr[valid, 2] = wb[0][valid]
        corr[valid, 3] = wb[1][valid]
        corr[valid, 4] = 1
        return torch.FloatTensor(corr).view(n_points, 5)