def __call__(self, image_batch, theta_aff, theta_aff_tps, use_cuda=True):
    sampling_grid_aff = self.affTnf(image_batch=None,
                                    theta_batch=theta_aff.view(-1, 2, 3),
                                    return_sampling_grid=True,
                                    return_warped_image=False)
    sampling_grid_aff_tps = self.tpsTnf(image_batch=None,
                                        theta_batch=theta_aff_tps,
                                        return_sampling_grid=True,
                                        return_warped_image=False)
    if self.padding_crop_factor is not None:
        sampling_grid_aff_tps = sampling_grid_aff_tps * self.padding_crop_factor

    # put 1e10 value in region out of bounds of sampling_grid_aff
    in_bound_mask_aff = ((sampling_grid_aff[:, :, :, 0] > -1) *
                         (sampling_grid_aff[:, :, :, 0] < 1) *
                         (sampling_grid_aff[:, :, :, 1] > -1) *
                         (sampling_grid_aff[:, :, :, 1] < 1)).unsqueeze(3)
    in_bound_mask_aff = in_bound_mask_aff.expand_as(sampling_grid_aff)
    sampling_grid_aff = torch.mul(in_bound_mask_aff.float(), sampling_grid_aff)
    sampling_grid_aff = torch.add((in_bound_mask_aff.float() - 1) * (1e10), sampling_grid_aff)

    # compose transformations: sample the affine grid (as a 2-channel image) at the TPS grid
    sampling_grid_aff_tps_comp = F.grid_sample(sampling_grid_aff.transpose(2, 3).transpose(1, 2),
                                               sampling_grid_aff_tps).transpose(1, 2).transpose(2, 3)

    # put 1e10 value in region out of bounds of sampling_grid_aff_tps_comp
    in_bound_mask_aff_tps = ((sampling_grid_aff_tps[:, :, :, 0] > -1) *
                             (sampling_grid_aff_tps[:, :, :, 0] < 1) *
                             (sampling_grid_aff_tps[:, :, :, 1] > -1) *
                             (sampling_grid_aff_tps[:, :, :, 1] < 1)).unsqueeze(3)
    in_bound_mask_aff_tps = in_bound_mask_aff_tps.expand_as(sampling_grid_aff_tps_comp)
    sampling_grid_aff_tps_comp = torch.mul(in_bound_mask_aff_tps.float(), sampling_grid_aff_tps_comp)
    sampling_grid_aff_tps_comp = torch.add((in_bound_mask_aff_tps.float() - 1) * (1e10), sampling_grid_aff_tps_comp)

    # sample transformed image
    warped_image_batch = F.grid_sample(image_batch, sampling_grid_aff_tps_comp)
    return warped_image_batch
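# A minimal, self-contained sketch (an assumed example, not from the repo above)
# of the grid-composition trick used in __call__: sampling one sampling grid,
# treated as a 2-channel image, at the locations of a second grid is equivalent
# to building the grid of the composed transform. Verified here with two affine
# grids, where the composed grid has a closed form (homogeneous matrix product).
import torch
import torch.nn.functional as F

N, H, W = 1, 8, 8
A = torch.tensor([[[0.5, 0.0, 0.1],
                   [0.0, 0.5, -0.2]]])   # scale + shift
B = torch.tensor([[[0.8, 0.0, 0.0],
                   [0.0, 0.8, 0.0]]])    # pure scale, keeps B p in bounds
grid_A = F.affine_grid(A, (N, 1, H, W), align_corners=True)
grid_B = F.affine_grid(B, (N, 1, H, W), align_corners=True)

# Compose: result(p) = grid_A(B p) = A (B p).
grid_AB = F.grid_sample(grid_A.permute(0, 3, 1, 2), grid_B,
                        align_corners=True).permute(0, 2, 3, 1)

# Reference: the affine_grid of the homogeneous product A @ B.
row = torch.tensor([[[0.0, 0.0, 1.0]]])
AB = (torch.cat([A, row], dim=1) @ torch.cat([B, row], dim=1))[:, :2]
grid_ref = F.affine_grid(AB, (N, 1, H, W), align_corners=True)
print(torch.allclose(grid_AB, grid_ref, atol=1e-5))  # True: bilinear interp is exact for affine grids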
def _crop_pool_layer(bottom, rois, max_pool=True):
    # code modified from
    # https://github.com/ruotianluo/pytorch-faster-rcnn
    # implement it using stn
    # box to affine
    # input (x1,y1,x2,y2)
    """
    [  x2-x1             x1 + x2 - W + 1  ]
    [  -----      0      ---------------  ]
    [  W - 1                  W - 1       ]
    [                                     ]
    [           y2-y1    y1 + y2 - H + 1  ]
    [    0      -----    ---------------  ]
    [           H - 1         H - 1       ]
    """
    rois = rois.detach()
    batch_size = bottom.size(0)
    D = bottom.size(1)
    H = bottom.size(2)
    W = bottom.size(3)
    roi_per_batch = rois.size(0) // batch_size  # integer division: expand() below needs an int

    x1 = rois[:, 1::4] / 16.0
    y1 = rois[:, 2::4] / 16.0
    x2 = rois[:, 3::4] / 16.0
    y2 = rois[:, 4::4] / 16.0

    height = bottom.size(2)
    width = bottom.size(3)

    # affine theta
    zero = Variable(rois.data.new(rois.size(0), 1).zero_())
    theta = torch.cat([
        (x2 - x1) / (width - 1),
        zero,
        (x1 + x2 - width + 1) / (width - 1),
        zero,
        (y2 - y1) / (height - 1),
        (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)

    if max_pool:
        pre_pool_size = cfg.POOLING_SIZE * 2
        grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size)))
        bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\
                       .contiguous().view(-1, D, H, W)
        crops = F.grid_sample(bottom, grid)
        crops = F.max_pool2d(crops, 2, 2)
    else:
        grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE)))
        bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\
                       .contiguous().view(-1, D, H, W)
        crops = F.grid_sample(bottom, grid)

    return crops, grid
def area_metrics(batch, batch_start_idx, theta_aff, theta_tps, theta_aff_tps, stats, args, use_cuda=True):
    do_aff = theta_aff is not None
    do_tps = theta_tps is not None
    do_aff_tps = theta_aff_tps is not None

    batch_size = batch['source_im_size'].size(0)
    for b in range(batch_size):
        h_src = int(batch['source_im_size'][b, 0].data.cpu().numpy())
        w_src = int(batch['source_im_size'][b, 1].data.cpu().numpy())
        h_tgt = int(batch['target_im_size'][b, 0].data.cpu().numpy())
        w_tgt = int(batch['target_im_size'][b, 1].data.cpu().numpy())

        target_mask_np, target_mask = poly_str_to_mask(batch['target_polygon'][0][b],
                                                       batch['target_polygon'][1][b],
                                                       h_tgt, w_tgt, use_cuda=use_cuda)
        source_mask_np, source_mask = poly_str_to_mask(batch['source_polygon'][0][b],
                                                       batch['source_polygon'][1][b],
                                                       h_src, w_src, use_cuda=use_cuda)

        grid_aff, grid_tps, grid_aff_tps = theta_to_sampling_grid(h_tgt, w_tgt,
                                                                  theta_aff[b, :] if do_aff else None,
                                                                  theta_tps[b, :] if do_tps else None,
                                                                  theta_aff_tps[b, :] if do_aff_tps else None,
                                                                  use_cuda=use_cuda,
                                                                  tps_reg_factor=args.tps_reg_factor)

        idx = batch_start_idx + b
        if do_aff:
            warped_mask_aff = F.grid_sample(source_mask, grid_aff)
            flow_aff = th_sampling_grid_to_np_flow(source_grid=grid_aff, h_src=h_src, w_src=w_src)
            stats['aff']['intersection_over_union'][idx] = intersection_over_union(warped_mask_aff, target_mask)
            stats['aff']['label_transfer_accuracy'][idx] = label_transfer_accuracy(warped_mask_aff, target_mask)
            stats['aff']['localization_error'][idx] = localization_error(source_mask_np, target_mask_np, flow_aff)
        if do_tps:
            warped_mask_tps = F.grid_sample(source_mask, grid_tps)
            flow_tps = th_sampling_grid_to_np_flow(source_grid=grid_tps, h_src=h_src, w_src=w_src)
            stats['tps']['intersection_over_union'][idx] = intersection_over_union(warped_mask_tps, target_mask)
            stats['tps']['label_transfer_accuracy'][idx] = label_transfer_accuracy(warped_mask_tps, target_mask)
            stats['tps']['localization_error'][idx] = localization_error(source_mask_np, target_mask_np, flow_tps)
        if do_aff_tps:
            warped_mask_aff_tps = F.grid_sample(source_mask, grid_aff_tps)
            flow_aff_tps = th_sampling_grid_to_np_flow(source_grid=grid_aff_tps, h_src=h_src, w_src=w_src)
            stats['aff_tps']['intersection_over_union'][idx] = intersection_over_union(warped_mask_aff_tps, target_mask)
            stats['aff_tps']['label_transfer_accuracy'][idx] = label_transfer_accuracy(warped_mask_aff_tps, target_mask)
            stats['aff_tps']['localization_error'][idx] = localization_error(source_mask_np, target_mask_np, flow_aff_tps)

    return stats
def window_to_image(z_where, window_size, image_size, windows):
    n = windows.size(0)
    assert windows.size(1) == window_size ** 2, 'Size mismatch.'
    theta = expand_z_where(z_where)
    grid = F.affine_grid(theta, torch.Size((n, 1, image_size, image_size)))
    out = F.grid_sample(windows.view(n, 1, window_size, window_size), grid)
    return out.view(n, image_size, image_size)
def image_to_window(z_where, window_size, image_size, images):
    n = images.size(0)
    assert images.size(1) == images.size(2) == image_size, 'Size mismatch.'
    theta_inv = expand_z_where(z_where_inv(z_where))
    grid = F.affine_grid(theta_inv, torch.Size((n, 1, window_size, window_size)))
    out = F.grid_sample(images.view(n, 1, image_size, image_size), grid)
    return out.view(n, -1)
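# A self-contained sketch (assumed example, with an explicit theta instead of
# the expand_z_where/z_where_inv helpers above): theta = [[s, 0, tx], [0, s, ty]]
# reads a window of relative scale s centred at normalized (tx, ty) out of the
# image, and the inverse theta (scale 1/s, shift -t/s) pastes it back.
import torch
import torch.nn.functional as F

image = torch.rand(1, 1, 32, 32)
s, tx, ty = 0.5, 0.25, -0.25
theta = torch.tensor([[[s, 0.0, tx], [0.0, s, ty]]])
grid = F.affine_grid(theta, torch.Size((1, 1, 16, 16)), align_corners=False)
window = F.grid_sample(image, grid, align_corners=False)   # image_to_window-style crop

theta_inv = torch.tensor([[[1 / s, 0.0, -tx / s], [0.0, 1 / s, -ty / s]]])
grid_inv = F.affine_grid(theta_inv, torch.Size((1, 1, 32, 32)), align_corners=False)
pasted = F.grid_sample(window, grid_inv, align_corners=False)  # window_to_image-style paste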
def stn(self, z, c):
    zs = z.view(-1, 10 * 3 * 3)
    theta = self.fc_loc(zs)
    theta = theta.view(-1, 2, 3)
    grid = F.affine_grid(theta, c.size())
    cond = F.grid_sample(c, grid)
    return cond
def compare_grid_sample():
    # do gradcheck
    N = random.randint(1, 8)
    C = 2  # random.randint(1, 8)
    H = 5  # random.randint(1, 8)
    W = 4  # random.randint(1, 8)
    input = Variable(torch.randn(N, C, H, W).cuda(), requires_grad=True)
    input_p = input.clone().data.contiguous()

    grid = Variable(torch.randn(N, H, W, 2).cuda(), requires_grad=True)
    grid_clone = grid.clone().contiguous()

    out_official = F.grid_sample(input, grid)
    grad_outputs = Variable(torch.rand(out_official.size()).cuda())
    grad_outputs_clone = grad_outputs.clone().contiguous()
    grad_inputs = torch.autograd.grad(out_official, (input, grid), grad_outputs.contiguous())
    grad_input_off = grad_inputs[0]

    crf = RoICropFunction()
    grid_yx = torch.stack([grid_clone.data[:, :, :, 1], grid_clone.data[:, :, :, 0]], 3).contiguous().cuda()
    out_stn = crf.forward(input_p, grid_yx)
    grad_inputs = crf.backward(grad_outputs_clone.data)
    grad_input_stn = grad_inputs[0]
    pdb.set_trace()

    delta = (grad_input_off.data - grad_input_stn).sum()
def pad_image(image, pixel_2d, sout=None, grid_out=None):
    # image is the input image
    # pixel_2d are the coordinates, from 0 to sout
    # first coordinate is x, second coordinate is y
    # sout is the side length of the output image
    # grid_out is the meshgrid of dimension sout x sout

    # image should be N x 1 x slen x slen
    assert len(image.shape) == 4
    assert image.shape[1] == 1
    # assert there is a coordinate for each image
    assert image.shape[0] == pixel_2d.shape[0]

    batchsize = image.shape[0]
    sin = image.shape[-1]

    if grid_out is None:
        assert sout is not None
        r0 = (sout - 1) / 2
        grid_out = torch.FloatTensor(np.mgrid[0:sout, 0:sout].transpose() - r0)
    else:
        sout = grid_out.shape[0]

    grid1 = grid_out.unsqueeze(0).expand([pixel_2d.size(0), -1, -1, -1])
    grid2 = grid1 - pixel_2d.float().unsqueeze(1).unsqueeze(1)
    grid3 = (grid2.float() + (sout // 2)) / (sin // 2)

    # grid sample only works with 4D inputs
    padded = f.grid_sample(image, grid3)
    return padded
def resize(original, um_sizes, desired_res):
    """ Resize array originally of um_sizes size to have desired_res resolution.

    We preserve the center of original and resized arrays exactly in the middle.
    We also make sure resolution is exactly the desired resolution. Given these
    two constraints, we cannot hold FOV of original and resized arrays to be
    exactly the same.

    :param np.array original: Array to resize.
    :param tuple um_sizes: Size in microns of the array (one per axis).
    :param int or tuple desired_res: Desired resolution (um/px) for the output array.

    :return: Output array (np.float32) resampled to the desired resolution. Size
        in pixels is round(um_sizes / desired_res).
    """
    import torch.nn.functional as F

    # Create grid to sample in microns
    grid = create_grid(um_sizes, desired_res)  # d x h x w x 3

    # Re-express as a torch grid [-1, 1]
    um_per_px = np.array([um / px for um, px in zip(um_sizes, original.shape)])
    torch_ones = np.array(um_sizes) / 2 - um_per_px / 2  # sample position of last pixel in original
    grid = grid / torch_ones[::-1].astype(np.float32)

    # Resample
    input_tensor = torch.from_numpy(original.reshape(1, 1, *original.shape).astype(np.float32))
    grid_tensor = torch.from_numpy(grid.reshape(1, *grid.shape))
    resized_tensor = F.grid_sample(input_tensor, grid_tensor, padding_mode='border')
    resized = resized_tensor.numpy().squeeze()

    return resized
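# A small sanity check (assumed example, independent of create_grid) of the
# normalization used above: scaling so that +/-1 lands on the *centers* of the
# border pixels matches grid_sample's align_corners=True convention, which was
# the default behaviour in the PyTorch versions this code targets.
import torch
import torch.nn.functional as F

x = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)
corners = torch.tensor([[[[-1.0, -1.0], [1.0, -1.0]],
                         [[-1.0, 1.0], [1.0, 1.0]]]])   # (x, y) order
print(F.grid_sample(x, corners, align_corners=True).squeeze())
# tensor([[ 0.,  3.],
#         [12., 15.]])  -> exactly the four corner pixel values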
def stn(self, x):
    xs = self.localization(x)
    xs = xs.view(-1, 10 * 3 * 3)
    theta = self.fc_loc(xs)
    theta = theta.view(-1, 2, 3)

    grid = F.affine_grid(theta, x.size())
    x = F.grid_sample(x, grid)
    return x
def warp_image(image, flow):
    """
    Warp image (np.ndarray, shape=[h_src,w_src,3]) with flow
    (np.ndarray, shape=[h_tgt,w_tgt,2]).
    """
    h_src, w_src = image.shape[0], image.shape[1]
    sampling_grid_torch = np_flow_to_th_sampling_grid(flow, h_src, w_src)
    image_torch = Variable(torch.FloatTensor(image.astype(np.float32)).transpose(1, 2).transpose(0, 1).unsqueeze(0))
    warped_image_torch = F.grid_sample(image_torch, sampling_grid_torch)
    warped_image = warped_image_torch.data.squeeze(0).transpose(0, 1).transpose(1, 2).numpy().astype(np.uint8)
    return warped_image
def affTpsTnf(source_image, theta_aff, theta_aff_tps, use_cuda=use_cuda):
    tpstnf = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    sampling_grid = tpstnf(image_batch=source_image,
                           theta_batch=theta_aff_tps,
                           return_sampling_grid=True)[1]
    X = sampling_grid[:, :, :, 0].unsqueeze(3)
    Y = sampling_grid[:, :, :, 1].unsqueeze(3)
    # apply the affine transform to the TPS grid points to compose the two warps
    Xp = (X * theta_aff[:, 0].unsqueeze(1).unsqueeze(2) +
          Y * theta_aff[:, 1].unsqueeze(1).unsqueeze(2) +
          theta_aff[:, 2].unsqueeze(1).unsqueeze(2))
    Yp = (X * theta_aff[:, 3].unsqueeze(1).unsqueeze(2) +
          Y * theta_aff[:, 4].unsqueeze(1).unsqueeze(2) +
          theta_aff[:, 5].unsqueeze(1).unsqueeze(2))
    sg = torch.cat((Xp, Yp), 3)
    warped_image_batch = F.grid_sample(source_image, sg)
    return warped_image_batch
def warp_grid_torch(torch_mask, torch_grid, torch_texture):
    torch_grid = torch_grid.transpose(0, 1)
    torch_texture = torch_texture.transpose(2, 0)
    torch_texture = torch_texture.transpose(1, 2)
    torch_texture = torch_texture.unsqueeze(0)
    torch_grid = normalize_grid_for_grid_sample(torch_grid)
    torch_grid = torch_grid.unsqueeze(0)
    res = F.grid_sample(torch_texture, torch_grid, mode="bilinear").squeeze()
    res = res * torch_mask
    return res
def warp_grid_torch(torch_mask, torch_grid, torch_texture):
    """
    :param torch_mask:
    :param torch_grid:
    :param torch_texture: CxHxW tensor
    :return:
    """
    assert len(torch_texture.shape) == 3
    torch_texture = torch_texture.unsqueeze(0)
    torch_grid = normalize_grid_for_grid_sample(torch_grid)
    torch_grid = torch_grid.unsqueeze(0)
    res = F.grid_sample(torch_texture, torch_grid, mode="bilinear").squeeze(0)
    res = res * torch_mask
    return res
def find_tensor_peak_batch(heatmap, radius, downsample, threshold=0.000001):
    assert heatmap.dim() == 3, 'The dimension of the heatmap is wrong : {}'.format(heatmap.size())
    assert radius > 0 and isinstance(radius, numbers.Number), 'The radius is not ok : {}'.format(radius)
    num_pts, H, W = heatmap.size(0), heatmap.size(1), heatmap.size(2)
    assert W > 1 and H > 1, 'To avoid the normalization function divide zero'
    # find the approximate location:
    score, index = torch.max(heatmap.view(num_pts, -1), 1)
    index_w = (index % W).float()
    index_h = (index // W).float()  # floor division; a true division would give a fractional row index

    def normalize(x, L):
        return -1. + 2. * x.data / (L - 1)
    boxes = [index_w - radius, index_h - radius, index_w + radius, index_h + radius]
    boxes[0] = normalize(boxes[0], W)
    boxes[1] = normalize(boxes[1], H)
    boxes[2] = normalize(boxes[2], W)
    boxes[3] = normalize(boxes[3], H)
    # affine_parameter = [(boxes[2]-boxes[0])/2, boxes[0]*0, (boxes[2]+boxes[0])/2,
    #                     boxes[0]*0, (boxes[3]-boxes[1])/2, (boxes[3]+boxes[1])/2]
    # theta = torch.stack(affine_parameter, 1).view(num_pts, 2, 3)

    affine_parameter = torch.zeros((num_pts, 2, 3))
    affine_parameter[:, 0, 0] = (boxes[2] - boxes[0]) / 2
    affine_parameter[:, 0, 2] = (boxes[2] + boxes[0]) / 2
    affine_parameter[:, 1, 1] = (boxes[3] - boxes[1]) / 2
    affine_parameter[:, 1, 2] = (boxes[3] + boxes[1]) / 2

    # extract the sub-region heatmap
    theta = affine_parameter.to(heatmap.device)
    grid_size = torch.Size([num_pts, 1, radius * 2 + 1, radius * 2 + 1])
    grid = F.affine_grid(theta, grid_size)
    sub_feature = F.grid_sample(heatmap.unsqueeze(1), grid).squeeze(1)
    sub_feature = F.threshold(sub_feature, threshold, np.finfo(float).eps)

    X = torch.arange(-radius, radius + 1).to(heatmap).view(1, 1, radius * 2 + 1)
    Y = torch.arange(-radius, radius + 1).to(heatmap).view(1, radius * 2 + 1, 1)

    sum_region = torch.sum(sub_feature.view(num_pts, -1), 1)
    x = torch.sum((sub_feature * X).view(num_pts, -1), 1) / sum_region + index_w
    y = torch.sum((sub_feature * Y).view(num_pts, -1), 1) / sum_region + index_h

    x = x * downsample + downsample / 2.0 - 0.5
    y = y * downsample + downsample / 2.0 - 0.5
    return torch.stack([x, y], 1), score
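# Hypothetical usage sketch for find_tensor_peak_batch (assumes the function
# above plus numbers/np/torch/F are in scope): place a Gaussian bump at a known
# sub-pixel location and recover it via the cropped, weighted soft-argmax.
import torch

num_pts, H, W = 1, 64, 64
yy, xx = torch.meshgrid(torch.arange(H, dtype=torch.float32),
                        torch.arange(W, dtype=torch.float32))
cx, cy = 20.3, 33.7
heatmap = torch.exp(-((xx - cx) ** 2 + (yy - cy) ** 2) / (2 * 2.0 ** 2)).unsqueeze(0)

loc, score = find_tensor_peak_batch(heatmap, radius=4, downsample=1)
print(loc)  # approximately tensor([[20.3, 33.7]])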
def crop_image(image, pixel_2d, sin=None, grid0=None):
    # image should be N x 1 x slen x slen
    assert len(image.shape) == 4
    assert image.shape[1] == 1
    # assert there is a coordinate for each image
    assert image.shape[0] == pixel_2d.shape[0]

    batchsize, _, h, _ = image.shape

    if grid0 is None:
        assert sin is not None
        r = sin // 2
        grid0 = torch.from_numpy(
            np.mgrid[(-r):(r + 1), (-r):(r + 1)].transpose([2, 1, 0]))

    grid1 = grid0.unsqueeze(0).expand([image.size(0), -1, -1, -1])
    grid2 = grid1 + pixel_2d.view(image.size(0), 1, 1, 2) - (h - 1) / 2
    grid3 = grid2.float() / ((h - 1) / 2)

    return f.grid_sample(image, grid3)
def theta_to_sampling_grid(out_h, out_w, theta_aff=None, theta_tps=None, theta_aff_tps=None,
                           use_cuda=True, tps_reg_factor=0):
    affTnf = GeometricTnf(out_h=out_h, out_w=out_w, geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(out_h=out_h, out_w=out_w, geometric_model='tps', use_cuda=use_cuda,
                          tps_reg_factor=tps_reg_factor)

    if theta_aff is not None:
        sampling_grid_aff = affTnf(image_batch=None,
                                   theta_batch=theta_aff.view(1, 2, 3),
                                   return_sampling_grid=True,
                                   return_warped_image=False)
    else:
        sampling_grid_aff = None

    if theta_tps is not None:
        sampling_grid_tps = tpsTnf(image_batch=None,
                                   theta_batch=theta_tps.view(1, -1),
                                   return_sampling_grid=True,
                                   return_warped_image=False)
    else:
        sampling_grid_tps = None

    if theta_aff is not None and theta_aff_tps is not None:
        sampling_grid_aff_tps = tpsTnf(image_batch=None,
                                       theta_batch=theta_aff_tps.view(1, -1),
                                       return_sampling_grid=True,
                                       return_warped_image=False)

        # put 1e10 value in region out of bounds of sampling_grid_aff
        sampling_grid_aff = sampling_grid_aff.clone()
        in_bound_mask_aff = Variable((sampling_grid_aff.data[:, :, :, 0] > -1) &
                                     (sampling_grid_aff.data[:, :, :, 0] < 1) &
                                     (sampling_grid_aff.data[:, :, :, 1] > -1) &
                                     (sampling_grid_aff.data[:, :, :, 1] < 1)).unsqueeze(3)
        in_bound_mask_aff = in_bound_mask_aff.expand_as(sampling_grid_aff)
        sampling_grid_aff = torch.add((in_bound_mask_aff.float() - 1) * (1e10),
                                      torch.mul(in_bound_mask_aff.float(), sampling_grid_aff))

        # compose the grids, then put 1e10 value in region out of bounds of sampling_grid_aff_tps_comp
        sampling_grid_aff_tps_comp = F.grid_sample(sampling_grid_aff.transpose(2, 3).transpose(1, 2),
                                                   sampling_grid_aff_tps).transpose(1, 2).transpose(2, 3)
        in_bound_mask_aff_tps = Variable((sampling_grid_aff_tps.data[:, :, :, 0] > -1) &
                                         (sampling_grid_aff_tps.data[:, :, :, 0] < 1) &
                                         (sampling_grid_aff_tps.data[:, :, :, 1] > -1) &
                                         (sampling_grid_aff_tps.data[:, :, :, 1] < 1)).unsqueeze(3)
        in_bound_mask_aff_tps = in_bound_mask_aff_tps.expand_as(sampling_grid_aff_tps_comp)
        sampling_grid_aff_tps_comp = torch.add((in_bound_mask_aff_tps.float() - 1) * (1e10),
                                               torch.mul(in_bound_mask_aff_tps.float(), sampling_grid_aff_tps_comp))
    else:
        sampling_grid_aff_tps_comp = None

    return (sampling_grid_aff, sampling_grid_tps, sampling_grid_aff_tps_comp)
def warp_feature(feature, pts_location, patch_size):
    # pts_location is [X,Y], patch_size is [H,W]
    C, H, W = feature.size(0), feature.size(1), feature.size(2)

    def normalize(x, L):
        return -1. + 2. * x / (L - 1)
    crop_box = [pts_location[0] - patch_size[1], pts_location[1] - patch_size[0],
                pts_location[0] + patch_size[1], pts_location[1] + patch_size[0]]
    crop_box[0] = normalize(crop_box[0], W)
    crop_box[1] = normalize(crop_box[1], H)
    crop_box[2] = normalize(crop_box[2], W)
    crop_box[3] = normalize(crop_box[3], H)
    affine_parameter = [(crop_box[2] - crop_box[0]) / 2,
                        MU.np2variable(torch.zeros(1), feature.is_cuda, False),
                        (crop_box[0] + crop_box[2]) / 2,
                        MU.np2variable(torch.zeros(1), feature.is_cuda, False),
                        (crop_box[3] - crop_box[1]) / 2,
                        (crop_box[1] + crop_box[3]) / 2]
    affine_parameter = torch.cat(affine_parameter).view(2, 3)

    theta = affine_parameter.unsqueeze(0)
    feature = feature.unsqueeze(0)
    grid_size = torch.Size([1, 1, 2 * patch_size[0] + 1, 2 * patch_size[1] + 1])
    grid = F.affine_grid(theta, grid_size)
    sub_feature = F.grid_sample(feature, grid).squeeze(0)
    return sub_feature
def __call__(self, image_batch, theta_batch=None, out_h=None, out_w=None,
             return_warped_image=True, return_sampling_grid=False,
             padding_factor=1.0, crop_factor=1.0):
    if image_batch is None:
        b = 1
    else:
        b = image_batch.size(0)
    if theta_batch is None:
        theta_batch = self.theta_identity
        theta_batch = theta_batch.expand(b, 2, 3).contiguous()
        theta_batch = Variable(theta_batch, requires_grad=False)

    # check if output dimensions have been specified at call time and have changed
    if (out_h is not None and out_w is not None) and (out_h != self.out_h or out_w != self.out_w):
        if self.geometric_model == 'affine':
            gridGen = AffineGridGen(out_h, out_w)
        elif self.geometric_model == 'tps':
            gridGen = TpsGridGen(out_h, out_w, use_cuda=self.use_cuda)
    else:
        gridGen = self.gridGen

    sampling_grid = gridGen(theta_batch)

    # rescale grid according to crop_factor and padding_factor
    if padding_factor != 1 or crop_factor != 1:
        sampling_grid = sampling_grid * (padding_factor * crop_factor)
    # rescale grid according to offset_factor
    if self.offset_factor is not None:
        sampling_grid = sampling_grid * self.offset_factor

    if return_sampling_grid and not return_warped_image:
        return sampling_grid

    # sample transformed image
    warped_image_batch = F.grid_sample(image_batch, sampling_grid)

    if return_sampling_grid and return_warped_image:
        return (warped_image_batch, sampling_grid)

    return warped_image_batch
def _crop_pool_layer(self, bottom, rois, max_pool=True):
    # implement it using stn
    # box to affine
    # input (x1,y1,x2,y2)
    """
    [  x2-x1             x1 + x2 - W + 1  ]
    [  -----      0      ---------------  ]
    [  W - 1                  W - 1       ]
    [                                     ]
    [           y2-y1    y1 + y2 - H + 1  ]
    [    0      -----    ---------------  ]
    [           H - 1         H - 1       ]
    """
    rois = rois.detach()
    x1 = rois[:, 1::4] / 16.0
    y1 = rois[:, 2::4] / 16.0
    x2 = rois[:, 3::4] / 16.0
    y2 = rois[:, 4::4] / 16.0

    height = bottom.size(2)
    width = bottom.size(3)

    # affine theta
    theta = Variable(rois.data.new(rois.size(0), 2, 3).zero_())
    theta[:, 0, 0] = ((x2 - x1) / (width - 1)).view(-1)
    theta[:, 0, 2] = ((x1 + x2 - width + 1) / (width - 1)).view(-1)
    theta[:, 1, 1] = ((y2 - y1) / (height - 1)).view(-1)
    theta[:, 1, 2] = ((y1 + y2 - height + 1) / (height - 1)).view(-1)

    pre_pool_size = cfg.POOLING_SIZE * 2 if max_pool else cfg.POOLING_SIZE
    grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size)))
    torch.backends.cudnn.enabled = False
    crops = F.grid_sample(bottom.expand(rois.size(0), bottom.size(1), bottom.size(2), bottom.size(3)), grid)
    torch.backends.cudnn.enabled = True
    if max_pool:
        crops = F.max_pool2d(crops, 2, 2)

    return crops
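# A quick worked check (assumed example) of the box-to-affine formula in the
# docstring above: the ROI spanning the whole map, (x1,y1,x2,y2) = (0,0,W-1,H-1),
# yields the identity theta, so the "crop" reproduces the feature map exactly
# under the align_corners=True convention.
import torch
import torch.nn.functional as F

H = W = 8
x1, y1, x2, y2 = 0.0, 0.0, W - 1.0, H - 1.0
theta = torch.tensor([[[(x2 - x1) / (W - 1), 0.0, (x1 + x2 - W + 1) / (W - 1)],
                       [0.0, (y2 - y1) / (H - 1), (y1 + y2 - H + 1) / (H - 1)]]])
print(theta)  # [[1, 0, 0], [0, 1, 0]]

feat = torch.randn(1, 3, H, W)
grid = F.affine_grid(theta, torch.Size((1, 1, H, W)), align_corners=True)
print(torch.allclose(F.grid_sample(feat, grid, align_corners=True), feat, atol=1e-6))  # True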
def forward(self, x):
    x_shape = x.size()  # (b, c, h, w)
    offset = self.offset_filter(x)  # (b, 2*c, h, w)
    offset_w, offset_h = torch.split(offset, self.regular_filter.in_channels, 1)  # (b, c, h, w)
    offset_w = offset_w.contiguous().view(-1, int(x_shape[2]), int(x_shape[3]))  # (b*c, h, w)
    offset_h = offset_h.contiguous().view(-1, int(x_shape[2]), int(x_shape[3]))  # (b*c, h, w)
    if not self.input_shape or self.input_shape != x_shape:
        self.input_shape = x_shape
        grid_w, grid_h = np.meshgrid(np.linspace(-1, 1, x_shape[3]), np.linspace(-1, 1, x_shape[2]))  # (h, w)
        grid_w = torch.Tensor(grid_w)
        grid_h = torch.Tensor(grid_h)
        if self.cuda:
            grid_w = grid_w.cuda()
            grid_h = grid_h.cuda()
        self.grid_w = nn.Parameter(grid_w)
        self.grid_h = nn.Parameter(grid_h)
    offset_w = offset_w + self.grid_w  # (b*c, h, w)
    offset_h = offset_h + self.grid_h  # (b*c, h, w)
    x = x.contiguous().view(-1, int(x_shape[2]), int(x_shape[3])).unsqueeze(1)  # (b*c, 1, h, w)
    x = F.grid_sample(x, torch.stack((offset_h, offset_w), 3))  # (b*c, h, w)
    x = x.contiguous().view(-1, int(x_shape[1]), int(x_shape[2]), int(x_shape[3]))  # (b, c, h, w)
    x = self.regular_filter(x)
    return x
def forward(self, xIm, target=None, inpMask=None, hrMask=None):
    # Primary net
    x = self.primaryNet(xIm)  # outp shape (batchnum,chan,lrNum,h,w)

    # Reshape
    refxOri = x[..., 0:1, :, :]
    refy = x[..., 1:, :, :]
    refx = refxOri.repeat(*(1, 1, refy.shape[2], 1, 1))
    xReSh = torch.cat([refx, refy], dim=1)  # outp shape (batchnum,2*chan,lrNum,h,w)
    # Note: different from paper, which uses 3D conv before 2D conv
    # Reshape again for 2D conv
    _batchNum, _chan, _lrNum, _h, _w = xReSh.shape
    xReSh = xReSh.permute(0, 2, 1, 3, 4).contiguous().view(-1, _chan, _h, _w)  # outp shape (batchnum*lrNum,2*chan,h,w)

    # stn net
    _w = xReSh.shape[-1]
    xReSh = self.stnNet(xReSh)
    # min,max clip
    xReSh = torch.clamp(xReSh, min=-4 / _w, max=4 / _w)
    # generate affine matrix
    affMat = self._genTransMat_onlyShift(xReSh)  # outp shape (batchNum*lrNum,2,3)

    # register index>0 LR image feature to image feature of index 0
    _batchNum, _chan, _lrNum, _h, _w = refy.shape
    refy = refy.permute(0, 2, 1, 3, 4).contiguous().view(-1, _chan, _h, _w)  # outp shape (batchnum*lrNum,chan,h,w)
    grid = F.affine_grid(affMat, refy.size())
    refy = F.grid_sample(refy, grid)
    # reshape feature tensor back to normal
    refy = refy.view(_batchNum, _lrNum, _chan, _h, _w).permute(0, 2, 1, 3, 4)
    regFeat = torch.cat([refxOri, refy], dim=2)

    # Fusion net
    hfResidual = self.fusionNet(regFeat)  # outp shape (batchNum, chan=1,h,w)

    # register index>0 LR image to image of index 0
    _batchNum, _chan, _lrNum, _h, _w = xIm[:, :, 1:, ...].shape
    regIm = xIm[:, :, 1:, ...].permute(0, 2, 1, 3, 4).contiguous().view(-1, _chan, _h, _w)  # outp shape (batchnum*lrNum,chan,h,w)
    grid = F.affine_grid(affMat, regIm.size())
    regIm = F.grid_sample(regIm, grid)
    regIm = regIm.view(_batchNum, _lrNum, _chan, _h, _w).permute(0, 2, 1, 3, 4)
    regIm = torch.cat([xIm[:, :, 0:1, ...], regIm], dim=2)  # outp shape (batchNum,chan=1,lrNum,h,w)

    # generate HR image
    meanIm = torch.mean(regIm, dim=2)
    hrIm = meanIm + hfResidual

    if target is None:
        return hfResidual, regIm, hrIm
    else:
        # check and discard samples where LR images have high mask ratio
        if inpMask is not None:
            goodsamp = torch.LongTensor(utils.classifyGoodBadSamples(inpMask, 0.8)).to(hrIm.device)
            hrIm = torch.index_select(hrIm, 0, goodsamp)
            target = torch.index_select(target, 0, goodsamp)
        # loss from pixel similarity
        if self.findBestLossFlag:
            hrIm, target, pixLoss = findBestLoss(hrIm, target, self.lossCriterion)
        else:
            pixLoss = torch.mean(self.lossCriterion(hrIm, target))
        # loss from structural similarity index
        # ssi = utils.structuralSimilarityGrayScale(hrIm, target)
        # ssimLoss = torch.mean(1. - ssi)
        print("loss: %.5f " % (pixLoss))
        loss = pixLoss  # + ssimLoss*2e-3
        return [hfResidual, regIm, hrIm], loss
def grid_cropper(img_t, theta, h=768, w=768):
    grid = F.affine_grid(theta, torch.Size((1, 3, h, w)))
    crop = F.grid_sample(img_t, grid.type(torch.float32), padding_mode='zeros')
    return crop
def interpolate_texture_map(fragments, meshes) -> torch.Tensor:
    """
    Interpolate a 2D texture map using uv vertex texture coordinates for each
    face in the mesh. First interpolate the vertex uvs using barycentric
    coordinates for each pixel in the rasterized output. Then interpolate the
    texture map using the uv coordinate for each pixel.

    Args:
        fragments:
            The outputs of rasterization. From this we use

            - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices
              of the faces (in the packed representation) which
              overlap each pixel in the image.
            - barycentric_coords: FloatTensor of shape (N, H, W, K, 3) specifying
              the barycentric coordinates of each pixel
              relative to the faces (in the packed
              representation) which overlap the pixel.
        meshes: Meshes representing a batch of meshes. It is expected that
            meshes has a textures attribute which is an instance of the
            Textures class.

    Returns:
        texels: tensor of shape (N, H, W, K, C) giving the interpolated
        texture for each pixel in the rasterized image.
    """
    if not isinstance(meshes.textures, Textures):
        msg = "Expected meshes.textures to be an instance of Textures; got %r"
        raise ValueError(msg % type(meshes.textures))

    faces_uvs = meshes.textures.faces_uvs_packed()
    verts_uvs = meshes.textures.verts_uvs_packed()
    faces_verts_uvs = verts_uvs[faces_uvs]
    texture_maps = meshes.textures.maps_padded()

    # pixel_uvs: (N, H, W, K, 2)
    pixel_uvs = interpolate_face_attributes(fragments.pix_to_face, fragments.bary_coords, faces_verts_uvs)

    N, H_out, W_out, K = fragments.pix_to_face.shape
    N, H_in, W_in, C = texture_maps.shape  # 3 for RGB

    # pixel_uvs: (N, H, W, K, 2) -> (N, K, H, W, 2) -> (NK, H, W, 2)
    pixel_uvs = pixel_uvs.permute(0, 3, 1, 2, 4).reshape(N * K, H_out, W_out, 2)

    # textures.map:
    #   (N, H, W, C) -> (N, C, H, W) -> (1, N, C, H, W)
    #   -> expand (K, N, C, H, W) -> reshape (N*K, C, H, W)
    texture_maps = (texture_maps.permute(0, 3, 1, 2)[None, ...]
                    .expand(K, -1, -1, -1, -1)
                    .transpose(0, 1)
                    .reshape(N * K, C, H_in, W_in))

    # Textures: (N*K, C, H, W), pixel_uvs: (N*K, H, W, 2)
    # Now need to format the pixel uvs and the texture map correctly!
    # From pytorch docs, grid_sample takes `grid` and `input`:
    #   grid specifies the sampling pixel locations normalized by
    #   the input spatial dimensions. It should have most
    #   values in the range of [-1, 1]. Values x = -1, y = -1
    #   is the left-top pixel of input, and values x = 1, y = 1 is the
    #   right-bottom pixel of input.
    pixel_uvs = pixel_uvs * 2.0 - 1.0
    texture_maps = torch.flip(texture_maps, [2])  # flip y axis of the texture map
    if texture_maps.device != pixel_uvs.device:
        texture_maps = texture_maps.to(pixel_uvs.device)
    texels = F.grid_sample(texture_maps, pixel_uvs, align_corners=False)
    texels = texels.reshape(N, K, C, H_out, W_out).permute(0, 3, 4, 1, 2)
    return texels
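# Small demo (assumed example) of the align_corners=False convention relied on
# above: -1/+1 refer to the outer *edges* of the corner pixels, so the centre
# of pixel (row i, col j) sits at (-1 + (2j + 1) / W, -1 + (2i + 1) / H).
import torch
import torch.nn.functional as F

tex = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)
H = W = 4
u = -1 + (2 * 1 + 1) / W   # centre of column 1
v = -1 + (2 * 2 + 1) / H   # centre of row 2
grid = torch.tensor([[[[u, v]]]])
print(F.grid_sample(tex, grid, align_corners=False))  # tensor([[[[9.]]]])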
def _crop_pool_layer(self, bottom, rois, scaling_ratio=16.0, mode='bilinear',
                     max_pool=True, use_for_parsing=False):
    # implement it using stn
    # box to affine
    # input (x1,y1,x2,y2)
    """
    [  x2-x1             x1 + x2 - W + 1  ]
    [  -----      0      ---------------  ]
    [  W - 1                  W - 1       ]
    [                                     ]
    [           y2-y1    y1 + y2 - H + 1  ]
    [    0      -----    ---------------  ]
    [           H - 1         H - 1       ]
    """
    rois = rois.detach()

    x1 = rois[:, 1::4] / scaling_ratio  # 16.0
    y1 = rois[:, 2::4] / scaling_ratio  # 16.0
    x2 = rois[:, 3::4] / scaling_ratio  # 16.0
    y2 = rois[:, 4::4] / scaling_ratio  # 16.0

    height = bottom.size(2)
    width = bottom.size(3)

    # affine theta
    theta = Variable(rois.data.new(rois.size(0), 2, 3).zero_())
    theta[:, 0, 0] = (x2 - x1) / (width - 1)
    theta[:, 0, 2] = (x1 + x2 - width + 1) / (width - 1)
    theta[:, 1, 1] = (y2 - y1) / (height - 1)
    theta[:, 1, 2] = (y1 + y2 - height + 1) / (height - 1)

    if use_for_parsing:
        pre_pool_size = cfg.POOLING_SIZE * 4
        grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size)))
        crops = F.grid_sample(bottom.expand(rois.size(0), bottom.size(1), bottom.size(2), bottom.size(3)),
                              grid, mode=mode)
    else:
        if max_pool:
            pre_pool_size = cfg.POOLING_SIZE * 2
            grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size)))
            crops = F.grid_sample(bottom.expand(rois.size(0), bottom.size(1), bottom.size(2), bottom.size(3)),
                                  grid, mode=mode)
            crops = F.max_pool2d(crops, 2, 2)
        else:
            grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE)))
            crops = F.grid_sample(bottom.expand(rois.size(0), bottom.size(1), bottom.size(2), bottom.size(3)),
                                  grid)

    return crops
def forward(self, X, u):
    [_, self.bs, c, self.d, self.d] = X.shape
    T = len(self.t_eval)
    self.link1_l = torch.sigmoid(self.link1_para)
    # encode
    self.phi1_m_t0, self.phi1_v_t0, self.phi1_m_n_t0, self.phi2_m_t0, self.phi2_v_t0, self.phi2_m_n_t0 = self.encode(X[0])
    self.phi1_m_t1, self.phi1_v_t1, self.phi1_m_n_t1, self.phi2_m_t1, self.phi2_v_t1, self.phi2_m_n_t1 = self.encode(X[1])
    # reparametrize
    self.Q_phi1 = VonMisesFisher(self.phi1_m_n_t0, self.phi1_v_t0)
    self.Q_phi2 = VonMisesFisher(self.phi2_m_n_t0, self.phi2_v_t0)
    self.P_hyper_uni = HypersphericalUniform(1, device=self.device)
    self.phi1_t0 = self.Q_phi1.rsample()
    while torch.isnan(self.phi1_t0).any():
        self.phi1_t0 = self.Q_phi1.rsample()
    self.phi2_t0 = self.Q_phi2.rsample()
    while torch.isnan(self.phi2_t0).any():
        self.phi2_t0 = self.Q_phi2.rsample()
    # estimate velocity
    self.phi1_dot_t0 = self.angle_vel_est(self.phi1_m_n_t0, self.phi1_m_n_t1, self.t_eval[1] - self.t_eval[0])
    self.phi2_dot_t0 = self.angle_vel_est(self.phi2_m_n_t0, self.phi2_m_n_t1, self.t_eval[1] - self.t_eval[0])
    # predict
    z0_u = torch.cat([
        self.phi1_t0[:, 0:1], self.phi2_t0[:, 0:1],
        self.phi1_t0[:, 1:2], self.phi2_t0[:, 1:2],
        self.phi1_dot_t0, self.phi2_dot_t0, u
    ], dim=1)
    zT_u = odeint(self.ode, z0_u, self.t_eval, method=self.hparams.solver)  # T, bs, 4
    self.qT, self.q_dotT, _ = zT_u.split([4, 2, 2], dim=-1)
    self.qT = self.qT.view(T * self.bs, 4)
    # decode
    ones = torch.ones_like(self.qT[:, 0:1])
    self.link1 = self.obs_net_1(ones)
    self.link2 = self.obs_net_2(ones)
    theta1 = self.get_theta_inv(self.qT[:, 0], self.qT[:, 2], 0, 0, bs=T * self.bs)  # cos phi1, sin phi1
    x = self.link1_l * self.qT[:, 2]  # l * sin phi1
    y = self.link1_l * self.qT[:, 0]  # l * cos phi1
    theta2 = self.get_theta_inv(self.qT[:, 1], self.qT[:, 3], x, y, bs=T * self.bs)  # cos phi2, sin phi2
    grid1 = F.affine_grid(theta1, torch.Size((T * self.bs, 1, self.d, self.d)))
    grid2 = F.affine_grid(theta2, torch.Size((T * self.bs, 1, self.d, self.d)))
    transf_link1 = F.grid_sample(self.link1.view(T * self.bs, 1, self.d, self.d), grid1)
    transf_link2 = F.grid_sample(self.link2.view(T * self.bs, 1, self.d, self.d), grid2)
    self.Xrec = torch.cat([transf_link1, transf_link2, torch.zeros_like(transf_link1)], dim=1)
    self.Xrec = self.Xrec.view(T, self.bs, 3, self.d, self.d)
    return None
def main(source_img_root='./data', target_img_root='./data', source_name='image_2',
         target_name='image_1', seg_root='', args=None, source_keypoint_path='',
         target_keypoint_path='', output_root='./output', target_folder='',
         target_name2='', alpha=0.8):
    if not os.path.exists(output_root):
        os.mkdir(output_root)
    source_fn = os.path.join(source_img_root, source_name)
    target_fn = target_name
    # print(target_seg_fn)
    # source_fn = './visualize_landmark/0.jpg'
    # target_fn = './visualize_landmark/1.jpg'
    source_img = cv2.imread(source_fn)
    target_img = cv2.imread(target_fn)
    print(source_fn, target_fn)

    """ hsv transfer color """
    img_hsv = cv2.cvtColor(target_img, cv2.COLOR_BGR2HSV)
    mask = np.where(
        np.logical_and(
            np.logical_and(30 < img_hsv[:, :, 0], img_hsv[:, :, 0] < 77),
            img_hsv[:, :, 1] > 70), 1, 0).astype(np.uint8)
    mask = cv2.blur(cv2.blur(mask, (5, 5)), (3, 3))[:, :, np.newaxis]
    # print(mask)
    h, w, _ = target_img.shape
    x_arr, y_arr, _ = np.nonzero(mask)
    # print(x_arr, y_arr)
    x_min = max(np.min(x_arr) - 25, 0)
    y_min = max(np.min(y_arr) - 25, 0)
    x_max = min(np.max(x_arr) + 25, h - 1)
    y_max = min(np.max(y_arr) + 25, w - 1)
    crop_mask = mask[x_min:x_max, y_min:y_max, :]
    h, w, _ = crop_mask.shape
    crop_area = mask.copy()
    crop_area[x_min:x_min + h, y_min:y_min + w, :] = 1

    sh, sw, _ = source_img.shape
    if h * sw > w * sh:
        source_img = cv2.resize(source_img, (sw * h // sh, h))
    else:
        source_img = cv2.resize(source_img, (w, sh * w // sw))
    sh, sw, _ = source_img.shape
    start_h = (sh - h) // 2 + args.shift_h
    start_w = (sw - w) // 2 + args.shift_w
    pad_num = 45
    start_h += pad_num
    start_w += pad_num
    img1 = np.pad(source_img[:, :, 0], pad_num, 'symmetric')[start_h:start_h + h, start_w:start_w + w]
    img2 = np.pad(source_img[:, :, 1], pad_num, 'symmetric')[start_h:start_h + h, start_w:start_w + w]
    img3 = np.pad(source_img[:, :, 2], pad_num, 'symmetric')[start_h:start_h + h, start_w:start_w + w]
    source_img = np.concatenate([
        img1[:, :, np.newaxis], img2[:, :, np.newaxis], img3[:, :, np.newaxis]
    ], 2)
    # source_img = source_img[start_h:start_h + h, start_w:start_w + w, :]
    crop_logo = source_img * crop_mask
    logo = target_img.copy()
    cv2.imwrite(f'./crop_logo_{source_name}.jpg', crop_logo)
    logo[x_min:x_min + h, y_min:y_min + w, :] = source_img

    source_fn = target_fn
    target_fn = os.path.join(target_img_root, target_name2)
    seg_fn = os.path.join(seg_root, target_name2)
    source_img = cv2.imread(source_fn)
    target_img = cv2.imread(target_fn)
    sh, sw, _ = source_img.shape
    th, tw, _ = target_img.shape
    w = max(sw, tw)
    h = max(sh, th)
    source_img = np.pad(logo, ((0, h - sh), (0, w - sw), (0, 0)),
                        'constant', constant_values=(255, 255))
    target_img = np.pad(target_img, ((0, h - th), (0, w - tw), (0, 0)),
                        'constant', constant_values=(255, 255))
    target_mask = cv2.imread(seg_fn, cv2.IMREAD_GRAYSCALE)
    target_mask = np.pad(target_mask, ((0, h - th), (0, w - tw)),
                         'constant', constant_values=(0, 0))
    cv2.imwrite(f'./source_{source_name}.jpg', source_img)
    cv2.imwrite(f'./target_{source_name}.jpg', target_img)

    source_keypoint, target_keypoint, raw_source_keypoint, raw_target_keypoint = \
        load_keypoints(w=w, h=h, source_name=target_name.split('/')[-1],
                       target_name=target_name2,
                       source_keypoint_path=source_keypoint_path,
                       target_keypoint_path=target_keypoint_path)
    raw_target_keypoint, target_keypoint = get_align_keypoint(raw_target_keypoint, is_source=False)
    raw_source_keypoint, source_keypoint = get_align_keypoint(raw_source_keypoint, is_source=True)
    visualize(target_keypoint, target_fn)
    visualize(source_keypoint, source_fn)
    target_keypoint = normalize(target_keypoint[:-2, :], w, h)
    source_keypoint = normalize(source_keypoint[:-2, :], w, h)
    _, grid = TPS(target_keypoint, source_keypoint, width=w, height=h,
                  _lambda=args._lambda, calc_new_pos=True)
    grid = torch.from_numpy(grid)
    # 619 246
    # tensor([0.2597, 0.6458], dtype=torch.float64)
    source_img = torch.from_numpy(source_img.astype(np.float64)).unsqueeze(dim=0).permute(0, 3, 1, 2)
    target_img = torch.from_numpy(target_img.astype(np.float64)).unsqueeze(dim=0).permute(0, 3, 1, 2)
    # print(grid)
    grid = grid.unsqueeze(dim=0) * 2 - 1.0
    # print(grid.shape)
    # print(grid)
    warp_img = F.grid_sample(source_img, grid, mode='bilinear', padding_mode='border')
    warp_img = warp_img.squeeze(dim=0).permute(1, 2, 0)
    warp_img = warp_img.numpy().astype(np.uint8)
    target_img = target_img.squeeze(dim=0).permute(1, 2, 0)
    target_img = target_img.numpy().astype(np.uint8)

    img_hsv = cv2.cvtColor(target_img, cv2.COLOR_BGR2HSV)
    # mask = np.where(np.logical_and(np.logical_and(30 < img_hsv[:, :, 0], img_hsv[:, :, 0] < 77), img_hsv[:, :, 1] > 70),
    #                 1, 0).astype(np.uint8)
    # mask = cv2.blur(cv2.blur(mask, (5, 5)), (3, 3))[:, :, np.newaxis]
    # hsv_base = cv2.cvtColor(np.array([230, 230, 230], dtype=np.uint8).reshape(1, 1, 3), cv2.COLOR_BGR2HSV)
    # new_img_hsv, base, scale, mu = standardization(hsv_base, target_img, mask)
    # target_img = cv2.cvtColor(new_img_hsv, cv2.COLOR_HSV2BGR) * mask + target_img * (1 - mask)
    # name = '.'.join((target_name2.split('/')[-1]).split('.')[:-1])
    # cv2.imwrite(f'./{name}.jpg', target_img)
    # cv2.imwrite(f'./{name}_mask.jpg', mask * 255)
    mask = target_mask.astype(float) / 255
    mask = mask[:, :, np.newaxis]
    warp_img = warp_img.astype(np.float32) * target_img.astype(np.float32) / 255
    warp_img = warp_img.astype(np.float32) * alpha + target_img.astype(np.float32) * (1 - alpha)
    cv2.imwrite('./warp.jpg', warp_img)
    warp_img = (mask * warp_img + (1 - mask) * target_img).astype(np.uint8)
    warp_img = gauss_blur(warp_img, mask)
    warp_img = jpeg_blur(warp_img, mask)
    result = warp_img  # (mask * warp_img + (1 - mask) * target_img).astype(np.uint8)
    cv2.imwrite(
        os.path.join(
            output_root,
            '.'.join((source_name.split('/')[-1]).split('.')[:-1]) + '_' +
            '.'.join((target_name2.split('/')[-1]).split('.')[:-1]) + '.jpg'),
        result)
def forward(self, adv_patch, lab_batch, img_size, do_rotate=True, rand_loc=True):
    # adv_patch = F.conv2d(adv_patch.unsqueeze(0), self.kernel, padding=(2, 2))
    adv_patch = self.medianpooler(adv_patch.unsqueeze(0))
    # Determine size of padding
    pad = (img_size - adv_patch.size(-1)) / 2
    # Make a batch of patches
    adv_patch = adv_patch.unsqueeze(0)  # .unsqueeze(0)
    adv_batch = adv_patch.expand(lab_batch.size(0), lab_batch.size(1), -1, -1, -1)
    batch_size = torch.Size((lab_batch.size(0), lab_batch.size(1)))

    # Contrast, brightness and noise transforms

    # Create random contrast tensor
    contrast = torch.cuda.FloatTensor(batch_size).uniform_(self.min_contrast, self.max_contrast)
    contrast = contrast.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
    contrast = contrast.expand(-1, -1, adv_batch.size(-3), adv_batch.size(-2), adv_batch.size(-1))
    contrast = contrast.cuda()

    # Create random brightness tensor
    brightness = torch.cuda.FloatTensor(batch_size).uniform_(self.min_brightness, self.max_brightness)
    brightness = brightness.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
    brightness = brightness.expand(-1, -1, adv_batch.size(-3), adv_batch.size(-2), adv_batch.size(-1))
    brightness = brightness.cuda()

    # Create random noise tensor
    noise = torch.cuda.FloatTensor(adv_batch.size()).uniform_(-1, 1) * self.noise_factor

    # Apply contrast/brightness/noise, clamp
    adv_batch = adv_batch * contrast + brightness + noise
    adv_batch = torch.clamp(adv_batch, 0.000001, 0.99999)

    # Where the label class_id is 1 we don't want a patch (padding) --> fill mask with zero's
    cls_ids = torch.narrow(lab_batch, 2, 0, 1)
    cls_mask = cls_ids.expand(-1, -1, 3)
    cls_mask = cls_mask.unsqueeze(-1)
    cls_mask = cls_mask.expand(-1, -1, -1, adv_batch.size(3))
    cls_mask = cls_mask.unsqueeze(-1)
    cls_mask = cls_mask.expand(-1, -1, -1, -1, adv_batch.size(4))
    msk_batch = torch.cuda.FloatTensor(cls_mask.size()).fill_(1) - cls_mask

    # Pad patch and mask to image dimensions
    mypad = nn.ConstantPad2d((int(pad + 0.5), int(pad), int(pad + 0.5), int(pad)), 0)
    adv_batch = mypad(adv_batch)
    msk_batch = mypad(msk_batch)

    # Rotation and rescaling transforms
    anglesize = (lab_batch.size(0) * lab_batch.size(1))
    if do_rotate:
        angle = torch.cuda.FloatTensor(anglesize).uniform_(self.minangle, self.maxangle)
    else:
        angle = torch.cuda.FloatTensor(anglesize).fill_(0)

    # Resizes and rotates
    current_patch_size = adv_patch.size(-1)
    lab_batch_scaled = torch.cuda.FloatTensor(lab_batch.size()).fill_(0)
    lab_batch_scaled[:, :, 1] = lab_batch[:, :, 1] * img_size
    lab_batch_scaled[:, :, 2] = lab_batch[:, :, 2] * img_size
    lab_batch_scaled[:, :, 3] = lab_batch[:, :, 3] * img_size
    lab_batch_scaled[:, :, 4] = lab_batch[:, :, 4] * img_size
    target_size = torch.sqrt(((lab_batch_scaled[:, :, 3].mul(0.2)) ** 2) +
                             ((lab_batch_scaled[:, :, 4].mul(0.2)) ** 2))
    target_x = lab_batch[:, :, 1].view(np.prod(batch_size))
    target_y = lab_batch[:, :, 2].view(np.prod(batch_size))
    targetoff_x = lab_batch[:, :, 3].view(np.prod(batch_size))
    targetoff_y = lab_batch[:, :, 4].view(np.prod(batch_size))
    if rand_loc:
        off_x = targetoff_x * (torch.cuda.FloatTensor(targetoff_x.size()).uniform_(-0.4, 0.4))
        target_x = target_x + off_x
        off_y = targetoff_y * (torch.cuda.FloatTensor(targetoff_y.size()).uniform_(-0.4, 0.4))
        target_y = target_y + off_y
    target_y = target_y - 0.05
    scale = target_size / current_patch_size
    scale = scale.view(anglesize)

    s = adv_batch.size()
    adv_batch = adv_batch.view(s[0] * s[1], s[2], s[3], s[4])
    msk_batch = msk_batch.view(s[0] * s[1], s[2], s[3], s[4])

    tx = (-target_x + 0.5) * 2
    ty = (-target_y + 0.5) * 2
    sin = torch.sin(angle)
    cos = torch.cos(angle)

    # Theta = rotation, rescale matrix
    theta = torch.cuda.FloatTensor(anglesize, 2, 3).fill_(0)
    theta[:, 0, 0] = cos / scale
    theta[:, 0, 1] = sin / scale
    theta[:, 0, 2] = tx * cos / scale + ty * sin / scale
    theta[:, 1, 0] = -sin / scale
    theta[:, 1, 1] = cos / scale
    theta[:, 1, 2] = -tx * sin / scale + ty * cos / scale

    b_sh = adv_batch.shape
    grid = F.affine_grid(theta, adv_batch.shape)

    adv_batch_t = F.grid_sample(adv_batch, grid)
    msk_batch_t = F.grid_sample(msk_batch, grid)

    '''
    # Theta2 = translation matrix
    theta2 = torch.cuda.FloatTensor(anglesize, 2, 3).fill_(0)
    theta2[:, 0, 0] = 1
    theta2[:, 0, 1] = 0
    theta2[:, 0, 2] = (-target_x + 0.5) * 2
    theta2[:, 1, 0] = 0
    theta2[:, 1, 1] = 1
    theta2[:, 1, 2] = (-target_y + 0.5) * 2

    grid2 = F.affine_grid(theta2, adv_batch.shape)
    adv_batch_t = F.grid_sample(adv_batch_t, grid2)
    msk_batch_t = F.grid_sample(msk_batch_t, grid2)
    '''

    adv_batch_t = adv_batch_t.view(s[0], s[1], s[2], s[3], s[4])
    msk_batch_t = msk_batch_t.view(s[0], s[1], s[2], s[3], s[4])

    adv_batch_t = torch.clamp(adv_batch_t, 0.000001, 0.999999)
    # img = msk_batch_t[0, 0, :, :, :].detach().cpu()
    # img = transforms.ToPILImage()(img)
    # img.show()
    # exit()

    return adv_batch_t * msk_batch_t
inputImage_cuda = inputImage.cuda(device)
inputGrids_cuda = inputGrids.cuda(device)
output_cuda = output.cuda(device)

using_zero_boundary = False
torch_border = 'zeros' if using_zero_boundary else 'border'

start = time.time()
my_lib_nd.BilinearSamplerBCXY_updateOutput_2D(inputImage, inputGrids, output, using_zero_boundary)
print('sampling cpu time taking:', time.time() - start)

inputGrids_ordered = torch.zeros_like(inputGrids)
inputGrids_ordered[:, 0, ...] = inputGrids[:, 1, ...]
inputGrids_ordered[:, 1, ...] = inputGrids[:, 0, ...]
output_torch = F.grid_sample(inputImage, inputGrids_ordered.permute([0, 2, 3, 1]), 'bilinear', torch_border)
out0 = (output - output_torch).view(-1).sum()
print(output - output_torch)
print("the difference of the current code and old code is {}".format(out0))

start = time.time()
my_lib_2D.BilinearSamplerBCWH_updateOutput_cuda_2D(inputImage_cuda, inputGrids_cuda, output_cuda,
                                                   device_c, using_zero_boundary)
print('sampling gpu time taking:', time.time() - start)
out1 = (output - (output_cuda).cpu()).view(-1, 1).sum()
def PerspectiveTransform(I, H, xv, yv):
    # apply the homography H to the normalized coordinates (xv, yv), then sample
    xvt = (xv * H[0, 0] + yv * H[0, 1] + H[0, 2]) / (xv * H[2, 0] + yv * H[2, 1] + H[2, 2])
    yvt = (xv * H[1, 0] + yv * H[1, 1] + H[1, 2]) / (xv * H[2, 0] + yv * H[2, 1] + H[2, 2])
    J = F.grid_sample(I.view(1, 1, height, width), torch.stack([xvt, yvt], 2).unsqueeze(0)).squeeze()
    return J
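# Hypothetical usage of PerspectiveTransform: with the identity homography the
# warp should return the input. Assumes the `height`/`width` globals the
# function reads, and a PyTorch version where grid_sample defaults to the
# align_corners=True convention (otherwise the check is only approximate).
import torch

height, width = 6, 6
yv, xv = torch.meshgrid(torch.linspace(-1, 1, height),
                        torch.linspace(-1, 1, width))
I = torch.rand(height, width)
H = torch.eye(3)
J = PerspectiveTransform(I, H, xv, yv)
print(torch.allclose(J, I, atol=1e-5))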
def forward(self, feture_bxdxtxhxw, tbes, tens):
    ###############################################
    # 1 pad data with zero
    bnum, dnum, tnum, hnum, wnum = feture_bxdxtxhxw.shape
    dev = feture_bxdxtxhxw.device
    '''
    for tbe, ten in zip(tbes, tens):
        assert tbe >= 0
        assert ten <= self.crop
    featpad_bxdxtxhxw = []
    for i in range(bnum):
        featpad_1xdxt1xhxw = torch.zeros((1, dnum, tbes[i], hnum, wnum), dtype=torch.float32, device=dev)
        featpad_1xdxt2xhxw = torch.zeros((1, dnum, self.crop - tens[i], hnum, wnum), dtype=torch.float32, device=dev)
        featpad_1xdxtxhxw = torch.cat([featpad_1xdxt1xhxw, feture_bxdxtxhxw[i:i + 1], featpad_1xdxt2xhxw], dim=2)
        featpad_bxdxtxhxw.append(featpad_1xdxtxhxw)
    featpad_bxdxtxhxw = torch.cat(featpad_bxdxtxhxw, dim=0)
    '''
    featpad_bxdxtxhxw = feture_bxdxtxhxw.repeat(1, 1, 4, 1, 1)

    # 2 params
    assert hnum == wnum
    assert hnum == self.spatial_grid
    sptial_grid = hnum
    temprol_grid = self.crop

    #################################################
    # step 0, pad data
    data_BDxTxHxW = featpad_bxdxtxhxw.view(bnum * dnum, self.crop, hnum, wnum)

    # c
    gridz_1xMx1x1 = self.gridz_1xMx1x1_todev
    # data_BDxTxHxW = data_BDxTxHxW * (gridz_1xMx1x1 ** 2)
    gridz_square_1xMx1x1 = self.gridz_square_1xMx1x1
    data_BDxTxHxW = data_BDxTxHxW * gridz_square_1xMx1x1

    # numerical issue
    data_BDxTxHxW = F.relu(data_BDxTxHxW, inplace=False)
    data_BDxTxHxW = torch.sqrt(data_BDxTxHxW)

    # datapad_BDx2Tx2Hx2W = torch.zeros((bnum * dnum, 2 * temprol_grid, 2 * sptial_grid, 2 * sptial_grid), dtype=torch.float32, device=dev)
    datapad_Dx2Tx2Hx2W = self.datapad_Dx2Tx2Hx2W
    # create new variable
    assert bnum == 1
    # datapad_BDx2Tx2Hx2W = datapad_Dx2Tx2Hx2W.repeat(bnum, 1, 1, 1)
    datapad_BDx2Tx2Hx2W = datapad_Dx2Tx2Hx2W

    datapad_BDx2Tx2Hx2W[:, :temprol_grid, :sptial_grid, :sptial_grid] = data_BDxTxHxW

    ###############################################
    # 1 fft
    datazero_Dx2Tx2Hx2W = self.datazero_Dx2Tx2Hx2W
    datazero_BDx2Tx2Hx2W = datazero_Dx2Tx2Hx2W
    datapad_BDx2Tx2Hx2Wx2 = torch.stack([datapad_BDx2Tx2Hx2W, datazero_BDx2Tx2Hx2W], dim=4)
    datafre_BDX2Tx2Hx2Wx2 = torch.fft(datapad_BDx2Tx2Hx2Wx2, 3)

    # fftshift
    datafre_BDX2Tx2Hx2Wx2 = self.roll_1(datafre_BDX2Tx2Hx2Wx2, dim=1, n=temprol_grid)
    datafre_BDX2Tx2Hx2Wx2 = self.roll_1(datafre_BDX2Tx2Hx2Wx2, dim=2, n=sptial_grid)
    datafre_BDX2Tx2Hx2Wx2 = self.roll_1(datafre_BDX2Tx2Hx2Wx2, dim=3, n=sptial_grid)

    #########################################################
    # step 2, Stolt trick
    # simulate interpn
    # treat x and y as batch, sample z
    # shift
    if True:
        datafre_BDx2x2Hx2Wx2T = datafre_BDX2Tx2Hx2Wx2.permute(0, 4, 1, 2, 3)
        '''
        size = datafre_BDx2x2Hx2Wx2T.shape
        theta = torch.from_numpy(np.eye(3, 4, dtype=np.float32)).unsqueeze(0)
        gridstmp = F.affine_grid(theta, size, align_corners=self.align_corners)
        x = gridstmp[:, :, :, :, 0:1]
        y = gridstmp[:, :, :, :, 1:2]
        z = gridstmp[:, :, :, :, 2:3]
        '''
        newcoord_BDx2Mx2Nx2Nx3 = self.newcoord_dx2Mx2Nx2Nx3_todev.repeat(bnum, 1, 1, 1, 1)
        if True:
            datafrenew = F.grid_sample(datafre_BDx2x2Hx2Wx2T,
                                       newcoord_BDx2Mx2Nx2Nx3,
                                       mode='bilinear',
                                       padding_mode='zeros',
                                       align_corners=self.align_corners)
        else:
            datafrenew = F.grid_sample(datafre_BDx2x2Hx2Wx2T,
                                       newcoord_BDx2Mx2Nx2Nx3,
                                       mode='bilinear',
                                       padding_mode='zeros')
        tdata_BDx2Tx2Hx2Wx2 = datafrenew.permute(0, 2, 3, 4, 1)
        tdata_BDx2Tx2Hx2Wx2 = tdata_BDx2Tx2Hx2Wx2.contiguous()
    ############################################################
    # actually, pytorch sampling will lead a little different
    else:
        import scipy.interpolate as si
        zdim = self.zdim
        xdim = self.xdim
        ydim = xdim
        gridznew = self.gridznew.numpy()
        gridy_2Mx2Nx2N = self.gridy_2Mx2Nx2N.numpy()
        gridx_2Mx2Nx2N = self.gridx_2Mx2Nx2N.numpy()
        datafre_bdxtxhxwx2 = datafre_BDX2Tx2Hx2Wx2.detach().cpu().numpy()
        datafre_bdxtxhxw = datafre_bdxtxhxwx2[:, :, :, :, 0] + 1j * datafre_bdxtxhxwx2[:, :, :, :, 1]
        re = []
        for datafre in datafre_bdxtxhxw:
            tvol = si.interpn(points=(zdim, ydim, xdim), values=datafre,
                              xi=np.stack([gridznew, gridy_2Mx2Nx2N, gridx_2Mx2Nx2N], axis=3),
                              method='linear', bounds_error=False, fill_value=0)
            re.append(tvol)
        re_bdxtxhxw = np.stack(re)
        re_real_bdxtxhxw = np.real(re_bdxtxhxw)
        re_imag_bdxtxhxw = np.imag(re_bdxtxhxw)
        re_real_bdxtxhxw = torch.from_numpy(re_real_bdxtxhxw).to(dev)
        re_imag_bdxtxhxw = torch.from_numpy(re_imag_bdxtxhxw).to(dev)
        tdata_BDx2Tx2Hx2Wx2 = torch.stack([re_real_bdxtxhxw, re_imag_bdxtxhxw], dim=4)

    #############################################################
    samplez_1xMxNxNx1 = self.gridz_2Mx2Nx2N_todev.unsqueeze(0).unsqueeze(4)
    sampleznew = self.gridznew_todev.unsqueeze(0).unsqueeze(4)

    tdata_BDx2Tx2Hx2Wx2[:, :self.z0pos, :, :, :] = 0
    tdata_BDx2Tx2Hx2Wx2 = tdata_BDx2Tx2Hx2Wx2 * samplez_1xMxNxNx1.abs()
    tdata_BDx2Tx2Hx2Wx2 = tdata_BDx2Tx2Hx2Wx2 / (sampleznew + 1e-8)

    ###########################################
    # ifft
    tdata_BDx2Tx2Hx2Wx2 = self.roll_1(tdata_BDx2Tx2Hx2Wx2, dim=1, n=temprol_grid)
    tdata_BDx2Tx2Hx2Wx2 = self.roll_1(tdata_BDx2Tx2Hx2Wx2, dim=2, n=sptial_grid)
    tdata_BDx2Tx2Hx2Wx2 = self.roll_1(tdata_BDx2Tx2Hx2Wx2, dim=3, n=sptial_grid)

    data = torch.ifft(tdata_BDx2Tx2Hx2Wx2, 3)
    data = data[:, :temprol_grid, :sptial_grid, :sptial_grid]
    data = data[:, :, :, :, 0] ** 2 + data[:, :, :, :, 1] ** 2

    ##########################################################################
    volumn_BDxTxHxW = data.view(bnum * dnum, self.crop, hnum, wnum)
    volumn_BxDxTxHxW = volumn_BDxTxHxW.view(bnum, dnum, self.crop, hnum, wnum)

    return volumn_BxDxTxHxW
def neko_sample(feat, grid, dw, dh):
    dst = trnf.grid_sample(feat, grid.permute(0, 2, 3, 1), mode="bilinear")
    return trnf.adaptive_avg_pool2d(dst, [dh, dw])
def generate_video(im_name, normalise):
    # Load in the correspondences and images
    im1 = Image.open(os.environ['BASE_PATH'] + '/imL/%s.jpg' % im_name)
    im2 = Image.open(os.environ['BASE_PATH'] + '/imR/%s.jpg' % im_name)

    if normalise:
        np_tempwarp = np.load(os.environ['BASE_PATH'] + '/warps/temp_sampler%s_2_grad_norm.npz' % im_name)
        H = np_tempwarp['H']
        np_tempwarp = np_tempwarp['sampler']
    else:
        np_tempwarp = np.load(os.environ['BASE_PATH'] + '/warps/temp_sampler%s_2_grad_coarse.npz.npy' % im_name)

    if normalise:
        im1_arr = np.array(im1)
        im2_arr = np.array(im2)
        K1 = np.eye(3)
        K1[0, 0] = 2 / im1_arr.shape[1]
        K1[1, 1] = 2 / im1_arr.shape[0]
        K1[0:2, 2] = -1
        K2 = np.eye(3)
        K2[0, 0] = 2 / im2_arr.shape[1]
        K2[1, 1] = 2 / im2_arr.shape[0]
        K2[0:2, 2] = -1
        aff_mat = (np.linalg.inv(K2) @ H @ K1)
        # Now transform the image and return
        warp_im1 = cv2.warpAffine(im1_arr, aff_mat[0:2], (im2_arr.shape[1], im2_arr.shape[0]))
        im1 = warp_im1
        im1 = Image.fromarray(im1)

    warp = torch.Tensor(np_tempwarp).unsqueeze(0)
    im1_torch = tr.ToTensor()(im1).unsqueeze(0)
    im2_torch = tr.ToTensor()(im2).unsqueeze(0)

    gen_img = F.grid_sample(im1_torch, warp)
    sampler = F.upsample(warp.permute(0, 3, 1, 2), size=(im2_torch.size(2), im2_torch.size(3)))
    gen_imglarge = F.grid_sample(im1_torch, sampler.permute(0, 2, 3, 1))

    W1, W2, _ = np_tempwarp.shape
    orig_warp = torch.meshgrid(torch.linspace(-1, 1, W1), torch.linspace(-1, 1, W2))
    orig_warp = torch.cat((orig_warp[1].unsqueeze(2), orig_warp[0].unsqueeze(2)), 2)
    orig_warp = orig_warp.unsqueeze(0)
    warp = torch.Tensor(np_tempwarp).unsqueeze(0)

    new_imgs = []
    if not os.path.exists('./temp%s/%s' % (im_name, im_name)):
        os.makedirs('./temp%s/%s' % (im_name, im_name))

    radius = 2 * 2 / 1024.
    for i in tqdm(range(-10, 30)):
        resample = (orig_warp * float(i) / 20. + warp * float(20 - i) / 20.)
        pts3D = resample.view(1, -1, 2)
        pts_mask = (warp.view(-1, 2)[:, 0] > -1) & (warp.view(-1, 2)[:, 0] < 1)
        pts3D = pts3D[:, pts_mask, :]
        pts3D = -pts3D
        pts3D = torch.cat((pts3D.cuda(), torch.ones((1, pts3D.size(1), 1)).cuda()), 2)
        rgb = F.grid_sample(im2_torch, orig_warp).permute(0, 2, 3, 1).view(1, -1, 3)[:, pts_mask, :]
        mask = torch.ones((1, rgb.size(1), 1)).cuda()

        pts3DRGB = Pointclouds(points=pts3D, features=rgb)
        points_idx, _, dist = rasterize_points(pts3DRGB, 1024, radius, 1)
        gen_img = pts3DRGB.features_packed()[points_idx.permute(0, 3, 1, 2).long()[0], :]\
            .permute(0, 3, 1, 2).mean(dim=0, keepdim=True)
        new_imgs += [gen_img.squeeze().permute(1, 2, 0)]
        torchvision.utils.save_image(gen_img, './temp%s/%s/im-%03d.png' % (im_name, im_name, i + 10))

        mask = (points_idx.permute(0, 3, 1, 2) < 0).float()
        torchvision.utils.save_image(mask, './temp%s/%s/mask-%03d.png' % (im_name, im_name, i + 10))
def rotate_weight(self):
    if self.grid is not None:
        return F.grid_sample(self.weight, self.grid)
    else:
        return self.weight
def sample_salient_points(self, line_seg, desc, img_size, saliency_type='d2_net'):
    """
    Sample the most salient points along each line segment, with a minimal
    distance between each point. Pad the remaining points.

    Inputs:
        line_seg: an Nx2x2 torch.Tensor.
        desc: a NxDxHxW torch.Tensor.
        img_size: the original image size.
        saliency_type: 'd2_net' or 'asl_feat'.
    Outputs:
        line_points: an Nxnum_samplesx2 np.array.
        valid_points: a boolean Nxnum_samples np.array.
    """
    device = desc.device
    if not self.line_score:
        # Compute the score map
        if saliency_type == "d2_net":
            score = self.d2_net_saliency_score(desc)
        else:
            score = self.asl_feat_saliency_score(desc)

    num_lines = len(line_seg)
    line_lengths = np.linalg.norm(line_seg[:, 0] - line_seg[:, 1], axis=1)

    # The number of samples depends on the length of the line
    num_samples_lst = np.clip(line_lengths // self.min_dist_pts, 2, self.num_samples)
    line_points = np.empty((num_lines, self.num_samples, 2), dtype=float)
    valid_points = np.empty((num_lines, self.num_samples), dtype=bool)

    # Sample the score on a fixed number of points of each line
    n_samples_per_region = 4
    for n in np.arange(2, self.num_samples + 1):
        sample_rate = n * n_samples_per_region
        # Consider all lines where we can fit up to n points
        cur_mask = num_samples_lst == n
        cur_line_seg = line_seg[cur_mask]
        cur_num_lines = len(cur_line_seg)
        if cur_num_lines == 0:
            continue
        line_points_x = np.linspace(cur_line_seg[:, 0, 0], cur_line_seg[:, 1, 0], sample_rate, axis=-1)
        line_points_y = np.linspace(cur_line_seg[:, 0, 1], cur_line_seg[:, 1, 1], sample_rate, axis=-1)
        cur_line_points = np.stack([line_points_x, line_points_y], axis=-1).reshape(-1, 2)
        # cur_line_points is of shape (n_cur_lines * sample_rate, 2)
        cur_line_points = torch.tensor(cur_line_points, dtype=torch.float, device=device)
        grid_points = keypoints_to_grid(cur_line_points, img_size)

        if self.line_score:
            # The saliency score is high when the activations are locally
            # maximal along the line (and not in a square neighborhood)
            line_desc = F.grid_sample(desc, grid_points).squeeze()
            line_desc = line_desc.reshape(-1, cur_num_lines, sample_rate)
            line_desc = line_desc.permute(1, 0, 2)
            if saliency_type == "d2_net":
                scores = self.d2_net_saliency_score(line_desc)
            else:
                scores = self.asl_feat_saliency_score(line_desc)
        else:
            scores = F.grid_sample(score.unsqueeze(1), grid_points).squeeze()

        # Take the most salient point in n distinct regions
        scores = scores.reshape(-1, n, n_samples_per_region)
        best = torch.max(scores, dim=2, keepdim=True)[1].cpu().numpy()
        cur_line_points = cur_line_points.reshape(-1, n, n_samples_per_region, 2)
        cur_line_points = np.take_along_axis(cur_line_points, best[..., None], axis=2)[:, :, 0]

        # Pad
        cur_valid_points = np.ones((cur_num_lines, self.num_samples), dtype=bool)
        cur_valid_points[:, n:] = False
        cur_line_points = np.concatenate([
            cur_line_points,
            np.zeros((cur_num_lines, self.num_samples - n, 2), dtype=float)], axis=1)

        line_points[cur_mask] = cur_line_points
        valid_points[cur_mask] = cur_valid_points

    return line_points, valid_points
def forward(self, p, x):
    x = x.unsqueeze(1)
    p_features = p.transpose(1, -1)
    p = p.unsqueeze(1).unsqueeze(1)
    p = torch.cat([p + d for d in self.displacments], dim=2)  # (B,1,7,num_samples,3)
    feature_0 = F.grid_sample(x, p, padding_mode='border')  # out : (B,C (of x), 1,1,sample_num)

    net = self.actvn(self.conv_in(x))
    net = self.conv_in_bn(net)
    feature_1 = F.grid_sample(net, p, padding_mode='border')  # out : (B,C (of x), 1,1,sample_num)
    net = self.maxpool(net)

    net = self.actvn(self.conv_0(net))
    net = self.actvn(self.conv_0_1(net))
    net = self.conv0_1_bn(net)
    feature_2 = F.grid_sample(net, p, padding_mode='border')  # out : (B,C (of x), 1,1,sample_num)
    net = self.maxpool(net)

    net = self.actvn(self.conv_1(net))
    net = self.actvn(self.conv_1_1(net))
    net = self.conv1_1_bn(net)
    feature_3 = F.grid_sample(net, p, padding_mode='border')  # out : (B,C (of x), 1,1,sample_num)
    net = self.maxpool(net)

    net = self.actvn(self.conv_2(net))
    net = self.actvn(self.conv_2_1(net))
    net = self.conv2_1_bn(net)
    feature_4 = F.grid_sample(net, p, padding_mode='border')
    net = self.maxpool(net)

    net = self.actvn(self.conv_3(net))
    net = self.actvn(self.conv_3_1(net))
    net = self.conv3_1_bn(net)
    feature_5 = F.grid_sample(net, p, padding_mode='border')

    # here every channel corresponds to one feature.
    features = torch.cat((feature_0, feature_1, feature_2, feature_3, feature_4, feature_5),
                         dim=1)  # (B, features, 1,7,sample_num)
    shape = features.shape
    features = torch.reshape(features, (shape[0], shape[1] * shape[3], shape[4]))  # (B, features_per_sample, samples_num)
    # features = torch.cat((features, p_features), dim=1)  # (B, feature_size, samples_num)

    net = self.actvn(self.fc_0(features))
    net = self.actvn(self.fc_1(net))
    net = self.actvn(self.fc_2(net))
    net = self.fc_out(net)
    out = net.squeeze(1)

    return out
def unproject_heatmaps(heatmaps, proj_matricies, coord_volumes, volume_aggregation_method='sum', vol_confidences=None):
    device = heatmaps.device
    batch_size, n_views, n_joints, heatmap_shape = \
        heatmaps.shape[0], heatmaps.shape[1], heatmaps.shape[2], tuple(heatmaps.shape[3:])
    volume_shape = coord_volumes.shape[1:4]

    volume_batch = torch.zeros(batch_size, n_joints, *volume_shape, device=device)

    # TODO: speed up this loop
    for batch_i in range(batch_size):
        coord_volume = coord_volumes[batch_i]
        grid_coord = coord_volume.reshape((-1, 3))

        volume_batch_to_aggregate = torch.zeros(n_views, n_joints, *volume_shape, device=device)

        for view_i in range(n_views):
            heatmap = heatmaps[batch_i, view_i]
            heatmap = heatmap.unsqueeze(0)

            grid_coord_proj = multiview.project_3d_points_to_image_plane_without_distortion(
                proj_matricies[batch_i, view_i], grid_coord, convert_back_to_euclidean=False)

            invalid_mask = grid_coord_proj[:, 2] <= 0.0  # depth must be larger than 0.0
            grid_coord_proj[grid_coord_proj[:, 2] == 0.0, 2] = 1.0  # not to divide by zero
            grid_coord_proj = multiview.homogeneous_to_euclidean(grid_coord_proj)

            # transform to [-1.0, 1.0] range
            grid_coord_proj_transformed = torch.zeros_like(grid_coord_proj)
            grid_coord_proj_transformed[:, 0] = 2 * (grid_coord_proj[:, 0] / heatmap_shape[0] - 0.5)
            grid_coord_proj_transformed[:, 1] = 2 * (grid_coord_proj[:, 1] / heatmap_shape[1] - 0.5)
            grid_coord_proj = grid_coord_proj_transformed

            # prepare to F.grid_sample
            grid_coord_proj = grid_coord_proj.unsqueeze(1).unsqueeze(0)
            try:
                current_volume = F.grid_sample(heatmap, grid_coord_proj, align_corners=True)
            except TypeError:  # old PyTorch without the align_corners argument
                current_volume = F.grid_sample(heatmap, grid_coord_proj)

            # zero out non-valid points
            current_volume = current_volume.view(n_joints, -1)
            current_volume[:, invalid_mask] = 0.0

            # reshape back to volume
            current_volume = current_volume.view(n_joints, *volume_shape)

            # collect
            volume_batch_to_aggregate[view_i] = current_volume

        # aggregate resulting volume
        if volume_aggregation_method.startswith('conf'):
            volume_batch[batch_i] = (volume_batch_to_aggregate *
                                     vol_confidences[batch_i].view(n_views, n_joints, 1, 1, 1)).sum(0)
        elif volume_aggregation_method == 'sum':
            volume_batch[batch_i] = volume_batch_to_aggregate.sum(0)
        elif volume_aggregation_method == 'max':
            volume_batch[batch_i] = volume_batch_to_aggregate.max(0)[0]
        elif volume_aggregation_method == 'softmax':
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate.clone()
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate_softmin.view(n_views, -1)
            volume_batch_to_aggregate_softmin = nn.functional.softmax(volume_batch_to_aggregate_softmin, dim=0)
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate_softmin.view(n_views, n_joints, *volume_shape)
            volume_batch[batch_i] = (volume_batch_to_aggregate * volume_batch_to_aggregate_softmin).sum(0)
        else:
            raise ValueError("Unknown volume_aggregation_method: {}".format(volume_aggregation_method))

    return volume_batch
def warp_perspective(src: torch.Tensor, M: torch.Tensor,
                     dsize: Tuple[int, int], mode: str = 'bilinear',
                     padding_mode: str = 'zeros',
                     align_corners: Optional[bool] = None) -> torch.Tensor:
    r"""Applies a perspective transformation to an image.

    The function warp_perspective transforms the source image using
    the specified matrix:

    .. math::
        \text{dst} (x, y) = \text{src} \left(
        \frac{M_{11} x + M_{12} y + M_{13}}{M_{31} x + M_{32} y + M_{33}} ,
        \frac{M_{21} x + M_{22} y + M_{23}}{M_{31} x + M_{32} y + M_{33}}
        \right )

    Args:
        src (torch.Tensor): input image with shape :math:`(B, C, H, W)`.
        M (torch.Tensor): transformation matrix with shape :math:`(B, 3, 3)`.
        dsize (tuple): size of the output image (height, width).
        mode (str): interpolation mode to calculate output values
          'bilinear' | 'nearest'. Default: 'bilinear'.
        padding_mode (str): padding mode for outside grid values
          'zeros' | 'border' | 'reflection'. Default: 'zeros'.
        align_corners(bool, optional): interpolation flag. Default: None.

    Returns:
        torch.Tensor: the warped input image :math:`(B, C, H, W)`.

    Example:
        >>> img = torch.rand(1, 4, 5, 6)
        >>> H = torch.eye(3)[None]
        >>> out = warp_perspective(img, H, (4, 2), align_corners=True)
        >>> print(out.shape)
        torch.Size([1, 4, 4, 2])

    .. note::
        This function is often used in conjunction with
        :func:`get_perspective_transform`.

    .. note::
        See a working example `here <https://kornia.readthedocs.io/en/latest/
        tutorials/warp_perspective.html>`_.
    """
    if not isinstance(src, torch.Tensor):
        raise TypeError("Input src type is not a torch.Tensor. Got {}".format(
            type(src)))

    if not isinstance(M, torch.Tensor):
        raise TypeError("Input M type is not a torch.Tensor. Got {}".format(
            type(M)))

    if not len(src.shape) == 4:
        raise ValueError("Input src must be a BxCxHxW tensor. Got {}".format(
            src.shape))

    if not (len(M.shape) == 3 and M.shape[-2:] == (3, 3)):
        raise ValueError("Input M must be a Bx3x3 tensor. Got {}".format(
            M.shape))

    # TODO: remove the statement below in kornia v0.6
    if align_corners is None:
        message: str = (
            "The align_corners default value has been changed. By default now is set True "
            "in order to match cv2.warpPerspective. In case you want to keep your previous "
            "behaviour set it to False. This warning will disappear in kornia > v0.6.")
        warnings.warn(message)
        # set default value for align corners
        align_corners = True

    B, C, H, W = src.size()
    h_out, w_out = dsize

    # we normalize the 3x3 transformation matrix and convert to 3x4
    dst_norm_trans_src_norm: torch.Tensor = normalize_homography(
        M, (H, W), (h_out, w_out))  # Bx3x3

    src_norm_trans_dst_norm = torch.inverse(dst_norm_trans_src_norm)  # Bx3x3

    # this piece of code substitutes F.affine_grid since it does not support 3x3
    grid = create_meshgrid(h_out, w_out, normalized_coordinates=True,
                           device=src.device).to(src.dtype).repeat(B, 1, 1, 1)
    grid = transform_points(src_norm_trans_dst_norm[:, None, None], grid)

    return F.grid_sample(src, grid, align_corners=align_corners, mode=mode,
                         padding_mode=padding_mode)
def forward(self, data, is_volatile=False, specific_rotation=-1):
    if is_volatile:
        torch.set_grad_enabled(False)
        output_prob = []

        # Apply rotations to images
        for rotate_idx in range(self.num_rotations):
            rotate_theta = np.radians(rotate_idx * (360 / self.num_rotations))

            # Compute sample grid for rotation BEFORE neural network
            affine_mat_before = np.asarray(
                [[np.cos(-rotate_theta), np.sin(-rotate_theta), 0],
                 [-np.sin(-rotate_theta), np.cos(-rotate_theta), 0]])
            affine_mat_before.shape = (2, 3, 1)
            affine_mat_before = torch.from_numpy(affine_mat_before).permute(2, 0, 1).float()
            flow_grid_before = F.affine_grid(
                Variable(affine_mat_before, requires_grad=False).to(self.device),
                data.size())

            # Rotate images clockwise
            # (`volatile=True` was removed in PyTorch 0.4; grad is already
            # disabled above, so requires_grad=False is the equivalent)
            rotate_data = F.grid_sample(
                Variable(data, requires_grad=False).to(self.device),
                flow_grid_before, mode='nearest')

            # Compute features
            if self.net is not None:
                push_feat, grasp_feat, place_feat = self.net.forward(rotate_data)
            else:
                push_feat = self.push_net.forward(rotate_data)
                grasp_feat = self.grasp_net.forward(rotate_data)
                place_feat = self.place_net.forward(rotate_data)

            # Compute sample grid for rotation AFTER branches
            affine_mat_after = np.asarray(
                [[np.cos(rotate_theta), np.sin(rotate_theta), 0],
                 [-np.sin(rotate_theta), np.cos(rotate_theta), 0]])
            affine_mat_after.shape = (2, 3, 1)
            affine_mat_after = torch.from_numpy(affine_mat_after).permute(2, 0, 1).float()
            flow_grid_after = F.affine_grid(
                Variable(affine_mat_after, requires_grad=False).to(self.device),
                push_feat.data.size())

            # Forward pass through branches, undo rotation on output predictions, upsample results
            output_prob.append([
                F.grid_sample(push_feat, flow_grid_after, mode='nearest'),
                F.grid_sample(grasp_feat, flow_grid_after, mode='nearest'),
                F.grid_sample(place_feat, flow_grid_after, mode='nearest')
            ])

        torch.set_grad_enabled(True)
        return output_prob

    else:
        self.output_prob = []

        rotate_idx = specific_rotation
        rotate_theta = np.radians(rotate_idx * (360 / self.num_rotations))

        # Compute sample grid for rotation BEFORE branches
        affine_mat_before = np.asarray(
            [[np.cos(-rotate_theta), np.sin(-rotate_theta), 0],
             [-np.sin(-rotate_theta), np.cos(-rotate_theta), 0]])
        affine_mat_before.shape = (2, 3, 1)
        affine_mat_before = torch.from_numpy(affine_mat_before).permute(2, 0, 1).float()
        flow_grid_before = F.affine_grid(
            Variable(affine_mat_before, requires_grad=False).to(self.device),
            data.size())

        # Rotate images clockwise
        rotate_data = F.grid_sample(
            Variable(data, requires_grad=False).to(self.device),
            flow_grid_before, mode='nearest')

        # Compute features
        if self.net is not None:
            push_feat, grasp_feat, place_feat = self.net.forward(rotate_data)
        else:
            push_feat = self.push_net.forward(rotate_data)
            grasp_feat = self.grasp_net.forward(rotate_data)
            place_feat = self.place_net.forward(rotate_data)

        # Compute sample grid for rotation AFTER branches
        affine_mat_after = np.asarray(
            [[np.cos(rotate_theta), np.sin(rotate_theta), 0],
             [-np.sin(rotate_theta), np.cos(rotate_theta), 0]])
        affine_mat_after.shape = (2, 3, 1)
        affine_mat_after = torch.from_numpy(affine_mat_after).permute(2, 0, 1).float()
        flow_grid_after = F.affine_grid(
            Variable(affine_mat_after, requires_grad=False).to(self.device),
            push_feat.data.size())

        # Forward pass through branches, undo rotation on output predictions, upsample results
        self.output_prob.append([
            F.grid_sample(push_feat, flow_grid_after, mode='nearest'),
            F.grid_sample(grasp_feat, flow_grid_after, mode='nearest'),
            F.grid_sample(place_feat, flow_grid_after, mode='nearest')
        ])

        return self.output_prob
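# --- illustration (not from the original repo) ---------------------------
# The rotate -> process -> counter-rotate pattern above works because the
# two affine matrices are transposes (inverses) of each other. A minimal
# standalone sketch; angle and sizes are made up.
import math
import torch
import torch.nn.functional as F

def rotate(batch, theta_rad):
    # 2x3 rotation matrix in grid_sample's normalized coordinates
    c, s = math.cos(theta_rad), math.sin(theta_rad)
    mat = torch.tensor([[c, s, 0.0], [-s, c, 0.0]])
    mat = mat.unsqueeze(0).expand(batch.size(0), -1, -1)
    grid = F.affine_grid(mat, list(batch.size()), align_corners=False)
    return F.grid_sample(batch, grid, align_corners=False)

x = torch.rand(2, 3, 32, 32)
# rotating by -theta afterwards approximately undoes the first warp
# (up to interpolation loss near the borders)
x_roundtrip = rotate(rotate(x, 0.3), -0.3)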
def remap(tensor: torch.Tensor, map_x: torch.Tensor, map_y: torch.Tensor,
          mode: str = 'bilinear', padding_mode: str = 'zeros',
          align_corners: Optional[bool] = None,
          normalized_coordinates: bool = False) -> torch.Tensor:
    r"""Applies a generic geometrical transformation to a tensor.

    The function remap transforms the source tensor using the specified map:

    .. math::
        \text{dst}(x, y) = \text{src}(map_x(x, y), map_y(x, y))

    Args:
        tensor (torch.Tensor): the tensor to remap with shape (B, D, H, W).
          Where D is the number of channels.
        map_x (torch.Tensor): the flow in the x-direction in pixel coordinates.
          The tensor must be in the shape of (B, H, W).
        map_y (torch.Tensor): the flow in the y-direction in pixel coordinates.
          The tensor must be in the shape of (B, H, W).
        mode (str): interpolation mode to calculate output values
          'bilinear' | 'nearest'. Default: 'bilinear'.
        padding_mode (str): padding mode for outside grid values
          'zeros' | 'border' | 'reflection'. Default: 'zeros'.
        align_corners (bool, optional): mode for grid_generation. Default: None.
        normalized_coordinates (bool): whether the input coordinates are
           normalised in the range of [-1, 1]. Default: False

    Returns:
        torch.Tensor: the warped tensor with same shape as the input grid maps.

    Example:
        >>> from kornia.utils import create_meshgrid
        >>> grid = create_meshgrid(2, 2, False)  # 1x2x2x2
        >>> grid += 1  # apply offset in both directions
        >>> input = torch.ones(1, 1, 2, 2)
        >>> remap(input, grid[..., 0], grid[..., 1], align_corners=True)   # 1x1x2x2
        tensor([[[[1., 0.],
                  [0., 0.]]]])

    .. note::
        This function is often used in conjunction with :func:`create_meshgrid`.
    """
    if not isinstance(tensor, torch.Tensor):
        raise TypeError(
            "Input tensor type is not a torch.Tensor. Got {}".format(
                type(tensor)))

    if not isinstance(map_x, torch.Tensor):
        raise TypeError(
            "Input map_x type is not a torch.Tensor. Got {}".format(
                type(map_x)))

    if not isinstance(map_y, torch.Tensor):
        raise TypeError(
            "Input map_y type is not a torch.Tensor. Got {}".format(
                type(map_y)))

    if not tensor.shape[-2:] == map_x.shape[-2:] == map_y.shape[-2:]:
        raise ValueError("Inputs last two dimensions must match.")

    batch_size, _, height, width = tensor.shape

    # grid_sample needs the grid between -1/1
    map_xy: torch.Tensor = torch.stack([map_x, map_y], dim=-1)

    # normalize coordinates if not already normalized
    if not normalized_coordinates:
        map_xy = normalize_pixel_coordinates(map_xy, height, width)

    # simulate broadcasting since grid_sample does not support it
    map_xy_norm: torch.Tensor = map_xy.expand(batch_size, -1, -1, -1)

    # warp and return
    tensor_warped: torch.Tensor = F.grid_sample(tensor, map_xy_norm,
                                                mode=mode,
                                                padding_mode=padding_mode,
                                                align_corners=align_corners)
    return tensor_warped
def test_gmm(opt, test_loader, model, board):
    model.to(opt.device)
    model.eval()

    base_name = os.path.basename(opt.checkpoint)
    name = opt.name
    save_dir = os.path.join(opt.result_dir, name, opt.datamode)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    warp_cloth_dir = os.path.join(save_dir, 'warp-cloth')
    if not os.path.exists(warp_cloth_dir):
        os.makedirs(warp_cloth_dir)
    warp_mask_dir = os.path.join(save_dir, 'warp-mask')
    if not os.path.exists(warp_mask_dir):
        os.makedirs(warp_mask_dir)
    result_dir1 = os.path.join(save_dir, 'result_dir')
    if not os.path.exists(result_dir1):
        os.makedirs(result_dir1)
    overlayed_TPS_dir = os.path.join(save_dir, 'overlayed_TPS')
    if not os.path.exists(overlayed_TPS_dir):
        os.makedirs(overlayed_TPS_dir)
    warped_grid_dir = os.path.join(save_dir, 'warped_grid')
    if not os.path.exists(warped_grid_dir):
        os.makedirs(warped_grid_dir)

    for step, inputs in enumerate(test_loader.data_loader):
        iter_start_time = time.time()

        c_names = inputs['c_name']
        im_names = inputs['im_name']
        im = inputs['image'].to(opt.device)
        im_pose = inputs['pose_image'].to(opt.device)
        im_h = inputs['head'].to(opt.device)
        shape = inputs['shape'].to(opt.device)
        agnostic = inputs['agnostic'].to(opt.device)
        c = inputs['cloth'].to(opt.device)
        cm = inputs['cloth_mask'].to(opt.device)
        im_c = inputs['parse_cloth'].to(opt.device)
        im_g = inputs['grid_image'].to(opt.device)
        shape_ori = inputs['shape_ori']  # original body shape without blurring

        grid, theta = model(agnostic, cm)
        warped_cloth = F.grid_sample(c, grid, padding_mode='border')
        warped_mask = F.grid_sample(cm, grid, padding_mode='zeros')
        warped_grid = F.grid_sample(im_g, grid, padding_mode='zeros')
        overlay = 0.7 * warped_cloth + 0.3 * im

        visuals = [[im_h, shape, im_pose],
                   [c, warped_cloth, im_c],
                   [warped_grid, (warped_cloth + im) * 0.5, im]]

        # save_images(warped_cloth, c_names, warp_cloth_dir)
        # save_images(warped_mask*2-1, c_names, warp_mask_dir)
        save_images(warped_cloth, im_names, warp_cloth_dir)
        save_images(warped_mask * 2 - 1, im_names, warp_mask_dir)
        save_images(shape_ori.to(opt.device) * 0.2 + warped_cloth * 0.8,
                    im_names, result_dir1)
        save_images(warped_grid, im_names, warped_grid_dir)
        save_images(overlay, im_names, overlayed_TPS_dir)

        if (step + 1) % opt.display_count == 0:
            board_add_images(board, 'combine', visuals, step + 1)
            t = time.time() - iter_start_time
            print('step: %8d, time: %.3f' % (step + 1, t), flush=True)
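# --- illustration (not from the original repo) ---------------------------
# Note the two padding modes above: the cloth is warped with
# padding_mode='border' (edge colours extend past the grid) while the mask
# uses 'zeros' (out-of-grid regions count as background). A tiny hedged
# demonstration with made-up tensors:
import torch
import torch.nn.functional as F

cloth = torch.ones(1, 3, 8, 8)   # stand-in for a cloth image
mask = torch.ones(1, 1, 8, 8)    # its binary mask

# a grid that deliberately samples outside [-1, 1] left and right
theta = torch.tensor([[[2.0, 0.0, 0.0], [0.0, 1.0, 0.0]]])
grid = F.affine_grid(theta, torch.Size((1, 3, 8, 8)), align_corners=False)

warped_cloth = F.grid_sample(cloth, grid, padding_mode='border')
warped_mask = F.grid_sample(mask, grid, padding_mode='zeros')
assert warped_cloth.min() == 1.0   # border padding keeps edge values
assert warped_mask.min() == 0.0    # zeros padding zeroes the overflow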
def generate_mario_data(ntrain=10000, ntest=5000, batch_size=128,
                        dpath="./data/", dataseed=88):
    imgs = np.load(dpath + "images.npz")
    mario = torch.FloatTensor(imgs['mario'])
    iggy = torch.FloatTensor(imgs['iggy'])

    ntrain_each = int(ntrain / 2)
    ntest_each = int(ntest / 2)

    train_mario = torch.cat(ntrain_each * [mario])
    train_iggy = torch.cat(ntrain_each * [iggy])
    test_mario = torch.cat(ntest_each * [mario])
    test_iggy = torch.cat(ntest_each * [iggy])

    torch.random.manual_seed(dataseed)

    ## get angles and make labels ##
    ## this is a bunch of stupid algebra ##
    train_mario_pos = torch.rand(int(ntrain_each / 2)) * np.pi / 2. - np.pi / 4.
    neg_angles = torch.rand(int(ntrain_each / 2)) * np.pi / 2. - np.pi / 4.
    train_mario_neg = neg_angles.clone()
    train_mario_neg[neg_angles < 0] = neg_angles[neg_angles < 0] + np.pi
    train_mario_neg[neg_angles > 0] = neg_angles[neg_angles > 0] - np.pi
    train_mario_angles = torch.cat((train_mario_pos, train_mario_neg))

    train_iggy_pos = torch.rand(int(ntrain_each / 2)) * np.pi / 2. - np.pi / 4.
    neg_angles = torch.rand(int(ntrain_each / 2)) * np.pi / 2. - np.pi / 4.
    train_iggy_neg = neg_angles.clone()
    train_iggy_neg[neg_angles < 0] = neg_angles[neg_angles < 0] + np.pi
    train_iggy_neg[neg_angles > 0] = neg_angles[neg_angles > 0] - np.pi
    train_iggy_angles = torch.cat((train_iggy_pos, train_iggy_neg))

    test_mario_pos = torch.rand(int(ntest_each / 2)) * np.pi / 2. - np.pi / 4.
    neg_angles = torch.rand(int(ntest_each / 2)) * np.pi / 2. - np.pi / 4.
    test_mario_neg = neg_angles.clone()
    test_mario_neg[neg_angles < 0] = neg_angles[neg_angles < 0] + np.pi
    test_mario_neg[neg_angles > 0] = neg_angles[neg_angles > 0] - np.pi
    test_mario_angles = torch.cat((test_mario_pos, test_mario_neg))

    test_iggy_pos = torch.rand(int(ntest_each / 2)) * np.pi / 2. - np.pi / 4.
    neg_angles = torch.rand(int(ntest_each / 2)) * np.pi / 2. - np.pi / 4.
    test_iggy_neg = neg_angles.clone()
    test_iggy_neg[neg_angles < 0] = neg_angles[neg_angles < 0] + np.pi
    test_iggy_neg[neg_angles > 0] = neg_angles[neg_angles > 0] - np.pi
    test_iggy_angles = torch.cat((test_iggy_pos, test_iggy_neg))

    train_mario_labs = torch.zeros_like(train_mario_angles)
    train_mario_labs[train_mario_angles.abs() > 1.] = 1.
    train_iggy_labs = torch.zeros_like(train_iggy_angles)
    train_iggy_labs[train_iggy_angles.abs() < 1.] = 2.
    train_iggy_labs[train_iggy_angles.abs() > 1.] = 3.

    test_mario_labs = torch.zeros_like(test_mario_angles)
    test_mario_labs[test_mario_angles.abs() > 1.] = 1.
    test_iggy_labs = torch.zeros_like(test_iggy_angles)
    test_iggy_labs[test_iggy_angles.abs() < 1.] = 2.
    test_iggy_labs[test_iggy_angles.abs() > 1.] = 3.
    ## combine to just train and test ##
    train_images = torch.cat((train_mario, train_iggy))
    test_images = torch.cat((test_mario, test_iggy))
    train_angles = torch.cat((train_mario_angles, train_iggy_angles))
    test_angles = torch.cat((test_mario_angles, test_iggy_angles))
    train_labs = torch.cat((train_mario_labs, train_iggy_labs)).type(torch.LongTensor)
    test_labs = torch.cat((test_mario_labs, test_iggy_labs)).type(torch.LongTensor)

    ## rotate ##
    # train #
    with torch.no_grad():
        # Build affine matrices that rotate each image by its sampled angle
        affineMatrices = torch.zeros(ntrain, 2, 3)
        affineMatrices[:, 0, 0] = train_angles.cos()
        affineMatrices[:, 1, 1] = train_angles.cos()
        affineMatrices[:, 0, 1] = train_angles.sin()
        affineMatrices[:, 1, 0] = -train_angles.sin()

        flowgrid = F.affine_grid(affineMatrices, size=train_images.size())
        train_images = F.grid_sample(train_images, flowgrid)

    # test #
    with torch.no_grad():
        # Build affine matrices that rotate each image by its sampled angle
        affineMatrices = torch.zeros(ntest, 2, 3)
        affineMatrices[:, 0, 0] = test_angles.cos()
        affineMatrices[:, 1, 1] = test_angles.cos()
        affineMatrices[:, 0, 1] = test_angles.sin()
        affineMatrices[:, 1, 0] = -test_angles.sin()

        flowgrid = F.affine_grid(affineMatrices, size=test_images.size())
        test_images = F.grid_sample(test_images, flowgrid)

    ## shuffle ##
    trainshuffler = np.random.permutation(ntrain)
    testshuffler = np.random.permutation(ntest)

    train_images = train_images[np.ix_(trainshuffler), ::].squeeze()
    train_labs = train_labs[np.ix_(trainshuffler)]
    test_images = test_images[np.ix_(testshuffler), ::].squeeze()
    test_labs = test_labs[np.ix_(testshuffler)]

    if batch_size == ntrain:
        return train_images, train_labs, test_images, test_labs
    else:
        traindata = torch.utils.data.TensorDataset(train_images, train_labs)
        trainloader = torch.utils.data.DataLoader(traindata, batch_size=batch_size)
        testdata = torch.utils.data.TensorDataset(test_images, test_labs)
        testloader = torch.utils.data.DataLoader(testdata, batch_size=batch_size)
        return trainloader, testloader
def query_rgb(self, coord, cell=None):
    feat = self.feat

    if self.imnet is None:
        ret = F.grid_sample(feat, coord.flip(-1).unsqueeze(1), mode='nearest',
                            align_corners=False)[:, :, 0, :] \
            .permute(0, 2, 1)
        return ret

    if self.feat_unfold:
        feat = F.unfold(feat, 3, padding=1).view(
            feat.shape[0], feat.shape[1] * 9, feat.shape[2], feat.shape[3])

    if self.local_ensemble:
        vx_lst = [-1, 1]
        vy_lst = [-1, 1]
        eps_shift = 1e-6
    else:
        vx_lst, vy_lst, eps_shift = [0], [0], 0

    # field radius (global: [-1, 1])
    rx = 2 / feat.shape[-2] / 2
    ry = 2 / feat.shape[-1] / 2

    feat_coord = make_coord(feat.shape[-2:], flatten=False).cuda() \
        .permute(2, 0, 1) \
        .unsqueeze(0).expand(feat.shape[0], 2, *feat.shape[-2:])

    preds = []
    areas = []
    for vx in vx_lst:
        for vy in vy_lst:
            coord_ = coord.clone()
            coord_[:, :, 0] += vx * rx + eps_shift
            coord_[:, :, 1] += vy * ry + eps_shift
            coord_.clamp_(-1 + 1e-6, 1 - 1e-6)
            q_feat = F.grid_sample(
                feat, coord_.flip(-1).unsqueeze(1), mode='nearest',
                align_corners=False)[:, :, 0, :] \
                .permute(0, 2, 1)
            q_coord = F.grid_sample(
                feat_coord, coord_.flip(-1).unsqueeze(1), mode='nearest',
                align_corners=False)[:, :, 0, :] \
                .permute(0, 2, 1)
            rel_coord = coord - q_coord
            rel_coord[:, :, 0] *= feat.shape[-2]
            rel_coord[:, :, 1] *= feat.shape[-1]
            inp = torch.cat([q_feat, rel_coord], dim=-1)

            if self.cell_decode:
                rel_cell = cell.clone()
                rel_cell[:, :, 0] *= feat.shape[-2]
                rel_cell[:, :, 1] *= feat.shape[-1]
                inp = torch.cat([inp, rel_cell], dim=-1)

            bs, q = coord.shape[:2]
            pred = self.imnet(inp.view(bs * q, -1)).view(bs, q, -1)
            preds.append(pred)

            area = torch.abs(rel_coord[:, :, 0] * rel_coord[:, :, 1])
            areas.append(area + 1e-9)

    tot_area = torch.stack(areas).sum(dim=0)
    if self.local_ensemble:
        t = areas[0]
        areas[0] = areas[3]
        areas[3] = t
        t = areas[1]
        areas[1] = areas[2]
        areas[2] = t
    ret = 0
    for pred, area in zip(preds, areas):
        ret = ret + pred * (area / tot_area).unsqueeze(-1)
    return ret
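# --- illustration (not from the original repo) ---------------------------
# `coord` above is stored in (row, col) = (y, x) order, while grid_sample
# expects (x, y); that is why every call flips the last dimension first.
# A hedged, minimal illustration of the convention:
import torch
import torch.nn.functional as F

feat = torch.arange(12, dtype=torch.float32).view(1, 1, 3, 4)  # H=3, W=4

coord_rc = torch.tensor([[[1.0, -1.0]]])  # one point as (row, col): bottom-left

val = F.grid_sample(feat, coord_rc.flip(-1).unsqueeze(1),
                    mode='nearest', align_corners=True)
assert val.item() == feat[0, 0, -1, 0].item()  # row 2, col 0 -> 8.0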
def pascal_parts_metrics(batch, batch_start_idx, theta_aff, theta_tps,
                         theta_aff_tps, stats, args, use_cuda=True):
    do_aff = theta_aff is not None
    do_tps = theta_tps is not None
    do_aff_tps = theta_aff_tps is not None

    batch_size = batch['source_im_size'].size(0)
    for b in range(batch_size):
        idx = batch_start_idx + b
        h_src = int(batch['source_im_size'][b, 0].data.cpu().numpy())
        w_src = int(batch['source_im_size'][b, 1].data.cpu().numpy())
        h_tgt = int(batch['target_im_size'][b, 0].data.cpu().numpy())
        w_tgt = int(batch['target_im_size'][b, 1].data.cpu().numpy())

        # do pck
        if batch['keypoint_A'][b].size != 0:
            src_points = Variable(torch.FloatTensor(batch['keypoint_A'][b])).unsqueeze(0)
            tgt_points = Variable(torch.FloatTensor(batch['keypoint_B'][b])).unsqueeze(0)
            L_pck = Variable(torch.FloatTensor([batch['L_pck'][b]])).unsqueeze(1)
            if use_cuda:
                src_points = src_points.cuda()
                tgt_points = tgt_points.cuda()
                L_pck = L_pck.cuda()
            batch_b = {'source_im_size': batch['source_im_size'][b, :].unsqueeze(0),
                       'target_im_size': batch['target_im_size'][b, :].unsqueeze(0),
                       'source_points': src_points,
                       'target_points': tgt_points,
                       'L_pck': L_pck}
            args.pck_alpha = 0.05
            stats = pck_metric(batch_b, idx,
                               theta_aff[b, :].unsqueeze(0) if do_aff else None,
                               theta_tps[b, :].unsqueeze(0) if do_tps else None,
                               theta_aff_tps[b, :].unsqueeze(0) if do_aff_tps else None,
                               stats, args, use_cuda)
        else:
            if do_aff:
                stats['aff']['pck'][idx] = -1
            if do_tps:
                stats['tps']['pck'][idx] = -1
            if do_aff_tps:
                stats['aff_tps']['pck'][idx] = -1

        # do area
        source_mask = Variable(torch.FloatTensor(
            batch['part_A'][b].astype(np.float32)).unsqueeze(0).transpose(2, 3).transpose(1, 2))
        target_mask = Variable(torch.FloatTensor(
            batch['part_B'][b].astype(np.float32)).unsqueeze(0).transpose(2, 3).transpose(1, 2))
        if use_cuda:
            source_mask = source_mask.cuda()
            target_mask = target_mask.cuda()

        grid_aff, grid_tps, grid_aff_tps = theta_to_sampling_grid(
            h_tgt, w_tgt,
            theta_aff[b, :] if do_aff else None,
            theta_tps[b, :] if do_tps else None,
            theta_aff_tps[b, :] if do_aff_tps else None,
            use_cuda=use_cuda,
            tps_reg_factor=args.tps_reg_factor)

        if do_aff:
            warped_mask_aff = F.grid_sample(source_mask, grid_aff)
            stats['aff']['intersection_over_union'][idx] = intersection_over_union(warped_mask_aff, target_mask)
        if do_tps:
            warped_mask_tps = F.grid_sample(source_mask, grid_tps)
            stats['tps']['intersection_over_union'][idx] = intersection_over_union(warped_mask_tps, target_mask)
        if do_aff_tps:
            warped_mask_aff_tps = F.grid_sample(source_mask, grid_aff_tps)
            stats['aff_tps']['intersection_over_union'][idx] = intersection_over_union(warped_mask_aff_tps, target_mask)

        # if idx == 300:
        #     import pdb; pdb.set_trace()

    return stats
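# --- illustration (not from the original repo) ---------------------------
# For context, a plausible standalone version of the intersection_over_union
# helper used above; this is an assumption -- the repo's implementation may
# binarize with a different threshold or reduce differently.
import torch

def intersection_over_union_sketch(warped_mask, target_mask, thresh=0.5):
    w = (warped_mask > thresh).float()   # hypothetical binarization
    t = (target_mask > thresh).float()
    inter = (w * t).sum()
    union = w.sum() + t.sum() - inter
    return (inter / union.clamp(min=1)).item()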
"""
Box (x1, y1, x2, y2) to normalized affine matrix:

[ x2-x1             x1 + x2 - W + 1 ]
[ -----      0      --------------- ]
[ W - 1                  W - 1      ]
[                                   ]
[           y2-y1   y1 + y2 - H + 1 ]
[   0       -----   --------------- ]
[           H - 1        H - 1      ]
"""
height, width = img_t.size(2), img_t.size(3)

theta = torch.zeros(1, 2, 3)  # requires grad

# do crop
theta[:, 0, 0] = (x2 - x1) / (width - 1)
theta[:, 0, 2] = (x1 + x2 - width + 1) / (width - 1)
theta[:, 1, 1] = (y2 - y1) / (height - 1)
theta[:, 1, 2] = (y1 + y2 - height + 1) / (height - 1)

grid = F.affine_grid(theta, torch.Size((1, 3, 768, 768)))
crop = F.grid_sample(img_t, grid.type(torch.float32), padding_mode='zeros')

plt.rcParams['figure.figsize'] = 8, 8
with sns.axes_style('white'):
    fig, axis = plt.subplots(nrows=1, ncols=2)
    axis[0].imshow(img_t.data.numpy().squeeze(0).transpose((1, 2, 0)))
    axis[0].plot(x1, y1, 'r+')
    axis[0].plot(x2, y2, 'b+')
    axis[0].set_title('')
    axis[1].imshow(crop.data.numpy().squeeze(0).transpose((1, 2, 0)))
    axis[1].set_title('')
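# --- illustration (not from the original repo) ---------------------------
# A hedged numeric check of the box-to-affine mapping above: for a box that
# covers the whole image the affine collapses to the identity, so the crop
# reproduces the input (sizes below are made up).
import torch
import torch.nn.functional as F

W = H = 768
x1, y1, x2, y2 = 0.0, 0.0, W - 1.0, H - 1.0  # box covering the whole image

theta = torch.zeros(1, 2, 3)
theta[:, 0, 0] = (x2 - x1) / (W - 1)           # -> 1.0
theta[:, 0, 2] = (x1 + x2 - W + 1) / (W - 1)   # -> 0.0
theta[:, 1, 1] = (y2 - y1) / (H - 1)           # -> 1.0
theta[:, 1, 2] = (y1 + y2 - H + 1) / (H - 1)   # -> 0.0

img = torch.rand(1, 3, H, W)
grid = F.affine_grid(theta, torch.Size((1, 3, H, W)), align_corners=True)
assert torch.allclose(F.grid_sample(img, grid, align_corners=True), img,
                      atol=1e-5)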
def forward(self, input_img, input_grid):
    # grid_sample expects the warp field channel-last: (B, H, W, 2)
    self.warp = input_grid.permute(0, 2, 3, 1)
    self.output = F.grid_sample(input_img, self.warp)
    return self.output
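# --- illustration (not from the original repo) ---------------------------
# Conv layers usually emit the warp field channel-first, (B, 2, H, W); the
# permute in the forward above bridges the two layouts. A hedged usage
# sketch with an identity field:
import torch
import torch.nn.functional as F

img = torch.rand(1, 3, 16, 16)
# identity flow stored channel-first, as a conv head would emit it
grid_cf = F.affine_grid(torch.eye(2, 3).unsqueeze(0), img.size(),
                        align_corners=True).permute(0, 3, 1, 2)  # (B,2,H,W)

warped = F.grid_sample(img, grid_cf.permute(0, 2, 3, 1),  # back to (B,H,W,2)
                       align_corners=True)
assert torch.allclose(warped, img, atol=1e-5)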
def __getitem__(self, idx):
    data = self.df.iloc[idx]
    # get the transformation type flag
    transform_type = data['aff/tps/homo'].astype('uint8')
    source_img_name = osp.join(self.img_path, data.fname)
    if not os.path.exists(source_img_name):
        raise ValueError(
            "The path to one of the original images {} does not exist, check your "
            "image path and your csv file !".format(source_img_name))

    # aff/tps transformations
    if transform_type == 0 or transform_type == 1:
        # read image
        source_img = cv2.cvtColor(cv2.imread(source_img_name),
                                  cv2.COLOR_BGR2RGB)

        # crop the image first if it is too big, which would otherwise
        # cause a very large resize afterwards
        if source_img.shape[0] > self.H_AFF_TPS*self.ratio_cropping or \
                source_img.shape[1] > self.W_AFF_TPS*self.ratio_cropping:
            source_img, x, y = center_crop(
                source_img, (int(self.W_AFF_TPS * self.ratio_cropping),
                             int(self.H_AFF_TPS * self.ratio_cropping)))

        if transform_type == 0:
            theta = data.iloc[2:8].values.astype('float').reshape(2, 3)
            theta = torch.Tensor(theta.astype(np.float32)).expand(1, 2, 3)
        else:
            theta = data.iloc[2:].values.astype('float')
            theta = np.expand_dims(np.expand_dims(theta, 1), 2)
            theta = torch.Tensor(theta.astype(np.float32))
            theta = theta.expand(1, 18, 1, 1)

        # make arrays float tensor for subsequent processing
        image = torch.Tensor(source_img.astype(np.float32))
        if image.numpy().ndim == 2:
            image = \
                torch.Tensor(np.dstack((source_img.astype(np.float32),
                                        source_img.astype(np.float32),
                                        source_img.astype(np.float32))))
        image = image.transpose(1, 2).transpose(0, 1)

        # Resize image using bilinear sampling with identity affine
        image_pad = self.transform_image(image.unsqueeze(0), self.H_AFF_TPS,
                                         self.W_AFF_TPS)

        # padding and crop factor depend on where to crop and pad the image
        img_src_crop = \
            self.transform_image(image_pad, self.H_OUT, self.W_OUT,
                                 padding_factor=0.8,
                                 crop_factor=9/16).squeeze().numpy()
        img_target_crop = \
            self.transform_image(image_pad, self.H_OUT, self.W_OUT,
                                 padding_factor=0.8, crop_factor=9/16,
                                 theta=theta).squeeze().numpy()

        # convert to [H, W, C] convention (for np arrays)
        img_src_crop = img_src_crop.transpose((1, 2, 0))
        img_target_crop = img_target_crop.transpose((1, 2, 0))

    # Homography transformation
    elif transform_type == 2:
        # ATTENTION: cv2.resize takes the size inverted, first w and then h
        theta = data.iloc[2:11].values.astype('double').reshape(3, 3)
        source_img = cv2.cvtColor(cv2.imread(source_img_name),
                                  cv2.COLOR_BGR2RGB)

        # crop the image first if it is too big, which would otherwise
        # cause a very large resize afterwards
        if source_img.shape[0] > self.H_HOMO * self.ratio_cropping \
                or source_img.shape[1] > self.W_HOMO*self.ratio_cropping:
            source_img, x, y = center_crop(
                source_img, (int(self.W_HOMO * self.ratio_cropping),
                             int(self.H_HOMO * self.ratio_cropping)))

        # resize to the values stated at the beginning
        img_src_orig = cv2.resize(source_img,
                                  dsize=(self.W_HOMO, self.H_HOMO),
                                  interpolation=cv2.INTER_LINEAR)  # cv2.resize takes W first

        # get a central crop:
        img_src_crop, x1_crop, y1_crop = center_crop(img_src_orig, self.W_OUT)

        # Obtaining the full and crop grids out of H
        grid_full, grid_crop = self.get_grid(theta,
                                             ccrop=(x1_crop, y1_crop))

        # warp the fullsize original source image
        img_src_orig = torch.Tensor(img_src_orig.astype(np.float32))
        img_src_orig = img_src_orig.permute(2, 0, 1)
        if float(torch.__version__[:3]) >= 1.3:
            img_orig_target_vrbl = F.grid_sample(img_src_orig.unsqueeze(0),
                                                 grid_full,
                                                 align_corners=True)
        else:
            img_orig_target_vrbl = F.grid_sample(img_src_orig.unsqueeze(0),
                                                 grid_full)
        img_orig_target_vrbl = \
            img_orig_target_vrbl.squeeze().permute(1, 2, 0)

        # get the central crop of the target image
        img_target_crop, _, _ = center_crop(img_orig_target_vrbl.numpy(),
                                            self.W_OUT)

    else:
        print('Error: transformation type')

    if self.transforms_source is not None and self.transforms_target is not None:
        cropped_source_image = \
            self.transforms_source(img_src_crop.astype(np.uint8))
        cropped_target_image = \
            self.transforms_target(img_target_crop.astype(np.uint8))
    else:
        # if no specific transformations are applied, they are just put in 3xHxW
        cropped_source_image = \
            torch.Tensor(img_src_crop.astype(np.float32))
        cropped_target_image = \
            torch.Tensor(img_target_crop.astype(np.float32))

        # convert to [C, H, W] convention (for tensors)
        cropped_source_image = cropped_source_image.permute(-1, 0, 1)
        cropped_target_image = cropped_target_image.permute(-1, 0, 1)

    # construct a pyramid with a corresponding grid on each layer
    grid_pyramid = []
    mask_x = []
    mask_y = []
    if transform_type == 0:
        for layer_size in self.pyramid_param:
            # get layer size or change it so that it corresponds to PWCNet
            grid = self.generate_grid(layer_size, layer_size,
                                      theta).squeeze(0)
            mask = grid.ge(-1) & grid.le(1)
            grid_pyramid.append(grid)
            mask_x.append(mask[:, :, 0])
            mask_y.append(mask[:, :, 1])
    elif transform_type == 1:
        grid = self.generate_grid(self.H_OUT, self.W_OUT, theta).squeeze(0)
        for layer_size in self.pyramid_param:
            grid_m = torch.from_numpy(
                cv2.resize(grid.numpy(), (layer_size, layer_size)))
            mask = grid_m.ge(-1) & grid_m.le(1)
            grid_pyramid.append(grid_m)
            mask_x.append(mask[:, :, 0])
            mask_y.append(mask[:, :, 1])
    elif transform_type == 2:
        grid = grid_crop.squeeze(0)
        for layer_size in self.pyramid_param:
            grid_m = torch.from_numpy(
                cv2.resize(grid.numpy(), (layer_size, layer_size)))
            mask = grid_m.ge(-1) & grid_m.le(1)
            grid_pyramid.append(grid_m)
            mask_x.append(mask[:, :, 0])
            mask_y.append(mask[:, :, 1])

    if self.get_flow:
        # ATTENTION, here we just get the flow of the highest resolution
        # asked, not the pyramid of flows !
        flow = unormalise_and_convert_mapping_to_flow(
            grid_pyramid[-1], output_channel_first=True)
        # `and` on multi-element tensors is ambiguous; use elementwise &
        mask = mask_x[-1] & mask_y[-1]
        return {
            'source_image': cropped_source_image,
            'target_image': cropped_target_image,
            'flow_map': flow,  # here flow map is 2 x h x w
            'correspondence_mask': mask
        }
    else:
        # here we get both the pyramid of mappings and the last mapping
        # (at the highest resolution)
        return {
            'source_image': cropped_source_image,
            'target_image': cropped_target_image,
            'correspondence_map': grid_pyramid[-1],  # torch tensor, h x w x 2
            'correspondence_map_pyro': grid_pyramid,
            'mask_x': mask_x,
            'mask_y': mask_y
        }
def paired_transform_torch(image, transform, output_size, block_size=(1, 1)):
    transform = seg_transforms.SegTransformCompose([
        transform,
        datapipe.seg_transforms_cv.SegCVTransformNormalizeToTensor(None, None)
    ])
    pipe = seg_transforms.SegDataPipeline(block_size, transform)
    torch_device = torch.device('cpu')

    x0, m0, xf0, x1, m1, xf1 = pipe.prepare_unsupervised_paired_batch([image])
    padded_shape = x0.shape[2:4]

    xf0_to_1 = affine.cat_nx2x3(xf1, affine.inv_nx2x3(xf0))
    t_image_xf0 = affine.cv_to_torch(xf0, padded_shape, image.shape[:2])
    t_image_xf1 = affine.cv_to_torch(xf1, padded_shape, image.shape[:2])
    t_xf0_to_1 = affine.cv_to_torch(xf0_to_1, padded_shape)

    image_f = img_as_float(image).astype(np.float32)

    t_image = torch.tensor(image_f.transpose(2, 0, 1)[None, ...],
                           dtype=torch.float, device=torch_device)
    t_x0 = torch.tensor(x0, dtype=torch.float, device=torch_device)
    t_m0 = torch.tensor(m0, dtype=torch.float, device=torch_device)
    t_m1 = torch.tensor(m1, dtype=torch.float, device=torch_device)
    t_image_xf0 = torch.tensor(t_image_xf0, dtype=torch.float, device=torch_device)
    t_image_xf1 = torch.tensor(t_image_xf1, dtype=torch.float, device=torch_device)
    t_xf0_to_1 = torch.tensor(t_xf0_to_1, dtype=torch.float, device=torch_device)

    # torch.Size expects a single iterable, not separate int arguments
    output_shape = torch.Size((len(x0), 3, output_size[0], output_size[1]))
    grid_image0 = F.affine_grid(t_image_xf0, output_shape)
    grid_image1 = F.affine_grid(t_image_xf1, output_shape)
    grid_0to1 = F.affine_grid(t_xf0_to_1, output_shape)

    t_a = F.grid_sample(t_image, grid_image0)
    t_b = F.grid_sample(t_image, grid_image1)
    t_x01 = F.grid_sample(t_x0, grid_0to1)
    t_m01 = F.grid_sample(t_m0, grid_0to1) * t_m1

    t_a_np = t_a.detach().cpu().numpy()[0].transpose(1, 2, 0)
    t_b_np = t_b.detach().cpu().numpy()[0].transpose(1, 2, 0)
    t_x01_np = t_x01.detach().cpu().numpy()[0].transpose(1, 2, 0)
    t_m01_np = t_m01.detach().cpu().numpy()[0].transpose(1, 2, 0)

    x0 = x0[0].transpose(1, 2, 0)
    x1 = x1[0].transpose(1, 2, 0)

    return dict(x0=x0, torch0=t_a_np, x1=x1, torch1=t_b_np, x01=t_x01_np,
                m01=t_m01_np[:, :, 0])
def resample_vol_cuda_Rt(src_vol, R, t, cam_intrinsic=None, d_candi=None,
                         d_candi_new=None, padding_value=0.,
                         output_tensor=False, is_debug=False,
                         PointsDs_ref_cam_coord_in=None):
    r'''
    src_vol - src vol. NDHW or NCDHW format

    if d_candi_new is not None:
        d_candi : candidate depth values for the src view;
        d_candi_new : candidate depth values for the ref view.
        Usually d_candi_new is different from d_candi
    '''
    assert d_candi is not None, 'd_candi should be some np.array object'

    if src_vol.ndimension() == 4:  # src_vol - NDHW
        _, D, H, W = src_vol.shape
    elif src_vol.ndimension() == 5:  # src_vol - NCDHW
        _, C, D, H, W = src_vol.shape

    N = 1
    hhfov, hvfov = (math.radians(cam_intrinsic['hfov']) * .5,
                    math.radians(cam_intrinsic['vfov']) * .5)

    # --- 0. Get the sampled points in the ref. view --- #
    if PointsDs_ref_cam_coord_in is None:
        PointsDs_ref_cam_coord = torch.zeros(N, D, H, W, 3)
        if d_candi_new is not None:
            d_candi_ = d_candi_new
        else:
            d_candi_ = d_candi
        for idx_d, d in enumerate(d_candi_):
            PointsDs_ref_cam_coord[0, idx_d, :, :, :] \
                = d * torch.FloatTensor(cam_intrinsic['unit_ray_array'])
        PointsDs_ref_cam_coord = PointsDs_ref_cam_coord.cuda()
    else:
        PointsDs_ref_cam_coord = PointsDs_ref_cam_coord_in

    if d_candi_new is not None:
        z_max, z_min = d_candi.max(), d_candi.min()
    else:
        z_max = torch.max(PointsDs_ref_cam_coord[0, :, :, :, 2])
        z_min = torch.min(PointsDs_ref_cam_coord[0, :, :, :, 2])
    z_half = (z_max + z_min) * .5
    z_radius = (z_max - z_min) * .5

    # --- 1. Coordinate transform --- #
    PointsDs_ref_cam_coord = PointsDs_ref_cam_coord.reshape((-1, 3)).transpose(0, 1)

    # NOTE: (1) For PointsD_src_cam_coord, use the Cartesian coordinate
    #       (2) We should avoid in-place operations
    PointsDs_src_cam_coord_tmp = R.matmul(PointsDs_ref_cam_coord)
    PointsDs_src_cam_coord_tmp = PointsDs_src_cam_coord_tmp + \
        t.unsqueeze(1).expand(-1, PointsDs_src_cam_coord_tmp.shape[1])

    # transform into range [-1, 1] for all dimensions #
    PointsDs_src_cam_coord = torch.zeros(PointsDs_src_cam_coord_tmp.shape).cuda()
    PointsDs_src_cam_coord[0, :] = \
        PointsDs_src_cam_coord_tmp[0, :] / (PointsDs_src_cam_coord_tmp[2, :] + 1e-10) / math.tan(hhfov)
    PointsDs_src_cam_coord[1, :] = \
        PointsDs_src_cam_coord_tmp[1, :] / (PointsDs_src_cam_coord_tmp[2, :] + 1e-10) / math.tan(hvfov)
    PointsDs_src_cam_coord[2, :] = \
        (PointsDs_src_cam_coord_tmp[2, :] - z_half) / z_radius

    # reshape to N x OD x OH x OW x 3 #
    PointsDs_src_cam_coord = PointsDs_src_cam_coord.transpose(0, 1).reshape((N, D, H, W, 3))

    # --- 2. Re-sample --- #
    if src_vol.ndimension() == 4:
        src_vol_th = src_vol.unsqueeze(1)
    elif src_vol.ndimension() == 5:
        src_vol_th = src_vol

    src_vol_th_ = _set_vol_border(src_vol_th, padding_value)
    res_vol_th = torch.squeeze(
        torch.squeeze(
            F.grid_sample(src_vol_th_, PointsDs_src_cam_coord,
                          mode='bilinear', padding_mode='border'),
            dim=0),
        dim=0)

    if is_debug:
        return res_vol_th, PointsDs_src_cam_coord, src_vol_th
    else:
        return res_vol_th
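# --- illustration (not from the original repo) ---------------------------
# The z-normalization above maps raw depth into [-1, 1] via its mid-point
# and half-range so a 5-D grid_sample can address the volume. A compact
# hedged sketch of the same idea with made-up shapes:
import torch
import torch.nn.functional as F

vol = torch.rand(1, 1, 32, 64, 64)       # (N, C, D, H, W)
pts = torch.rand(1, 32, 64, 64, 3)       # x, y in [0, 1]; z as raw depth
pts[..., 2] = pts[..., 2] * 10.0 + 1.0   # pretend depths lie in [1, 11]

z_min, z_max = 1.0, 11.0
z_half, z_radius = (z_max + z_min) * 0.5, (z_max - z_min) * 0.5

grid = pts.clone()
grid[..., 0] = pts[..., 0] * 2 - 1                 # x -> [-1, 1]
grid[..., 1] = pts[..., 1] * 2 - 1                 # y -> [-1, 1]
grid[..., 2] = (pts[..., 2] - z_half) / z_radius   # depth -> [-1, 1]

resampled = F.grid_sample(vol, grid, mode='bilinear', padding_mode='border')
print(resampled.shape)   # torch.Size([1, 1, 32, 64, 64])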
def forward(self, ind_source, img_source, opt):
    an = opt.angular_out
    an2 = opt.angular_out * opt.angular_out
    N, num_source, h, w = img_source.shape  # [N,4,h,w]
    ind_source = torch.squeeze(ind_source)  # [4]
    h_c = h - 2 * opt.crop_size
    w_c = w - 2 * opt.crop_size

    D = opt.psv_step
    disp_range = torch.linspace(-1 * opt.psv_range, opt.psv_range,
                                steps=D).type_as(img_source)  # [D]

    if self.training:
        # PSV
        psv_input = img_source.view(N * num_source, 1, h, w).repeat(
            D * an2, 1, 1, 1)  # [N*an2*D*4,1,h,w]
        grid = construct_psv_grid(an, D, num_source, ind_source, disp_range,
                                  N, h, w)  # [N*an2*D*4,h,w,2]
        PSV = functional.grid_sample(psv_input, grid).view(
            N, an2, D, num_source, h, w)  # [N*an2*D*4,1,h,w]-->[N,an2,D,4,h,w]
        PSV = crop_boundary(PSV, opt.crop_size)

        # disparity & confidence estimation
        perPlane_out = self.conv_perPlane(
            PSV.view(N * an2 * D, num_source, h_c, w_c))  # [N*an2*D,4,h,w]
        crossPlane_out = self.conv_crossPlane(
            perPlane_out.view(N * an2, D * 4, h_c, w_c))  # [N*an2,D,h,w]
        disp_out = self.conv_disp(crossPlane_out)  # [N*an2,5,h,w]

        disp_target = disp_out[:, 0, :, :].view(N, an2, h_c, w_c)  # disparity for each view
        disp_target = functional.pad(
            disp_target,
            pad=[opt.crop_size, opt.crop_size, opt.crop_size, opt.crop_size],
            mode='constant', value=0)

        conf_source = disp_out[:, 1:, :, :].view(
            N, an2, num_source, h_c, w_c)  # confidence of source views for each view
        conf_source = self.softmax_d2(conf_source)

        # intermediate LF
        warp_img_input = img_source.view(N * num_source, 1, h, w).repeat(
            an2, 1, 1, 1)  # [N*an2*4,1,h,w]
        grid = construct_syn_grid(an, num_source, ind_source, disp_target,
                                  N, h, w)  # [N*an2*4,h,w,2]
        warped_img = functional.grid_sample(warp_img_input, grid).view(
            N, an2, num_source, h, w)  # [N,an2,4,h,w]
        warped_img = crop_boundary(warped_img, opt.crop_size)
        inter_lf = torch.sum(warped_img * conf_source, dim=2)  # [N,an2,h,w]

        return disp_target, inter_lf

    else:
        inter_lf = torch.zeros((N, an2, h_c, w_c)).type_as(img_source)
        for k_t in range(0, an2):  # for each target view
            ind_t = torch.arange(an2)[k_t]

            # disparity & confidence estimation
            PSV = torch.zeros((N, D, num_source, h, w)).type_as(img_source)
            for step in range(0, D):
                for k_s in range(0, num_source):
                    ind_s = ind_source[k_s]
                    disp = disp_range[step]
                    PSV[:, step, k_s] = warping(disp, ind_s, ind_t,
                                                img_source[:, k_s], an)
            PSV = crop_boundary(PSV, opt.crop_size)

            perPlane_out = self.conv_perPlane(
                PSV.view(N * D, num_source, h_c, w_c))  # [N*D,4,h,w]
            crossPlane_out = self.conv_crossPlane(
                perPlane_out.view(N, D * 4, h_c, w_c))  # [N,D,h,w]
            disp_out = self.conv_disp(crossPlane_out)  # [N,5,h,w]

            disp_target = disp_out[:, 0, :, :]  # [N,h,w] disparity for each view
            disp_target = functional.pad(
                disp_target,
                pad=[opt.crop_size, opt.crop_size, opt.crop_size, opt.crop_size],
                mode='constant', value=0)
            conf_source = disp_out[:, 1:, :, :]  # [N,4,h_c,w_c] confidence of source views for each view
            conf_source_norm = self.softmax_d1(conf_source)

            # warping source views
            warped_img = torch.zeros(N, num_source, h, w).type_as(img_source)
            for k_s in range(0, num_source):
                ind_s = ind_source[k_s]
                disp = disp_target
                warped_img[:, k_s] = warping(disp, ind_s, ind_t,
                                             img_source[:, k_s], an)
            warped_img = crop_boundary(warped_img, opt.crop_size)
            inter_view = torch.sum(warped_img * conf_source_norm, dim=1)  # [N,h,w]
            inter_lf[:, k_t] = inter_view

        return inter_lf
def unproject_heatmaps(heatmaps, proj_matricies, coord_volumes,
                       volume_aggregation_method='sum', vol_confidences=None):
    device = heatmaps.device
    batch_size, n_views, n_joints, heatmap_shape = (
        heatmaps.shape[0], heatmaps.shape[1], heatmaps.shape[2],
        tuple(heatmaps.shape[3:]))  # 1,4,32,96x96
    volume_shape = coord_volumes.shape[1:4]  # 64x64x64

    volume_batch = torch.zeros(batch_size, n_joints, *volume_shape,
                               device=device)  # tensor of shape 1x32x64x64x64

    # TODO: speed up this loop
    for batch_i in range(batch_size):
        coord_volume = coord_volumes[batch_i]  # Bx64x64x64x3 -> 64x64x64x3
        grid_coord = coord_volume.reshape((-1, 3))  # 262144x3

        volume_batch_to_aggregate = torch.zeros(
            n_views, n_joints, *volume_shape, device=device)  # 4x32x64x64x64

        for view_i in range(n_views):
            heatmap = heatmaps[batch_i, view_i]  # 1x4x32x96x96 -> 32x96x96
            heatmap = heatmap.unsqueeze(0)  # 1x32x96x96 (add a dimension at the front)

            grid_coord_proj = multiview.project_3d_points_to_image_plane_without_distortion(  # 262144x3
                proj_matricies[batch_i, view_i], grid_coord,
                convert_back_to_euclidean=False)

            # depth must be larger than 0.0; can this occur when a person
            # gets too close to the camera??
            invalid_mask = grid_coord_proj[:, 2] <= 0.0

            grid_coord_proj[grid_coord_proj[:, 2] == 0.0, 2] = 1.0  # not to divide by zero
            grid_coord_proj = multiview.homogeneous_to_euclidean(grid_coord_proj)

            # transform to [-1.0, 1.0] range
            grid_coord_proj_transformed = torch.zeros_like(grid_coord_proj)  # 262144x2
            # convert coordinates in (0,0)->(96,96) into relative coordinates
            # with the centre at (0,0), top-left at (-1,-1), bottom-right at (1,1)
            grid_coord_proj_transformed[:, 0] = 2 * (grid_coord_proj[:, 0] / heatmap_shape[0] - 0.5)
            grid_coord_proj_transformed[:, 1] = 2 * (grid_coord_proj[:, 1] / heatmap_shape[1] - 0.5)
            grid_coord_proj = grid_coord_proj_transformed

            # prepare for F.grid_sample: unsqueeze inserts one dimension at the
            # given position, giving 1x262144x1x2 (heatmap is 1x32x96x96)
            grid_coord_proj = grid_coord_proj.unsqueeze(1).unsqueeze(0)
            try:
                # 1x32x262144x1 = heatmap (1x32x96x96), grid_coord_proj (1x262144x1x2)
                current_volume = F.grid_sample(heatmap, grid_coord_proj, align_corners=True)
            except TypeError:  # old PyTorch
                current_volume = F.grid_sample(heatmap, grid_coord_proj)

            # zero out non-valid points
            current_volume = current_volume.view(n_joints, -1)  # 32x262144
            current_volume[:, invalid_mask] = 0.0

            # reshape back to volume
            current_volume = current_volume.view(n_joints, *volume_shape)  # 32x64x64x64

            # collect
            volume_batch_to_aggregate[view_i] = current_volume

        # aggregate resulting volume
        if volume_aggregation_method.startswith('conf'):
            volume_batch[batch_i] = (volume_batch_to_aggregate *
                                     vol_confidences[batch_i].view(n_views, n_joints, 1, 1, 1)).sum(0)
        elif volume_aggregation_method == 'sum':
            volume_batch[batch_i] = volume_batch_to_aggregate.sum(0)
        elif volume_aggregation_method == 'max':
            volume_batch[batch_i] = volume_batch_to_aggregate.max(0)[0]
        elif volume_aggregation_method == 'softmax':
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate.clone()  # 2x32x64x64x64 (n_views, n_joints, *volume_shape)
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate_softmin.view(n_views, -1)  # reshape
            volume_batch_to_aggregate_softmin = nn.functional.softmax(volume_batch_to_aggregate_softmin, dim=0)
            volume_batch_to_aggregate_softmin = volume_batch_to_aggregate_softmin.view(n_views, n_joints, *volume_shape)  # reshape back
            volume_batch[batch_i] = (volume_batch_to_aggregate *
                                     volume_batch_to_aggregate_softmin).sum(0)
        else:
            raise ValueError("Unknown volume_aggregation_method: {}".format(volume_aggregation_method))

    return volume_batch
th11 = scalex * math.cos(angle)
th12 = -math.sin(angle) * scaley * input_H / input_W
th13 = (2 * xc - input_W - 1) / (input_W - 1)
th21 = math.sin(angle) * scalex * input_W / input_H
th22 = scaley * math.cos(angle)
th23 = (2 * yc - input_H - 1) / (input_H - 1)
# np.float was removed in recent NumPy; the values are float scalars anyway
t = np.asarray([th11, th12, th13, th21, th22, th23], dtype=np.float32)
t = torch.from_numpy(t).type(torch.FloatTensor)
t = t.cuda()
theta = t.view(-1, 2, 3)
grid = F.affine_grid(theta, torch.Size((1, 3, int(target_h), int(target_gw))))
x = F.grid_sample(im_data, grid)

h2 = 2 * h2
scalex = (w2 + int(2 * h2)) / input_W
scaley = h2 / input_H
th11 = scalex * math.cos(angle)
th12 = -math.sin(angle) * scaley
th13 = (2 * xc - input_W - 1) / (input_W - 1)  # * torch.cos(angle_var) - (2 * yc - input_H - 1) / (input_H - 1) * torch.sin(angle_var)
th21 = math.sin(angle) * scalex
th22 = scaley * math.cos(angle)
th23 = (2 * yc - input_H - 1) / (input_H - 1)  # * torch.cos(angle_var) + (2 * xc - input_W - 1) / (input_W - 1) * torch.sin(angle_var)
t = np.asarray([th11, th12, th13, th21, th22, th23], dtype=np.float32)
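# --- illustration (not from the original repo) ---------------------------
# The input_H/input_W factors in the off-diagonal terms above compensate for
# affine_grid working in per-axis normalized coordinates: on a non-square
# image a rotation must be rescaled per axis or it shears. A hedged
# standalone sketch of the same theta; all numbers are made up.
import math
import torch
import torch.nn.functional as F

input_H, input_W = 480, 640
xc, yc, angle = 320.0, 240.0, math.radians(15)   # crop centre and tilt
scalex, scaley = 0.5, 0.25                       # crop extents / image size

theta = torch.tensor([[
    [scalex * math.cos(angle),
     -math.sin(angle) * scaley * input_H / input_W,
     (2 * xc - input_W - 1) / (input_W - 1)],
    [math.sin(angle) * scalex * input_W / input_H,
     scaley * math.cos(angle),
     (2 * yc - input_H - 1) / (input_H - 1)],
]], dtype=torch.float32)

im_data = torch.rand(1, 3, input_H, input_W)
grid = F.affine_grid(theta, torch.Size((1, 3, 120, 320)))
patch = F.grid_sample(im_data, grid)   # rotated, scaled crop around (xc, yc)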
def warp_affine(src: torch.Tensor, M: torch.Tensor, dsize: Tuple[int, int],
                mode: str = 'bilinear', padding_mode: str = 'zeros',
                align_corners: Optional[bool] = None) -> torch.Tensor:
    r"""Applies an affine transformation to a tensor.

    The function warp_affine transforms the source tensor using
    the specified matrix:

    .. math::
        \text{dst}(x, y) = \text{src} \left( M_{11} x + M_{12} y + M_{13} ,
        M_{21} x + M_{22} y + M_{23} \right )

    Args:
        src (torch.Tensor): input tensor of shape :math:`(B, C, H, W)`.
        M (torch.Tensor): affine transformation of shape :math:`(B, 2, 3)`.
        dsize (Tuple[int, int]): size of the output image (height, width).
        mode (str): interpolation mode to calculate output values
          'bilinear' | 'nearest'. Default: 'bilinear'.
        padding_mode (str): padding mode for outside grid values
          'zeros' | 'border' | 'reflection'. Default: 'zeros'.
        align_corners (bool, optional): mode for grid_generation. Default: None.

    Returns:
        torch.Tensor: the warped tensor with shape :math:`(B, C, H, W)`.

    Example:
        >>> img = torch.rand(1, 4, 5, 6)
        >>> A = torch.eye(2, 3)[None]
        >>> out = warp_affine(img, A, (4, 2), align_corners=True)
        >>> print(out.shape)
        torch.Size([1, 4, 4, 2])

    .. note::
        This function is often used in conjunction with :func:`get_rotation_matrix2d`,
        :func:`get_shear_matrix2d`, :func:`get_affine_matrix2d`,
        :func:`invert_affine_transform`.

    .. note::
        See a working example `here <https://kornia.readthedocs.io/en/latest/
        tutorials/warp_affine.html>`__.
    """
    if not isinstance(src, torch.Tensor):
        raise TypeError("Input src type is not a torch.Tensor. Got {}".format(
            type(src)))

    if not isinstance(M, torch.Tensor):
        raise TypeError("Input M type is not a torch.Tensor. Got {}".format(
            type(M)))

    if not len(src.shape) == 4:
        raise ValueError("Input src must be a BxCxHxW tensor. Got {}".format(
            src.shape))

    # both conditions must hold; with `or` a 3-dim tensor whose last two
    # dims are not (2, 3) would slip through the check
    if not (len(M.shape) == 3 and M.shape[-2:] == (2, 3)):
        raise ValueError("Input M must be a Bx2x3 tensor. Got {}".format(
            M.shape))

    # TODO: remove the statement below in kornia v0.6
    if align_corners is None:
        message: str = (
            "The align_corners default value has been changed. By default now is set True "
            "in order to match cv2.warpAffine. In case you want to keep your previous "
            "behaviour set it to False. This warning will disappear in kornia > v0.6.")
        warnings.warn(message)
        # set default value for align corners
        align_corners = True

    B, C, H, W = src.size()

    # we generate a 3x3 transformation matrix from 2x3 affine
    M_3x3: torch.Tensor = convert_affinematrix_to_homography(M)
    dst_norm_trans_src_norm: torch.Tensor = normalize_homography(
        M_3x3, (H, W), dsize)

    src_norm_trans_dst_norm = torch.inverse(dst_norm_trans_src_norm)

    grid = F.affine_grid(src_norm_trans_dst_norm[:, :2, :],
                         [B, C, dsize[0], dsize[1]],
                         align_corners=align_corners)

    return F.grid_sample(src, grid, align_corners=align_corners, mode=mode,
                         padding_mode=padding_mode)
def forward(self, p, x):
    # x = x.unsqueeze(1)
    p_features = p.transpose(1, -1)
    p = p.unsqueeze(1).unsqueeze(1)
    p = torch.cat([p + d for d in self.displacments], dim=2)
    feature_0 = F.grid_sample(x, p, padding_mode='border')
    # print(feature_0.shape)
    # print(feature_0[:,:,:,0,0])

    net = self.actvn(self.conv_in(x))
    net = self.conv_in_bn(net)
    feature_1 = F.grid_sample(net, p, padding_mode='border')
    net = self.maxpool(net)  # out 128

    net = self.actvn(self.conv_0(net))
    net = self.actvn(self.conv_0_1(net))
    net = self.conv0_1_bn(net)
    feature_2 = F.grid_sample(net, p, padding_mode='border')
    net = self.maxpool(net)  # out 64

    net = self.actvn(self.conv_1(net))
    net = self.actvn(self.conv_1_1(net))
    net = self.conv1_1_bn(net)
    feature_3 = F.grid_sample(net, p, padding_mode='border')
    net = self.maxpool(net)

    net = self.actvn(self.conv_2(net))
    net = self.actvn(self.conv_2_1(net))
    net = self.conv2_1_bn(net)
    feature_4 = F.grid_sample(net, p, padding_mode='border')
    net = self.maxpool(net)

    net = self.actvn(self.conv_3(net))
    net = self.actvn(self.conv_3_1(net))
    net = self.conv3_1_bn(net)
    feature_5 = F.grid_sample(net, p, padding_mode='border')
    net = self.maxpool(net)

    net = self.actvn(self.conv_4(net))
    net = self.actvn(self.conv_4_1(net))
    net = self.conv4_1_bn(net)
    feature_6 = F.grid_sample(net, p, padding_mode='border')

    # here every channel corresponds to one feature.
    features = torch.cat((feature_0, feature_1, feature_2, feature_3,
                          feature_4, feature_5, feature_6),
                         dim=1)  # (B, features, 1, 7, sample_num)
    shape = features.shape
    features = torch.reshape(
        features, (shape[0], shape[1] * shape[3], shape[4])
    )  # (B, features_per_sample, samples_num)
    features = torch.cat(
        (features, p_features), dim=1)  # (B, feature_size, samples_num) samples_num 0->0,...,N->N
    # print('features: ', features[:,:,:3])

    net = self.actvn(self.fc_0(features))
    net = self.actvn(self.fc_1(net))
    net = self.actvn(self.fc_2(net))
    out = self.fc_out(net)
    # out = net.squeeze(1)
    return out