def _crop_pool_layer(bottom, rois, max_pool=True):
    # code modified from
    # https://github.com/ruotianluo/pytorch-faster-rcnn
    # implement it using stn
    # box to affine
    # input (x1,y1,x2,y2)
    """
    [  x2-x1             x1 + x2 - W + 1  ]
    [  -----      0      ---------------  ]
    [  W - 1                  W - 1       ]
    [                                     ]
    [           y2-y1    y1 + y2 - H + 1  ]
    [    0      -----    ---------------  ]
    [           H - 1         H - 1       ]
    """
    rois = rois.detach()

    batch_size = bottom.size(0)
    D = bottom.size(1)
    H = bottom.size(2)
    W = bottom.size(3)
    roi_per_batch = rois.size(0) // batch_size  # integer division so expand() receives an int
    x1 = rois[:, 1::4] / 16.0
    y1 = rois[:, 2::4] / 16.0
    x2 = rois[:, 3::4] / 16.0
    y2 = rois[:, 4::4] / 16.0

    height = bottom.size(2)
    width = bottom.size(3)

    # affine theta
    zero = Variable(rois.data.new(rois.size(0), 1).zero_())
    theta = torch.cat([
        (x2 - x1) / (width - 1),
        zero,
        (x1 + x2 - width + 1) / (width - 1),
        zero,
        (y2 - y1) / (height - 1),
        (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)

    if max_pool:
        pre_pool_size = cfg.POOLING_SIZE * 2
        grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size)))
        bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\
                       .contiguous().view(-1, D, H, W)
        crops = F.grid_sample(bottom, grid)
        crops = F.max_pool2d(crops, 2, 2)
    else:
        grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE)))
        bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\
                       .contiguous().view(-1, D, H, W)
        crops = F.grid_sample(bottom, grid)

    return crops, grid

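# A minimal, self-contained check of the box-to-theta mapping above (my addition,
# not part of the original code): for a known ROI on an 8x8 feature map, the
# cropped patch's top-left sample should equal the feature value at (y1, x1).
# Note the (W - 1) / (H - 1) normalization assumes align_corners=True, which was
# the implicit behavior of F.affine_grid before PyTorch 1.3.
import torch
import torch.nn.functional as F

bottom = torch.arange(8 * 8, dtype=torch.float32).view(1, 1, 8, 8)
x1, y1, x2, y2 = 1.0, 2.0, 5.0, 6.0  # ROI already in feature-map coordinates
H = W = 8
theta = torch.tensor([[[(x2 - x1) / (W - 1), 0., (x1 + x2 - W + 1) / (W - 1)],
                       [0., (y2 - y1) / (H - 1), (y1 + y2 - H + 1) / (H - 1)]]])
grid = F.affine_grid(theta, torch.Size((1, 1, 7, 7)), align_corners=True)
crop = F.grid_sample(bottom, grid, align_corners=True)
assert torch.isclose(crop[0, 0, 0, 0], bottom[0, 0, int(y1), int(x1)])
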
def stn(self, z, c):
    zs = z.view(-1, 10 * 3 * 3)
    theta = self.fc_loc(zs)
    theta = theta.view(-1, 2, 3)
    grid = F.affine_grid(theta, c.size())
    cond = F.grid_sample(c, grid)
    return cond

def window_to_image(z_where, window_size, image_size, windows):
    n = windows.size(0)
    assert windows.size(1) == window_size ** 2, 'Size mismatch.'
    theta = expand_z_where(z_where)
    grid = F.affine_grid(theta, torch.Size((n, 1, image_size, image_size)))
    out = F.grid_sample(windows.view(n, 1, window_size, window_size), grid)
    return out.view(n, image_size, image_size)

def image_to_window(z_where, window_size, image_size, images):
    n = images.size(0)
    assert images.size(1) == images.size(2) == image_size, 'Size mismatch.'
    theta_inv = expand_z_where(z_where_inv(z_where))
    grid = F.affine_grid(theta_inv, torch.Size((n, 1, window_size, window_size)))
    out = F.grid_sample(images.view(n, 1, image_size, image_size), grid)
    return out.view(n, -1)

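# Hedged sketch of the two helpers the AIR-style functions above assume
# (expand_z_where, z_where_inv); their real implementations live elsewhere in
# the codebase. This follows the usual convention z_where = [s, x, y]:
# [s, x, y] -> [[s, 0, x], [0, s, y]], and the inverse is [1/s, -x/s, -y/s].
import torch

def expand_z_where(z_where):
    n = z_where.size(0)
    theta = torch.zeros(n, 2, 3, dtype=z_where.dtype, device=z_where.device)
    theta[:, 0, 0] = z_where[:, 0]
    theta[:, 1, 1] = z_where[:, 0]
    theta[:, 0, 2] = z_where[:, 1]
    theta[:, 1, 2] = z_where[:, 2]
    return theta

def z_where_inv(z_where):
    s = z_where[:, 0:1]
    return torch.cat([1. / s, -z_where[:, 1:] / s], dim=1)
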
def forward(self, theta):
    b = theta.size()[0]
    if not theta.size() == (b, 2, 3):
        theta = theta.view(-1, 2, 3)
        theta = theta.contiguous()
    batch_size = theta.size()[0]
    out_size = torch.Size((batch_size, self.out_ch, self.out_h, self.out_w))
    return F.affine_grid(theta, out_size)

def stn(self, x):
    xs = self.localization(x)
    xs = xs.view(-1, 10 * 3 * 3)
    theta = self.fc_loc(xs)
    theta = theta.view(-1, 2, 3)
    grid = F.affine_grid(theta, x.size())
    x = F.grid_sample(x, grid)
    return x

def init_coord_feature_map(self, required_batch_rows_cols, device):
    if self.coord_feature_map is not None:
        if (self.coord_feature_map.shape[0] == required_batch_rows_cols[0]
                and self.coord_feature_map.shape[2] == required_batch_rows_cols[1]
                and self.coord_feature_map.shape[3] == required_batch_rows_cols[2]):
            return
    theta = torch.FloatTensor([1, 0, 0, 0, 1, 0]).view(1, 2, 3)
    theta = torch.cat((theta, ) * required_batch_rows_cols[0], dim=0)
    self.coord_feature_map = \
        F.affine_grid(theta,
                      (required_batch_rows_cols[0], 1,
                       required_batch_rows_cols[1],
                       required_batch_rows_cols[2])).transpose(1, 3).to(device)

def find_tensor_peak_batch(heatmap, radius, downsample, threshold=0.000001):
    assert heatmap.dim() == 3, 'The dimension of the heatmap is wrong : {}'.format(heatmap.size())
    assert radius > 0 and isinstance(radius, numbers.Number), 'The radius is not ok : {}'.format(radius)
    num_pts, H, W = heatmap.size(0), heatmap.size(1), heatmap.size(2)
    assert W > 1 and H > 1, 'To avoid the normalization function dividing by zero'
    # find the approximate location:
    score, index = torch.max(heatmap.view(num_pts, -1), 1)
    index_w = (index % W).float()
    index_h = (index // W).float()  # floor division: row index of the flattened argmax

    def normalize(x, L):
        return -1. + 2. * x.data / (L - 1)
    boxes = [index_w - radius, index_h - radius, index_w + radius, index_h + radius]
    boxes[0] = normalize(boxes[0], W)
    boxes[1] = normalize(boxes[1], H)
    boxes[2] = normalize(boxes[2], W)
    boxes[3] = normalize(boxes[3], H)
    # affine_parameter = [(boxes[2]-boxes[0])/2, boxes[0]*0, (boxes[2]+boxes[0])/2,
    #                     boxes[0]*0, (boxes[3]-boxes[1])/2, (boxes[3]+boxes[1])/2]
    # theta = torch.stack(affine_parameter, 1).view(num_pts, 2, 3)

    affine_parameter = torch.zeros((num_pts, 2, 3))
    affine_parameter[:, 0, 0] = (boxes[2] - boxes[0]) / 2
    affine_parameter[:, 0, 2] = (boxes[2] + boxes[0]) / 2
    affine_parameter[:, 1, 1] = (boxes[3] - boxes[1]) / 2
    affine_parameter[:, 1, 2] = (boxes[3] + boxes[1]) / 2
    # extract the sub-region heatmap
    theta = affine_parameter.to(heatmap.device)
    grid_size = torch.Size([num_pts, 1, radius * 2 + 1, radius * 2 + 1])
    grid = F.affine_grid(theta, grid_size)
    sub_feature = F.grid_sample(heatmap.unsqueeze(1), grid).squeeze(1)
    sub_feature = F.threshold(sub_feature, threshold, np.finfo(float).eps)

    X = torch.arange(-radius, radius + 1).to(heatmap).view(1, 1, radius * 2 + 1)
    Y = torch.arange(-radius, radius + 1).to(heatmap).view(1, radius * 2 + 1, 1)

    # soft-argmax inside the cropped window, then shift back to heatmap coords
    sum_region = torch.sum(sub_feature.view(num_pts, -1), 1)
    x = torch.sum((sub_feature * X).view(num_pts, -1), 1) / sum_region + index_w
    y = torch.sum((sub_feature * Y).view(num_pts, -1), 1) / sum_region + index_h

    x = x * downsample + downsample / 2.0 - 0.5
    y = y * downsample + downsample / 2.0 - 0.5
    return torch.stack([x, y], 1), score

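# Hedged usage sketch for find_tensor_peak_batch: a synthetic Gaussian heatmap
# peaked at (x=12, y=20); with downsample=1 the refined peak should land very
# close to the true location (sub-pixel differences are possible because the
# function relies on affine_grid's default align_corners behavior).
import numbers  # the function above relies on these imports
import numpy as np
import torch

yy, xx = torch.meshgrid(torch.arange(32.), torch.arange(32.))
heatmap = torch.exp(-((xx - 12.) ** 2 + (yy - 20.) ** 2) / 8.).unsqueeze(0)  # [1, 32, 32]
loc, score = find_tensor_peak_batch(heatmap, radius=4, downsample=1)
print(loc)  # approximately tensor([[12., 20.]])
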
def warp_feature(feature, pts_location, patch_size):
    # pts_location is [X,Y], patch_size is [H,W]
    C, H, W = feature.size(0), feature.size(1), feature.size(2)

    def normalize(x, L):
        return -1. + 2. * x / (L - 1)
    crop_box = [pts_location[0] - patch_size[1], pts_location[1] - patch_size[0],
                pts_location[0] + patch_size[1], pts_location[1] + patch_size[0]]
    crop_box[0] = normalize(crop_box[0], W)
    crop_box[1] = normalize(crop_box[1], H)
    crop_box[2] = normalize(crop_box[2], W)
    crop_box[3] = normalize(crop_box[3], H)
    affine_parameter = [(crop_box[2] - crop_box[0]) / 2,
                        MU.np2variable(torch.zeros(1), feature.is_cuda, False),
                        (crop_box[0] + crop_box[2]) / 2,
                        MU.np2variable(torch.zeros(1), feature.is_cuda, False),
                        (crop_box[3] - crop_box[1]) / 2,
                        (crop_box[1] + crop_box[3]) / 2]

    affine_parameter = torch.cat(affine_parameter).view(2, 3)
    theta = affine_parameter.unsqueeze(0)
    feature = feature.unsqueeze(0)
    grid_size = torch.Size([1, 1, 2 * patch_size[0] + 1, 2 * patch_size[1] + 1])
    grid = F.affine_grid(theta, grid_size)
    sub_feature = F.grid_sample(feature, grid).squeeze(0)
    return sub_feature

def _crop_pool_layer(self, bottom, rois, max_pool=True):
    # implement it using stn
    # box to affine
    # input (x1,y1,x2,y2)
    """
    [  x2-x1             x1 + x2 - W + 1  ]
    [  -----      0      ---------------  ]
    [  W - 1                  W - 1       ]
    [                                     ]
    [           y2-y1    y1 + y2 - H + 1  ]
    [    0      -----    ---------------  ]
    [           H - 1         H - 1       ]
    """
    rois = rois.detach()
    x1 = rois[:, 1::4] / 16.0
    y1 = rois[:, 2::4] / 16.0
    x2 = rois[:, 3::4] / 16.0
    y2 = rois[:, 4::4] / 16.0

    height = bottom.size(2)
    width = bottom.size(3)

    # affine theta
    theta = Variable(rois.data.new(rois.size(0), 2, 3).zero_())
    theta[:, 0, 0] = ((x2 - x1) / (width - 1)).view(-1)
    theta[:, 0, 2] = ((x1 + x2 - width + 1) / (width - 1)).view(-1)
    theta[:, 1, 1] = ((y2 - y1) / (height - 1)).view(-1)
    theta[:, 1, 2] = ((y1 + y2 - height + 1) / (height - 1)).view(-1)

    pre_pool_size = cfg.POOLING_SIZE * 2 if max_pool else cfg.POOLING_SIZE
    grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size)))
    torch.backends.cudnn.enabled = False
    crops = F.grid_sample(bottom.expand(rois.size(0), bottom.size(1), bottom.size(2), bottom.size(3)), grid)
    torch.backends.cudnn.enabled = True
    if max_pool:
        crops = F.max_pool2d(crops, 2, 2)
    return crops

def test_grid_sample(mode, padding_mode, align_corners):
    from mmcv.onnx.symbolic import register_extra_symbolics
    opset_version = 11
    register_extra_symbolics(opset_version)

    from mmcv.ops import get_onnxruntime_op_path
    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    input = torch.rand(1, 1, 10, 10)
    grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
    grid = F.affine_grid(grid, (1, 1, 15, 15), align_corners=align_corners).type_as(input)

    def func(input, grid):
        return F.grid_sample(input, grid, mode=mode,
                             padding_mode=padding_mode,
                             align_corners=align_corners)

    return process_grid_sample(func, input, grid, ort_custom_op_path)

def image_to_object(images, pose, object_size):
    '''
    Inverse pose, crop and transform image patches.
    param images: (... x C x H x W) tensor
    param pose: (N x 3) tensor
    '''
    # Note: Images: [1280, 1, 64, 64], Pose: [1280, 3]
    N, pose_size = pose.size()
    n_channels, H, W = images.size()[-3:]
    images = images.view(N, n_channels, H, W)
    if pose_size == 3:
        transformer_inv = expand_pose(pose_inv(pose))  # [s, x, y] -> [[s, 0, x],
                                                       #               [0, s, y]]
    elif pose_size == 6:
        transformer_inv = pose_inv_full(pose)  # Note: inverse of the affine matrix
    grid = F.affine_grid(transformer_inv, torch.Size((N, n_channels, object_size, object_size)))
    obj = F.grid_sample(images, grid)
    return obj

def translate_rotate(img, disp=16, angle=2):
    """displacement in pixels, angle in degrees"""
    h, w, _ = tuple(img.size())
    [dx, dy] = [random.uniform(-disp, disp) / w for i in range(2)]
    angle = random.uniform(-angle, angle) * math.pi / 180
    thetas = np.zeros((h, 2, 3))
    thetas[:, 0, 2] = dx
    thetas[:, 1, 2] = dy
    thetas[:, 0, 0] += np.cos(angle)
    thetas[:, 1, 0] -= np.sin(angle)
    thetas[:, 0, 1] += np.sin(angle)
    thetas[:, 1, 1] += np.cos(angle)
    thetas = Variable(torch.Tensor(thetas), requires_grad=False)  # H, 2, 3
    grid = F.affine_grid(thetas, torch.Size((h, 2, w, w)))
    res = F.grid_sample(img.unsqueeze(1), grid).squeeze()
    del grid, thetas
    return res.unsqueeze(0)

def forward(self, inputs, whbias=None):
    _device = inputs.device
    N = inputs.size(0)
    _theta = self._eye.repeat(N, 1, 1)

    if whbias is None:
        whbias = self._sample_latent(inputs)

    _theta[:, 0, 0] = whbias[:, 0]
    _theta[:, 1, 1] = whbias[:, 1]
    _theta[:, 0, 2] = whbias[:, 2]
    _theta[:, 1, 2] = whbias[:, 3]

    grid = F.affine_grid(_theta, inputs.size(), **kwargs).to(_device)
    output = F.grid_sample(inputs, grid, padding_mode='reflection', **kwargs)

    if self.size is not None:
        output = F.adaptive_avg_pool2d(output, self.size)

    return output

def forward(self, input_g):
    transform_t = self._build2dTransformMatrix()
    affine_t = F.affine_grid(
        transform_t[:3]
        .unsqueeze(0)
        .expand(input_g.size(0), -1, -1)
        .to(input_g.device, torch.float32),
        input_g.size(),
        align_corners=False,
    )

    augmented_input_g = F.grid_sample(
        input_g, affine_t, padding_mode="border", align_corners=False
    )

    if self.noise:
        noise_t = torch.randn_like(augmented_input_g)
        noise_t *= self.noise
        augmented_input_g += noise_t

    return augmented_input_g

def forward(self, x):
    bsz, c, t, h, w = x.size()
    output = torch.zeros_like(x).cuda()
    theta = torch.zeros((bsz, 2, 3)).cuda()
    for j in range(bsz):
        # theta[j] = self.rotate()
        theta[j] = self.scale()
        # theta[j] = self.translation()
        # theta[j] = self.shear()
        # theta[j] = self.reflection()
    grid = F.affine_grid(theta, (bsz, c, h, w))
    for i in range(t):
        output[:, :, i, :, :] = F.grid_sample(x[:, :, i, :, :], grid)

    # new_theta = self.maxtrix_padding(theta, (bsz, 3, 3))
    # # for each sample in the batch, compute the inverse
    # inverse_theta = new_theta.clone()
    # for j in range(bsz):
    #     inverse_theta[j] = torch.inverse(new_theta[j])
    # inverse_theta = self.matrix_reduce(inverse_theta, (bsz, 2, 3))
    # inverse_grid = F.affine_grid(inverse_theta, (bsz, c, h, w))
    # for i in range(t):
    #     output[:, :, i, :, :] = F.grid_sample(x[:, :, i, :, :], inverse_grid)
    return output

def stn(x, theta, mode='rotation', reduce_ratio=28 / 224):
    rr = reduce_ratio
    if mode == 'affine':
        theta1 = theta.view(-1, 2, 3)
    else:
        theta1 = Variable(torch.zeros([x.size(0), 2, 3], dtype=torch.float32,
                                      device=x.get_device()), requires_grad=True)
        theta1 = theta1 + 0
        theta1[:, 0, 0] = 1.0
        theta1[:, 1, 1] = 1.0
        if mode == 'rotation':
            angle = theta[:, 0]
            theta1[:, 0, 0] = torch.cos(angle) * rr
            theta1[:, 0, 1] = -torch.sin(angle) * rr
            theta1[:, 1, 0] = torch.sin(angle) * rr
            theta1[:, 1, 1] = torch.cos(angle) * rr
    target_size = [x.size(0), x.size(1), 28, 28]
    grid = F.affine_grid(theta1, target_size)
    x = F.grid_sample(x, grid)
    return x

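# CPU sketch of the rotation branch above (the original requires a CUDA device
# via x.get_device()); rr < 1 zooms into an rr-sized window while rotating.
import math
import torch
import torch.nn.functional as F

x = torch.rand(2, 3, 224, 224)
angle = torch.tensor([0., math.pi / 6])
rr = 28 / 224
theta1 = torch.zeros(2, 2, 3)
theta1[:, 0, 0] = torch.cos(angle) * rr
theta1[:, 0, 1] = -torch.sin(angle) * rr
theta1[:, 1, 0] = torch.sin(angle) * rr
theta1[:, 1, 1] = torch.cos(angle) * rr
grid = F.affine_grid(theta1, [2, 3, 28, 28])
out = F.grid_sample(x, grid)
print(out.shape)  # torch.Size([2, 3, 28, 28])
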
def _affine_grid_gen(rois, input_size, grid_size):
    rois = rois.detach()
    x1 = rois[:, 1::4] / 16.0
    y1 = rois[:, 2::4] / 16.0
    x2 = rois[:, 3::4] / 16.0
    y2 = rois[:, 4::4] / 16.0

    height = input_size[0]
    width = input_size[1]

    zero = Variable(rois.data.new(rois.size(0), 1).zero_())
    theta = torch.cat([
        (x2 - x1) / (width - 1),
        zero,
        (x1 + x2 - width + 1) / (width - 1),
        zero,
        (y2 - y1) / (height - 1),
        (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)

    grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size)))
    return grid

def forward(self, images, position, zoom, hidden=None):
    batch_size = images.shape[0]

    # do affine transformation
    theta = position * self.theta_position + torch.exp(zoom) * self.theta_zoom
    grid = F.affine_grid(theta, torch.Size((batch_size, 3, self.affine_size, self.affine_size)))
    x = F.grid_sample(images, grid)
    x = self.avg_pool(x)

    # resnet layers
    x = self.resnet(x)
    x = self.maxPool(x)
    x = F.sigmoid(self.conv(x))
    x = x.view(-1, 100)

    # add recursion
    x = torch.cat([x, torch.squeeze(torch.cat([position, zoom], dim=1), dim=2)], dim=1)

    # fully connected layers
    x_tmp = F.sigmoid(self.lin1(x))
    x = F.sigmoid(self.lin2(x_tmp))
    x = F.sigmoid(self.lin3(x))
    x = F.sigmoid(self.lin4(x))
    x = F.tanh(self.lin5(torch.cat([x, x_tmp], dim=1)))

    # update recurrence / position / zoom
    x = x.unsqueeze(2)
    position = position + torch.exp(zoom) * x[:, 0:2]
    zoom = zoom + x[:, 2:3]

    return position, zoom, hidden

def eval_error(self):
    loss_list = []
    for batch in self.eval_loader:
        image, labels = batch['image'].to(self.device), batch['labels'].to(self.device)
        orig = batch['orig'].to(self.device)
        orig_label = batch['orig_label'].to(self.device)
        n, l, h, w = orig.shape
        theta = self.model(labels)
        cens = calc_centroid(orig_label)
        assert cens.shape == (n, 9, 2)
        points = torch.cat([cens[:, 1:6], cens[:, 6:9].mean(dim=1, keepdim=True)], dim=1)
        theta_label = torch.zeros((n, 6, 2, 3), device=self.device, requires_grad=False)
        for i in range(6):
            theta_label[:, i, 0, 0] = (81. - 1.) / (w - 1)
            theta_label[:, i, 0, 2] = -1. + (2. * points[:, i, 1]) / (w - 1)
            theta_label[:, i, 1, 1] = (81. - 1.) / (h - 1)
            theta_label[:, i, 1, 2] = -1. + (2. * points[:, i, 0]) / (h - 1)
        loss = self.metric(theta, theta_label)
        loss_list.append(loss.item())

        temp = []
        for i in range(theta.shape[1]):
            test = theta[:, i]
            grid = F.affine_grid(theta=test, size=[n, 3, 81, 81], align_corners=True)
            temp.append(F.grid_sample(input=orig, grid=grid, align_corners=True))
        parts = torch.stack(temp, dim=1)
        assert parts.shape == (n, 6, 3, 81, 81)
        for i in range(6):
            parts_grid = torchvision.utils.make_grid(parts[:, i].detach().cpu())
            self.writer.add_image('croped_parts_%s_%d' % (uuid_8, i), parts_grid, self.step)
    return np.mean(loss_list)

def forward(self, A: Tensor, L: Tensor, T: int) -> Tensor:
    """
    Given a set of predicted action probabilities A_{k}, upsamples them
    w.r.t. the given projected L_{k}s.

    :param L: [K] The projected lengths.
    :param A: [K x C] The predicted actions' probabilities.
    :return: [1 x C x ~T] Upsampled A_{k}s.
    """
    A = A.squeeze().permute(1, 0)  # [K x C]
    L = L.squeeze()  # [K]
    L_prime = project_lengths_softmax(T=T, L=L)
    K = A.shape[0]
    C = A.shape[1]
    l_max = int(L_prime.max() + 0.5)  # round to the nearest int
    pis = torch.zeros_like(L_prime)  # [K]
    normalized_l = self._normalize_scale(l_max, L_prime)
    normalized_p = self._normalize_location(l_max, pis, L_prime)
    params_mat = self._create_params_matrix(normalized_l, normalized_p)  # [K x 3]
    theta = self._create_theta(params_mat)  # [K x 2 x 3]
    grid = F.affine_grid(theta, torch.Size((K, C, 1, l_max)))
    temp_A = A.view(K, C, 1, 1).expand(-1, -1, -1, self.temp_width)
    upsampled_probs = F.grid_sample(temp_A, grid, mode="bilinear")
    upsampled_probs = upsampled_probs.view(K, C, l_max)  # [K x C x l_max]
    upsampled_cropped = []
    for i, prob in enumerate(upsampled_probs):
        prob_cropped = prob[:, 0:round(L_prime[i].item())]
        upsampled_cropped.append(prob_cropped)
    out = torch.cat(upsampled_cropped, dim=1).unsqueeze(dim=0)  # [1 x C x ~T]
    out = F.interpolate(input=out, size=T)  # [1 x C x T]
    return out  # [1 x C x T]

def forward(self, sample, support=False):
    # do the actual forward passes
    # dropout probability for dropping the final theta and putting in the
    # default value of [1....10]
    self.identity_transform = self.identity_transform.to(sample.device)
    if self.training and not support:
        dropout = self.dropout
    else:
        dropout = 1

    sample = Variable(sample)
    inp_flatten = sample

    # do the forward pass
    theta = self.module(inp_flatten)
    theta = theta + 0  # scale it to have any values

    B = sample.shape[0]
    U = torch.rand(B)
    idx = (U <= dropout).nonzero().squeeze()
    theta[idx, :] = self.identity_transform

    # constrain if enabled
    if self.constrained:
        theta = self.clipper.clip(theta)

    # print(theta)
    # change the shape
    theta = theta.view(-1, 2, 3)
    grid = F.affine_grid(theta, inp_flatten.size(), align_corners=True)
    results = F.grid_sample(inp_flatten, grid, padding_mode="border", align_corners=True)
    transform = theta
    return results, transform, {}

def plot(self, images):
    perrow = 5

    num, c, w, h = images.size()
    rows = int(math.ceil(num / perrow))

    thetas = self.preprocess(images)
    thetas = thetas * self.scale + self.identity[None, None, :, :]
    b, g, _, _ = thetas.size()

    grid = F.affine_grid(
        thetas.view(b * g, 2, 3),
        torch.Size((b * g, self.in_size[0], self.k, self.k))
    )
    means = grid.view(b, -1, 2).data.cpu()

    # scale to image resolution
    means = ((means + 1.0) * 0.5) * torch.tensor(self.in_size[1:], dtype=torch.float)[None, None, :]

    b, k, _ = means.size()
    sigmas = torch.ones((b, k, 2)) * 0.001
    values = torch.ones((b, k))

    images = images.data

    plt.figure(figsize=(perrow * 3, rows * 3))
    for i in range(num):
        ax = plt.subplot(rows, perrow, i + 1)
        im = np.transpose(images[i, :, :, :].cpu().numpy(), (1, 2, 0))
        im = np.squeeze(im)
        ax.imshow(im, interpolation='nearest',
                  extent=(-0.5, w - 0.5, -0.5, h - 0.5), cmap='gray_r')
        util.plot(means[i, :, :].unsqueeze(0), sigmas[i, :, :].unsqueeze(0),
                  values[i, :].unsqueeze(0), axes=ax, flip_y=h,
                  alpha_global=0.8 / self.num_glimpses)
    plt.gcf()

def extract_patch(self, x, l):
    """
    Extract a single patch for each image in the minibatch `x`.

    Args
    ----
    - x: a 4D Tensor of shape (B, C, H, W). The minibatch of images.
    - l: a 2D Tensor of shape (B, 2).

    Returns
    -------
    - patch: a 4D Tensor of shape (B, C, size, size)
    """
    size = self.g
    B, C, H, W = x.shape

    # calculate the coordinates for each batch sample (padding considered)
    from_x, from_y = l[:, 0], l[:, 1]

    # build the fluid-flow grid
    if self.use_gpu:
        theta = torch.cuda.FloatTensor(B * 2, 3).fill_(0)
    else:
        theta = torch.zeros(B * 2, 3)
    # PyTorch's theta differs from cv2's affine matrix; rows are interleaved
    # as (x-row, y-row) per sample, then reshaped to (B, 2, 3)
    theta[torch.arange(0, B * 2, 2), 0] = size / W
    theta[torch.arange(1, B * 2, 2), 1] = size / H
    theta[torch.arange(0, B * 2, 2), 2] = from_x
    theta[torch.arange(1, B * 2, 2), 2] = from_y
    theta = theta.reshape((B, 2, 3))
    grid = F.affine_grid(theta, torch.Size((B, C, size, size)))
    return F.grid_sample(x, grid, mode='nearest', padding_mode='zeros')  # padding_mode='reflection'

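# Usage sketch, treating the method above as a free function: `self` only needs
# `.g` (glimpse size) and `.use_gpu`, so a SimpleNamespace stands in here.
from types import SimpleNamespace
import torch

cfg_stub = SimpleNamespace(g=8, use_gpu=False)
x = torch.rand(4, 1, 28, 28)
l = torch.zeros(4, 2)  # glimpse centers in normalized [-1, 1] coordinates
patch = extract_patch(cfg_stub, x, l)
print(patch.shape)  # torch.Size([4, 1, 8, 8])
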
def stn_loss(features, motion, pose, pose_mask_reg=0.0):
    # ignore_index=255, viewed as motion
    motion = torch.clamp(motion, min=0, max=1)
    n = len(features)
    total_loss = 0
    for i in range(n - 1):
        theta = pose[:, i, :].view(-1, 2, 3)
        grid = F.affine_grid(theta, features[i + 1].size())
        # loss = F.l1_loss(features[0], features[i+1], reduction='none')
        loss = torch.abs(features[0] - features[i + 1])
        loss = torch.clamp(loss, min=0, max=2.0)
        if pose_mask_reg < -1:
            total_loss += torch.mean(loss)
        elif pose_mask_reg < 0:
            shape = features[i + 1].shape
            s = int(shape[2] * 0.1)
            e = int(shape[2] * 0.9)
            pose_mask = torch.zeros_like(features[i + 1])
            pose_mask[:, :, s:e, s:e] = 1
            assert s > 0 and e > 0, 'start and end indices must be larger than 0'
            total_loss += torch.mean(loss * pose_mask)
        elif pose_mask_reg == 0:
            shape = features[i + 1].shape
            s = int(shape[2] * 0.1)
            e = int(shape[2] * 0.9)
            pose_mask = torch.zeros_like(features[i + 1])
            pose_mask[:, :, s:e, s:e] = 1
            assert s > 0 and e > 0, 'start and end indices must be larger than 0'
            total_loss += torch.mean(loss * (1 - motion) * pose_mask)
        elif pose_mask_reg > 0:
            pose_mask = F.grid_sample(torch.ones_like(features[i + 1]), grid)
            total_loss += torch.mean(loss * (1 - motion) * pose_mask) + pose_mask_reg * torch.mean(1 - pose_mask)
        else:
            assert False, 'pose mask reg error'
    return total_loss

def forward(self, x):
    x = self.backbone(x)

    # extract attentional regions of each parcel
    x_obj = []
    for i in range(self.num_classes):
        x_tmp = self.conv_parcels[i](x)
        tmp = F.max_pool2d(F.relu(x_tmp), 2).view(-1, int(self.num_moda * self.feature_size ** 2 / 4))
        tmp = self.stn_params[i](tmp).view(-1, 2, 3)
        affine_grid_points = F.affine_grid(
            tmp,
            torch.Size((tmp.size(0), self.num_moda, self.feature_size, self.feature_size)),
            align_corners=True)
        x_obj.append(F.grid_sample(x_tmp, affine_grid_points, align_corners=True))

    # build relations between each label pair
    idx_g = 0
    outputs = []
    for i in range(self.num_classes):
        relation_inter = 0
        # g_theta
        for j in range(self.num_classes):
            if not i == j:
                relation_inter += F.relu(self.g_theta[idx_g](torch.cat([x_obj[i], x_obj[j]], axis=1)))
                idx_g += 1
        relation_accum = self.avg(relation_inter).view(-1, self.num_units)
        # f_phi
        outputs.append(self.f_phi(relation_accum))
    outputs = torch.cat(outputs, axis=-1)
    return outputs

def warp_feature_batch(feature, pts_location, patch_size):
    # feature must be [1,C,H,W] and pts_location must be [Num-Pts, (x,y)]
    _, C, H, W = list(feature.size())
    num_pts = pts_location.size(0)
    assert isinstance(patch_size, int) and feature.size(0) == 1 and pts_location.size(1) == 2, \
        'The shapes of feature or points are not right : {} vs {}'.format(feature.size(), pts_location.size())
    assert W > 1 and H > 1, 'To guarantee normalization {}, {}'.format(W, H)

    def normalize(x, L):
        return -1. + 2. * x / (L - 1)

    crop_box = torch.cat([pts_location - patch_size, pts_location + patch_size], 1)
    crop_box[:, [0, 2]] = normalize(crop_box[:, [0, 2]], W)
    crop_box[:, [1, 3]] = normalize(crop_box[:, [1, 3]], H)
    affine_parameter = [(crop_box[:, 2] - crop_box[:, 0]) / 2, crop_box[:, 0] * 0, (crop_box[:, 2] + crop_box[:, 0]) / 2,
                        crop_box[:, 0] * 0, (crop_box[:, 3] - crop_box[:, 1]) / 2, (crop_box[:, 3] + crop_box[:, 1]) / 2]
    # affine_parameter = [(crop_box[:,2]-crop_box[:,0])/2, MU.np2variable(torch.zeros(num_pts),feature.is_cuda,False), (crop_box[:,2]+crop_box[:,0])/2,
    #                     MU.np2variable(torch.zeros(num_pts),feature.is_cuda,False), (crop_box[:,3]-crop_box[:,1])/2, (crop_box[:,3]+crop_box[:,1])/2]
    theta = torch.stack(affine_parameter, 1).view(num_pts, 2, 3)
    feature = feature.expand(num_pts, C, H, W)
    grid_size = torch.Size([num_pts, 1, 2 * patch_size + 1, 2 * patch_size + 1])
    grid = F.affine_grid(theta, grid_size)
    sub_feature = F.grid_sample(feature, grid)
    return sub_feature

def roi_align(feat, roi, pool_size):
    # INPUT:
    #   feat: N * C * H * W
    #   roi:  N * roi_num_per_img * 4   (x1, y1, x2, y2)
    # OUTPUT:
    #   crop_feat: N * roi_num_per_img * C * pool_size * pool_size
    N = feat.shape[0]
    C = feat.shape[1]
    H = feat.shape[2]
    W = feat.shape[3]
    roi_num_per_img = roi.shape[1]

    x1 = roi[..., 0].view(-1)
    y1 = roi[..., 1].view(-1)
    x2 = roi[..., 2].view(-1)
    y2 = roi[..., 3].view(-1)

    theta = Variable(roi.data.new(roi.size(0) * roi.size(1), 2, 3).zero_())
    theta[:, 0, 0] = (x2 - x1) / (W - 1)
    theta[:, 0, 2] = (x1 + x2 - W + 1) / (W - 1)
    theta[:, 1, 1] = (y2 - y1) / (H - 1)
    theta[:, 1, 2] = (y1 + y2 - H + 1) / (H - 1)
    theta = theta.view(N, roi_num_per_img, 2, 3).view(N * roi_num_per_img, 2, 3)

    grid = F.affine_grid(theta, torch.Size((theta.size(0), 1, pool_size, pool_size)))
    feat = feat[:, None, ...].expand(-1, roi_num_per_img, -1, -1, -1).contiguous().view(N * roi_num_per_img, C, H, W)
    crop_feat = F.grid_sample(feat, grid).view(N, roi_num_per_img, C, pool_size, pool_size)
    return crop_feat

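# Hedged usage sketch for roi_align above: two images, two boxes each, pooled
# to 7x7; box coordinates are in feature-map pixels (x1, y1, x2, y2).
import torch
from torch.autograd import Variable  # roi_align above relies on this import

feat = torch.rand(2, 256, 32, 32)
roi = torch.tensor([[[0., 0., 15., 15.], [8., 8., 31., 31.]],
                    [[4., 4., 20., 20.], [0., 16., 15., 31.]]])
crops = roi_align(feat, roi, pool_size=7)
print(crops.shape)  # torch.Size([2, 2, 256, 7, 7])
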
def forward(self, input_g, label_g):
    transform_t = self._build2dTransformMatrix()
    transform_t = transform_t.expand(input_g.shape[0], -1, -1)
    transform_t = transform_t.to(input_g.device, torch.float32)
    affine_t = F.affine_grid(transform_t[:, :2], input_g.size(), align_corners=False)

    augmented_input_g = F.grid_sample(input_g, affine_t,
                                      padding_mode='border', align_corners=False)
    augmented_label_g = F.grid_sample(label_g.to(torch.float32), affine_t,
                                      padding_mode='border', align_corners=False)

    if self.noise:
        noise_t = torch.randn_like(augmented_input_g)
        noise_t *= self.noise
        augmented_input_g += noise_t

    return augmented_input_g, augmented_label_g > 0.5

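# A plausible minimal sketch of the _build2dTransformMatrix helper assumed by
# the two forward() methods above (the actual helper in this codebase likely
# composes more augmentations, e.g. flips and offsets): a 3x3 homogeneous
# matrix with a random rotation, of which forward() uses only the top two rows.
import math
import random
import torch

def _build2dTransformMatrix_sketch():
    t = torch.eye(3)
    angle = random.random() * 2 * math.pi
    s, c = math.sin(angle), math.cos(angle)
    t[:2, :2] = torch.tensor([[c, -s], [s, c]])
    return t
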
def forward(self, x, x_to_pool_from):
    batch_size = x.size(0)
    c_, h_, w_ = x_to_pool_from.size(-3), x_to_pool_from.size(-2), x_to_pool_from.size(-1)
    c, h, w = x.size(-3) // self.num_groups, x.size(-2), x.size(-1)
    assert c == c_, "Channel dimensions of augmented and pooled tensors should be equal, got [{}, {}]".format(c, c_)

    theta = self.localization(x)
    grid = F.affine_grid(theta, torch.Size((batch_size * self.num_groups, c, h, w)))

    x_to_pool_from = x_to_pool_from.repeat(1, self.num_groups, 1, 1)
    x_to_pool_from = x_to_pool_from.view(-1, c_, h_, w_)
    x_to_pool_from = F.grid_sample(x_to_pool_from, grid, mode=MODE, padding_mode=PADDING_MODE)
    x_to_pool_from = x_to_pool_from.view(batch_size, self.num_groups * c, h, w)

    x_out = x + x_to_pool_from
    x_out = x_out.view(batch_size, self.num_groups, c, h, w)
    grid = grid.permute(0, 3, 1, 2)
    grid = grid.view(batch_size, self.num_groups, 2, h, w)
    x_out = torch.cat([x_out, grid], dim=2).view(batch_size, self.num_groups * (c + 2), h, w)
    x_out = self.block(x_out)
    return x_out, grid

def forward(self, input, sign=None, bias=None, rotation=None):
    _device = input.device
    N = input.size(0)
    _theta = self._eye.repeat(N, 1, 1)

    if sign is None:
        sign = torch.bernoulli(torch.ones(N, device=_device) * 0.5) * 2 - 1
    if bias is None:
        bias = torch.empty((N, 2), device=_device).uniform_(-self.max_range, self.max_range)

    _theta[:, 0, 0] = sign
    _theta[:, :, 2] = bias
    if rotation is not None:
        _theta[:, 0:2, 0:2] = rotation

    grid = F.affine_grid(_theta, input.size(), **kwargs).to(_device)
    output = F.grid_sample(input, grid, padding_mode='reflection', **kwargs)

    return output

def forward(self, x, h_0):
    # ==== extract features ==== #
    feats = self.conv(x)
    feats = feats.view(-1, self.rnn_input_shape)

    # ==== produce <num_steps> many views ==== #
    views = []
    thetas = []
    curr_h = h_0
    for step in range(self.num_steps):
        # regress the affine matrix
        curr_h = self.RNN(feats, curr_h)
        theta = self.fc_theta(curr_h.squeeze(0))
        thetas.append(theta)

        # produce the view
        theta = theta.view(-1, 2, 3)
        output_size = torch.Size([x.shape[0], *self.input_shape])
        grid = F.affine_grid(theta, output_size, align_corners=False)
        view = F.grid_sample(x, grid, align_corners=False)
        view = self.transform(view)[0]
        views.append(view)

    return views, thetas

def patches_from_z(self, x_img, z_obj):
    """From z and image, get object patches.

    Grid sample expects input of shape (-1, c, *h*, *w*), but I think we can
    ignore this here. This effectively switches our x and y as given in theta
    around, and there should be no side effects from this. (We also have to
    switch w_out and h_out w.r.t. the torch documentation.)

    Args:
        x_img (torch.Tensor), (nT, c, w, h): Images.
        z_obj (torch.Tensor), (nTo, 4): Object states.

    Returns:
        out (torch.Tensor), (n4o, c, w_out, h_out): Object patches.
    """
    theta = self.expand_z(z_obj)

    # broadcast x s.t. each image is repeated as many times as there are
    # objects in the scene. I checked that images are repeated next to each
    # other, i.e. same as in z, shape (n4o, c, w, h)
    x_obj = x_img.flatten(start_dim=1).repeat(1, self.c.num_obj)
    x_obj = x_obj.view(-1, *x_img.size()[1:])

    # output shape of grid (n4o, c, patch_width, patch_height)
    w_out, h_out = self.c.patch_width, self.c.patch_height

    # grid shape (n4o, w_out, h_out, 2)
    # grid contains the parameters of img used for each sampled pixel
    # (also note that this ignores the channel, just like it should;
    # we take the channel from x_obj!)
    grid = F.affine_grid(theta, torch.Size((x_obj.size(0), x_img.shape[1], w_out, h_out)))
    out = F.grid_sample(x_obj, grid)

    return out

def gen_mask_parsing_labels(parsing_labels, mask_rois):
    # parsing_labels: (48, 320, 320), mask_rois: (48, 5)
    # rois = rois.detach()
    x1 = mask_rois[:, 1::4]
    y1 = mask_rois[:, 2::4]
    x2 = mask_rois[:, 3::4]
    y2 = mask_rois[:, 4::4]

    height = parsing_labels.size(1)
    width = parsing_labels.size(2)

    # affine theta; the ROI slices have shape (N, 1), so flatten before assigning
    theta = Variable(mask_rois.data.new(mask_rois.size(0), 2, 3).zero_())
    theta[:, 0, 0] = ((x2 - x1) / (width - 1)).view(-1)
    theta[:, 0, 2] = ((x1 + x2 - width + 1) / (width - 1)).view(-1)
    theta[:, 1, 1] = ((y2 - y1) / (height - 1)).view(-1)
    theta[:, 1, 2] = ((y1 + y2 - height + 1) / (height - 1)).view(-1)

    pre_pool_size = cfg.POOLING_SIZE * 8
    grid = F.affine_grid(theta, torch.Size((mask_rois.size(0), 1, pre_pool_size, pre_pool_size)))
    mask_parsing_labels = F.grid_sample(parsing_labels.unsqueeze(1), grid)  # (48, 1, 320, 320)
    mask_parsing_labels = torch.round(mask_parsing_labels)
    # mask_parsing_labels[mask_parsing_labels >= 0.5] = 1
    # mask_parsing_labels[mask_parsing_labels < 0.5] = 0
    return mask_parsing_labels

def test_affine_grid_3d(self):
    N = 8
    C = 3
    D = 64
    H = 256
    W = 128
    theta = np.random.randn(N, 3, 4).astype(np.float32)
    features = np.random.randint(256, size=(N, C, D, H, W)).astype(np.float32)

    torch_theta = torch.Tensor(theta)
    torch_features = torch.Tensor(features)
    torch_grid = F.affine_grid(torch_theta, size=(N, C, D, H, W), align_corners=False)
    torch_sample = F.grid_sample(torch_features, torch_grid, mode='bilinear',
                                 padding_mode='zeros', align_corners=False)

    jt_theta = jt.array(theta)
    jt_features = jt.array(features)
    jt_grid = affine_grid(jt_theta, size=(N, C, D, H, W), align_corners=False)
    jt_sample = grid_sample(jt_features, jt_grid, mode='bilinear',
                            padding_mode='zeros', align_corners=False)

    assert np.allclose(jt_theta.numpy(), torch_theta.numpy())
    assert np.allclose(jt_features.numpy(), torch_features.numpy())
    assert np.allclose(jt_grid.numpy(), torch_grid.numpy(), atol=1e-05)
    assert np.allclose(torch_sample.numpy(), jt_sample.numpy(), atol=1e-01)

def forward(self, inp):
    batch_size = inp.size(0)
    # inp: 720x720, x: 480x480. Only use the center portion for training (not the black triangles).
    x = inp[:, :, self.pad:-self.pad, self.pad:-self.pad]
    x = self.downsample(x)        # 3x60x60
    x = self.net1_conv1(x)        # 20x56x56
    x = self.net1_PReLU(x)        # 20x56x56
    x = self.net1_pool(x)         # 20x28x28
    x = self.net1_conv2(x)        # 48x24x24
    x = self.net1_PReLU(x)        # 48x24x24
    x = self.net1_pool(x)         # 48x12x12
    x = self.net1_conv3(x)        # 64x10x10
    x = self.net1_PReLU(x)        # 64x10x10
    x = self.net1_pool(x)         # 64x5x5
    x = self.net1_conv4(x)        # 80x3x3
    x = self.net1_PReLU(x)        # 80x3x3
    x = x.view(x.size(0), -1)     # 720
    x = self.net1_fc5_1(x)        # 512
    x = self.net1_PReLU(x)        # 512
    x = self.net1_drop6(x)        # 512
    x = self.net1_68point(x)      # 136
    landmarks = self.net1_PReLU(x)  # landmarks: 136 (68x2 very inaccurate landmarks)

    theta = self.loc_reg_(landmarks)  # 6
    theta = theta.view(batch_size, 2, 3)
    if theta.is_cuda:
        self.base_theta = self.base_theta.cuda()
    theta += self.base_theta

    # For testing:
    # identity transform matrix
    # theta = Variable(torch.Tensor([[1, 0, 0], [0, 1, 0]]).cuda().view(1, 2, 3).repeat(batch_size, 1, 1), requires_grad=True)
    # stretching transform matrix
    # theta = Variable(torch.Tensor([[1.1, 0.5, 0.3], [0, 0.8, -0.1]]).cuda().view(1, 2, 3).repeat(batch_size, 1, 1), requires_grad=True)

    # Prepare the transformer grid with the (256, 256) size that FAN expects, w.r.t. theta
    grid = F.affine_grid(theta, torch.Size([batch_size, 3, 256, 256]))
    # "Rotate" the image by applying the grid
    outp = F.grid_sample(inp, grid)
    return outp, theta  # outp: 256x256, theta: 2x3

def forward(self, x):
    x1 = self.conv1(x)
    x1 = self.conv2(x1)
    x1 = self.conv3(x1)
    x1 = self.conv4(x1)
    x1 = x1.view(-1, 20 * 3 * 3)
    x1 = self.fc_loc(x1)
    x1 = x1.view(-1, 2, 3)  # reshape to the 2x3 affine matrix, i.e. 1x2x3
    # a 1x2x3 theta applied to a 1x128x128x128 input yields a 1x64x64x2 grid
    affine_grid_points = F.affine_grid(x1, torch.Size((x.size(0), self._in_ch, self._h, self._w)))
    # the batch size of the generated grid must match that of the input images
    assert affine_grid_points.size(0) == x.size(0)
    rois = F.grid_sample(x, affine_grid_points)
    return rois  # 1x128x128x128

def __getitem__(self, index):
    info_path = self.infos[index]
    img_path = info_path.replace('.txt', '').replace('anno', 'images')
    original_img = Image.open(img_path).convert("RGB")  # load the original image
    full_img = self.transforms(original_img)
    with open(info_path) as fil:
        json_content = json.load(fil)
    compare_labels = torch.Tensor(json_content['pairs'])
    # print("numbers", len(compare_labels), compare_labels[0], len(json_content["scores"]))
    crop_coordinates = json_content['bboxes']
    best_rank = torch.Tensor(json_content['best_bboxes_id'][:1])
    # print(len(crop_coordinates))
    # print("content", best_rank)
    compare_imgs = torch.zeros(len(crop_coordinates), 3, 224, 224)
    for i, coordinate in enumerate(crop_coordinates):
        new_raw_img = full_img[:, coordinate[1]:coordinate[3], coordinate[0]:coordinate[2]]
        theta = torch.zeros(1, 2, 3)
        theta[:, 0, 0] = 1
        theta[:, 1, 1] = 1
        grid = F.affine_grid(theta, (1, 3, 224, 224))
        new_img = F.grid_sample(new_raw_img.unsqueeze(0), grid).squeeze(0)
        compare_imgs[i] = new_img
    return compare_imgs, compare_labels, best_rank

def affine_grid_3d(size, theta):
    """
    Defines an affine grid in 3 dimensions.

    Args:
        size (tuple): tuple of ints containing the dimensions of the moving patch
            B = batch size
            C = number of channels
            D, H, W = dimensions of the input volume in depth, height and width
        theta (tensor): predicted deformation matrix

    Returns:
        A 3D affine grid that is used for the transformation
        Return size: [B, D, H, W, 3]
    """
    # extract dimensions of the input
    B, C, D, H, W = size

    # expand to the number of batches needed
    theta = theta.expand(B, 3, 4)

    # define the grid
    grid = F.affine_grid(theta, size=(B, C, D, H, W))
    grid = grid.view(B, D, H, W, 3)

    return grid

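# Example call under the shapes documented above: an identity 3x4 theta for a
# single one-channel volume; the returned grid has shape [B, D, H, W, 3].
import torch

theta = torch.tensor([[1., 0., 0., 0.],
                      [0., 1., 0., 0.],
                      [0., 0., 1., 0.]])
grid = affine_grid_3d((1, 1, 8, 16, 16), theta)
print(grid.shape)  # torch.Size([1, 8, 16, 16, 3])
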
def elastic(x, ratio=0.8, n=3, p=0.1):
    N, C, H, W = x.shape
    H_c, W_c = int((H * W * p) ** 0.5), int((H * W * p) ** 0.5)
    grid = F.affine_grid(affine(x), size=x.size())
    grid_y = grid[:, :, :, 0].unsqueeze(3)
    grid_x = grid[:, :, :, 1].unsqueeze(3)
    # stretch/contract n small image regions
    for i in range(0, n):
        x_coord = int(random.uniform(0, H - H_c))
        y_coord = int(random.uniform(0, W - W_c))
        x_scale = random.uniform(0, 1 - ratio) + 1
        y_scale = x_scale / ratio
        grid_y[:, x_coord:x_coord + H_c, y_coord:y_coord + W_c] = (
            grid_y[:, x_coord:x_coord + H_c, y_coord:y_coord + W_c] * y_scale)
        grid_x[:, x_coord:x_coord + H_c, y_coord:y_coord + W_c] = (
            grid_x[:, x_coord:x_coord + H_c, y_coord:y_coord + W_c] * x_scale)
    grid = torch.cat([grid_y, grid_x], dim=3)
    img = F.grid_sample(x, grid, padding_mode="border")
    return img

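# Quick sketch for elastic(): `affine(x)` above is assumed to return an
# [N, 2, 3] theta; a hypothetical identity stand-in gives a pure elastic effect.
import random
import torch

def affine(x):  # hypothetical stand-in, not the repo's real affine()
    return torch.eye(2, 3).repeat(x.shape[0], 1, 1)

out = elastic(torch.rand(2, 3, 64, 64))
print(out.shape)  # torch.Size([2, 3, 64, 64])
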
import torch.nn.functional as F

eps = np.finfo(np.float64).eps
plt.rcParams['figure.figsize'] = 10, 10

''' Affine crop testing '''
img_t = F.interpolate(transforms.ToTensor()(scipy.misc.face()).unsqueeze(0),
                      size=(768, 768), mode='bilinear')
theta = torch.from_numpy(np.array([[[0.2, 0.0, 0.2],
                                    [0.0, 0.2, 0.3]]]))
grid = F.affine_grid(theta, torch.Size((1, 3, 768, 768)))
grid.size()

plt.rcParams['figure.figsize'] = 8, 8
fig, axis = plt.subplots(nrows=1, ncols=2)
axis[0].imshow(grid[0, :, :, 0])
axis[0].set_title('x')
axis[1].imshow(grid[0, :, :, 1])
axis[1].set_title('y')
plt.show()

G = torch.bmm(grid[:, :, 0, 1].unsqueeze(2), grid[:, 0, :, 0].unsqueeze(1))
G.size()

data, class_labels = zip(*train_loader)
data = torch.cat(data)
batch_size = args.batch_size
n_inputs = 2

ortho_grids = []
for flip in [1, -1]:
    for theta in [0, math.pi / 2, math.pi, 3 * math.pi / 2]:
        trans = torch.Tensor([[math.cos(theta), flip * -math.sin(theta), 0],
                              [math.sin(theta), flip * math.cos(theta), 0]]).view(1, 2, 3)
        trans = trans.repeat(1, 1, 1)  # ?? (a no-op as written)
        grid = F.affine_grid(trans, torch.Size([1, 1, 28, 28]))
        ortho_grids.append(grid)
ortho_grids = torch.cat(ortho_grids, 0)

small_grids = []
for _ in range(20):
    trans = torch.Tensor([[1, 0, 0], [0, 1, 0]]).view(1, 2, 3) + torch.randn(1, 2, 3) * 0.05
    trans = trans.repeat(1, 1, 1)
    grid = F.affine_grid(trans, torch.Size([1, 1, 28, 28]))
    small_grids.append(grid)
small_grids = torch.cat(small_grids, 0)

def grid_cropper(img_t, theta, h=768, w=768):
    grid = F.affine_grid(theta, torch.Size((1, 3, h, w)))
    crop = F.grid_sample(img_t, grid.type(torch.float32), padding_mode='zeros')
    return crop

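# Usage sketch tying grid_cropper back to the notebook snippet above: the same
# theta crops a zoomed region from img_t (both assumed defined as in that cell).
crop = grid_cropper(img_t, theta)
plt.imshow(crop[0].permute(1, 2, 0))
plt.show()
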