def grid_sample(input, grid, canvas=None):
    """Sample ``input`` at ``grid`` and optionally composite onto ``canvas``.

    Args:
        input: Tensor forwarded to ``F.grid_sample``. Its ``stop_gradient``
            flag is set to ``False`` as a side effect.
        grid: Sampling grid forwarded to ``F.grid_sample``.
        canvas: Optional background. When given, a tensor of ones with the
            shape of ``input`` is sampled with the same grid and used as the
            blending weight between the sampled result and ``canvas``.

    Returns:
        The sampled tensor, or the blended tensor when ``canvas`` is given.
    """
    input.stop_gradient = False
    sampled = F.grid_sample(input, grid)
    if canvas is not None:
        # Sampling an all-ones tensor yields the per-pixel blending weight.
        ones = paddle.ones(shape=input.shape)
        coverage = F.grid_sample(ones, grid)
        return sampled * coverage + canvas * (1 - coverage)
    return sampled
def point_sample(input, points, align_corners=False, **kwargs):
    """Sample features at point coordinates given in the unit square.

    Thin wrapper around ``F.grid_sample``: coordinates are expected in
    ``[0, 1] x [0, 1]`` and are rescaled to ``[-1, 1]`` before sampling.
    A 3-D ``points`` tensor is also accepted.

    Args:
        input (Tensor): Feature map, shape (N, C, H, W).
        points (Tensor): Normalized point coordinates in [0, 1] x [0, 1],
            shape (N, P, 2) or (N, Hgrid, Wgrid, 2).
        align_corners (bool): Forwarded to ``F.grid_sample``. Default: False.
        **kwargs: Extra keyword arguments forwarded to ``F.grid_sample``.

    Returns:
        Tensor: Sampled features, shape (N, C, P) or (N, C, Hgrid, Wgrid).
    """
    is_point_list = points.dim() == 3
    if is_point_list:
        # Insert a dummy axis so the grid is 4-D; it is removed again below.
        points = paddle.unsqueeze(points, axis=2)

    # Map [0, 1] -> [-1, 1].
    normalized = points * 2.0 - 1.0
    sampled = F.grid_sample(
        input, normalized, align_corners=align_corners, **kwargs)

    if is_point_list:
        sampled = paddle.squeeze(sampled, axis=3)
    return sampled
def warp(self, x, disp):
    """Warp ``x`` by shifting the x coordinate of each pixel by ``disp``.

    Args:
        x: Tensor of shape [B, C, H, W] (the image being warped back).
        disp: Displacement subtracted from the x channel of the sampling
            grid; the original author documents it as [B, 1, H, W].

    Returns:
        The result of ``F.grid_sample(x, grid)``, [B, C, H, W].
    """
    B, _, H, W = x.shape

    # Pixel-index mesh: channel 0 holds column indices, channel 1 row indices.
    col_idx = paddle.arange(0, W, step=1, dtype='float32').reshape(shape=[1, -1])
    row_idx = paddle.arange(0, H, step=1, dtype='float32').reshape(shape=[-1, 1])
    col_idx = paddle.expand(col_idx, shape=[H, W]).reshape(shape=[1, 1, H, W])
    row_idx = paddle.expand(row_idx, shape=[H, W]).reshape(shape=[1, 1, H, W])
    col_idx = paddle.expand(col_idx, shape=[B, 1, H, W])
    row_idx = paddle.expand(row_idx, shape=[B, 1, H, W])
    vgrid = paddle.concat((col_idx, row_idx), axis=1)  # [B, 2, H, W]

    # Only the x channel is displaced.
    vgrid[:, :1, :, :] = vgrid[:, :1, :, :] - disp

    # Rescale pixel indices to [-1, 1]; max(..., 1) guards a size-1 axis.
    vgrid[:, 0, :, :] = 2.0 * vgrid[:, 0, :, :] / max(W - 1, 1) - 1.0
    vgrid[:, 1, :, :] = 2.0 * vgrid[:, 1, :, :] / max(H - 1, 1) - 1.0

    vgrid = paddle.transpose(vgrid, [0, 2, 3, 1])  # [B, H, W, 2]
    vgrid.stop_gradient = False
    return F.grid_sample(x, vgrid)
def deform_input(self, inp, deformation):
    """Sample ``inp`` with ``deformation``, resizing the grid if necessary.

    Args:
        inp: Tensor whose last two axes are the target height and width
            (unpacked as ``_, _, h, w``).
        deformation: Sampling grid unpacked as ``_, h_old, w_old, _``. It is
            bilinearly resized to ``(h, w)`` when its size differs.

    Returns:
        ``F.grid_sample`` of ``inp`` with the (possibly blended) grid, using
        bilinear mode, zero padding and ``align_corners=True``.
    """
    _, _, h, w = inp.shape
    _, grid_h, grid_w, _ = deformation.shape

    if grid_h != h or grid_w != w:
        # F.interpolate wants channels-first, so move the last axis forward.
        channels_first = deformation.transpose([0, 3, 1, 2])
        channels_first = F.interpolate(channels_first,
                                       size=(h, w),
                                       mode='bilinear',
                                       align_corners=False)
        deformation = channels_first.transpose([0, 2, 3, 1])

    if self.inference:
        identity_grid = make_coordinate_grid((h, w), type=inp.dtype)
        identity_grid = identity_grid.reshape([1, h, w, 2])

        # 1 in the interior, 0 in a border of width self.pad, then blurred so
        # the deformation fades into the identity grid towards the border.
        interior = np.zeros((h, w)).astype("float32")
        interior[self.pad:h - self.pad, self.pad:w - self.pad] = 1.0
        blurred = cv2.GaussianBlur(interior, (9, 9),
                                   0.0,
                                   borderType=cv2.BORDER_ISOLATED)
        blend = paddle.to_tensor(blurred).unsqueeze(0).unsqueeze(-1)
        deformation = blend * deformation + (1 - blend) * identity_grid

    return F.grid_sample(inp,
                         deformation,
                         mode='bilinear',
                         padding_mode='zeros',
                         align_corners=True)
def _grid_transform(img, grid, mode, fill):
    """Resample ``img`` with ``grid`` and fill uncovered pixels with ``fill``.

    Args:
        img: 4-D tensor indexed as (n, c, h, w).
        grid: Sampling grid for ``F.grid_sample``; expanded along axis 0 to
            the batch size of ``img`` when that batch size is larger than 1.
        mode: Interpolation mode passed to ``F.grid_sample``. ``'nearest'``
            selects a hard mask for the fill; any other value is treated as
            bilinear and blends with the soft mask.
        fill: ``None`` to skip filling, otherwise a scalar or a tuple/list of
            fill values.

    Returns:
        The resampled tensor, with out-of-range regions replaced by ``fill``
        when ``fill`` is not ``None``.
    """
    if img.shape[0] > 1:
        # Paddle's expand takes the whole target shape as ONE list argument;
        # passing the sizes as separate positional arguments is rejected.
        grid = grid.expand(
            shape=[img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3]])

    if fill is not None:
        # Extra all-ones channel: after sampling with zero padding it tells
        # how much of each output pixel came from inside the image.
        dummy = paddle.ones((img.shape[0], 1, img.shape[2], img.shape[3]),
                            dtype=img.dtype)
        img = paddle.concat((img, dummy), axis=1)

    img = F.grid_sample(img,
                        grid,
                        mode=mode,
                        padding_mode="zeros",
                        align_corners=False)

    # Fill with required color
    if fill is not None:
        mask = img[:, -1:, :, :]  # n 1 h w
        img = img[:, :-1, :, :]  # n c h w
        mask = mask.expand_as(img)
        len_fill = len(fill) if isinstance(fill, (tuple, list)) else 1
        # Cast to img.dtype so an integer ``fill`` does not produce a tensor
        # whose dtype differs from the image in the blend below.
        fill_img = paddle.to_tensor(fill).reshape(
            (1, len_fill, 1, 1)).astype(img.dtype).expand_as(img)

        if mode == 'nearest':
            mask = paddle.cast(mask < 0.5, img.dtype)
            img = img * (1. - mask) + mask * fill_img
        else:  # 'bilinear'
            img = img * mask + (1.0 - mask) * fill_img

    return img
def paste_mask(self, masks, boxes, im_h, im_w):
    """Paste the mask predictions onto the original image.

    Args:
        masks: Mask tensor sampled with ``F.grid_sample``; its first axis
            gives the number of masks ``N``.
        boxes: Tensor split into four columns ``x0, y0, x1, y1`` along axis 1.
        im_h: Image height (number of sampled rows).
        im_w: Image width (number of sampled columns).

    Returns:
        Channel 0 of the sampled masks (``img_masks[:, 0]``).

    Note:
        When ``self.assign_on_cpu`` is truthy this calls
        ``paddle.set_device('cpu')``, which changes the global device.
    """
    x0, y0, x1, y1 = paddle.split(boxes, 4, axis=1)
    N = masks.shape[0]

    # Pixel centres, expressed relative to each box and mapped to [-1, 1].
    centers_y = paddle.arange(0, im_h) + 0.5
    centers_x = paddle.arange(0, im_w) + 0.5
    img_y = (centers_y - y0) / (y1 - y0) * 2 - 1
    img_x = (centers_x - x0) / (x1 - x0) * 2 - 1
    # img_x, img_y have shapes (N, w), (N, h)

    if self.assign_on_cpu:
        paddle.set_device('cpu')

    gx = img_x[:, None, :].expand(
        [N, paddle.shape(img_y)[1], paddle.shape(img_x)[1]])
    gy = img_y[:, :, None].expand(
        [N, paddle.shape(img_y)[1], paddle.shape(img_x)[1]])
    sampling_grid = paddle.stack([gx, gy], axis=3)

    pasted = F.grid_sample(masks, sampling_grid, align_corners=False)
    return pasted[:, 0]
def forward(self, image):
    """Resample ``image`` with a grid predicted from the image itself.

    ``self.loc_net`` is run on the image, its output is passed together with
    the image's spatial size to ``self.grid_generator``, and the generated
    grid is reshaped to ``[-1, H, W, 2]`` and used for ``F.grid_sample``.

    Args:
        image: Input tensor; axes 2 and 3 are used as the spatial size. Its
            ``stop_gradient`` flag is set to ``False`` as a side effect.

    Returns:
        The resampled image.
    """
    image.stop_gradient = False
    loc_out = self.loc_net(image)
    sampling_grid = self.grid_generator(loc_out, image.shape[2:])
    grid_shape = [-1, image.shape[2], image.shape[3], 2]
    sampling_grid = sampling_grid.reshape(grid_shape)
    return F.grid_sample(x=image, grid=sampling_grid)
def _reg_grid_sample(self, feat, offset, anchor_points):
    """Sample ``feat`` at ``anchor_points + offset``.

    Args:
        feat: Feature tensor; reshaped to ``[-1, 1, h, w]`` so every channel
            is sampled independently.
        offset: Offsets reshaped to ``[-1, 2, h, w]`` and moved to
            channels-last before being added to ``anchor_points``.
        anchor_points: Base positions added to the offsets.
            NOTE(review): presumably in pixel units, since the sum is divided
            by (w, h) — confirm against the caller.

    Returns:
        The sampled features reshaped back to ``[b, -1, h, w]``.
    """
    b, _, h, w = get_static_shape(feat)

    per_channel = paddle.reshape(feat, [-1, 1, h, w])
    offset_hw2 = paddle.reshape(offset, [-1, 2, h, w]).transpose([0, 2, 3, 1])

    # Normalise by (w, h), clamp to [0, 1], then map to [-1, 1].
    extent = paddle.concat([w, h]).astype('float32')
    unit_grid = ((offset_hw2 + anchor_points) / extent).clip(0., 1.)
    sampling_grid = 2 * unit_grid - 1

    sampled = F.grid_sample(per_channel, sampling_grid)
    return paddle.reshape(sampled, [b, -1, h, w])
def transform_frame(self, frame):
    """Resample ``frame`` on a coordinate grid passed through ``warp_coordinates``.

    Args:
        frame: Tensor whose axes 2 and 3 are used as height and width.

    Returns:
        ``F.grid_sample`` of ``frame`` with the warped grid, using bilinear
        mode, reflection padding and ``align_corners=True``.
    """
    height, width = frame.shape[2], frame.shape[3]

    base_grid = make_coordinate_grid(frame.shape[2:], 'float32').unsqueeze(0)
    # Flatten the spatial axes; warp_coordinates receives a (1, H*W, 2) tensor.
    flat_grid = base_grid.reshape((1, height * width, 2))
    warped_grid = self.warp_coordinates(flat_grid)
    warped_grid = warped_grid.reshape((self.bs, height, width, 2))

    return F.grid_sample(frame,
                         warped_grid,
                         mode='bilinear',
                         padding_mode='reflection',
                         align_corners=True)
def dynamic_functional(self):
    """Run ``F.grid_sample`` on ``self.x`` / ``self.grid`` and return a numpy array.

    The inputs are converted with ``paddle.to_tensor``; ``self.mode``,
    ``self.padding_mode`` and ``self.align_corners`` are forwarded unchanged.

    Returns:
        The sampled result converted with ``.numpy()``.
    """
    sampled = F.grid_sample(paddle.to_tensor(self.x),
                            paddle.to_tensor(self.grid),
                            mode=self.mode,
                            padding_mode=self.padding_mode,
                            align_corners=self.align_corners)
    return sampled.numpy()
def deform_input(self, inp, deformation):
    """Sample ``inp`` with ``deformation``, resizing the grid if necessary.

    Args:
        inp: Tensor whose last two axes are the target height and width.
        deformation: Sampling grid unpacked as ``_, h_old, w_old, _``; it is
            bilinearly resized to the size of ``inp`` when the sizes differ.

    Returns:
        ``F.grid_sample(inp, deformation, align_corners=False)``.
    """
    _, _, h, w = inp.shape
    _, grid_h, grid_w, _ = deformation.shape

    if grid_h != h or grid_w != w:
        # F.interpolate wants channels-first, so move the last axis forward
        # for the resize and back afterwards.
        channels_first = deformation.transpose([0, 3, 1, 2])
        channels_first = F.interpolate(channels_first,
                                       size=(h, w),
                                       mode='bilinear',
                                       align_corners=False)
        deformation = channels_first.transpose([0, 2, 3, 1])

    return F.grid_sample(inp, deformation, align_corners=False)
def flow_warp(self, input, flow, size):
    """Warp ``input`` with a flow field via ``F.grid_sample``.

    Args:
        input: Tensor to be sampled.
        flow: Flow tensor; it is transposed with ``(0, 2, 3, 1)`` so its
            second axis becomes the last (coordinate) axis.
        size: Output size as (height, width).
            NOTE(review): must support ``size[::-1].reshape(...)``, so it is
            presumably a tensor rather than a Python tuple — confirm.

    Returns:
        ``F.grid_sample(input, grid)`` where ``grid`` is the regular
        ``[-1, 1]`` mesh plus the flow divided by (width, height).
    """
    # (width, height), shaped to broadcast over the last axis of the grid.
    norm = size[::-1].reshape([1, 1, 1, -1])

    h_grid = paddle.linspace(-1.0, 1.0, size[0]).reshape([-1, 1])
    h_grid = h_grid.tile([size[1]])
    w_grid = paddle.linspace(-1.0, 1.0, size[1]).reshape([-1, 1])
    w_grid = w_grid.tile([size[0]]).transpose([1, 0])
    grid = paddle.concat(
        [w_grid.unsqueeze(2), h_grid.unsqueeze(2)], axis=2)

    # The original computed grid.unsqueeze(0).tile([batch, 1, 1, 1]) here but
    # discarded the result. The statement is dropped: adding the 4-D flow
    # below broadcasts the 3-D mesh over the batch axis, so the output is the
    # same as before.
    grid = grid + paddle.transpose(flow, (0, 2, 3, 1)) / norm

    return F.grid_sample(input, grid)
def deform_input(self, inp, deformation):
    """Sample ``inp`` with ``deformation``, resizing the grid if necessary.

    Args:
        inp: Tensor whose last two axes are the target height and width.
        deformation: Sampling grid unpacked as ``_, h_old, w_old, _``; it is
            resized to the size of ``inp`` when the sizes differ.

    Returns:
        ``F.grid_sample`` of ``inp`` with the grid, using bilinear mode, zero
        padding and ``align_corners=True``.
    """
    _, _, h, w = inp.shape
    _, grid_h, grid_w, _ = deformation.shape

    if grid_h != h or grid_w != w:
        # F.interpolate wants channels-first, so move the last axis forward
        # for the resize and back afterwards.
        channels_first = paddle.transpose(deformation, (0, 3, 1, 2))
        channels_first = F.interpolate(channels_first,
                                       size=(h, w),
                                       mode='BILINEAR',
                                       align_corners=False)
        deformation = paddle.transpose(channels_first, (0, 2, 3, 1))

    return F.grid_sample(inp,
                         deformation,
                         mode='bilinear',
                         padding_mode='zeros',
                         align_corners=True)
def paste_mask(self, masks, boxes, im_h, im_w):
    """Paste a mask onto an image of size ``(im_h, im_w)``.

    Args:
        masks: Mask tensor; two leading axes are added before sampling.
        boxes: Tensor split into four columns ``x0, y0, x1, y1`` along axis 1.
        im_h: Image height (number of sampled rows).
        im_w: Image width (number of sampled columns).

    Returns:
        Channel 0 of the sampled masks (``img_masks[:, 0]``).
    """
    x0, y0, x1, y1 = paddle.split(boxes, 4, axis=1)
    masks = paddle.unsqueeze(masks, [0, 1])

    # Pixel centres, expressed relative to each box and mapped to [-1, 1].
    centers_y = paddle.arange(0, im_h, dtype='float32') + 0.5
    centers_x = paddle.arange(0, im_w, dtype='float32') + 0.5
    img_y = (centers_y - y0) / (y1 - y0) * 2 - 1
    img_x = (centers_x - x0) / (x1 - x0) * 2 - 1

    img_x = paddle.unsqueeze(img_x, [1])
    img_y = paddle.unsqueeze(img_y, [2])

    N = boxes.shape[0]
    target_shape = [N, img_y.shape[1], img_x.shape[2]]
    gx = paddle.expand(img_x, target_shape)
    gy = paddle.expand(img_y, target_shape)
    sampling_grid = paddle.stack([gx, gy], axis=3)

    pasted = F.grid_sample(masks, sampling_grid, align_corners=False)
    return pasted[:, 0]
def flow_warp(self, input, flow, size):
    """Warp ``input`` with a flow field via ``F.grid_sample``.

    Args:
        input: 4-D tensor; its first axis is the batch size.
        flow: Flow tensor; it is transposed with ``(0, 2, 3, 1)`` so its
            second axis becomes the last (coordinate) axis.
        size: ``(out_h, out_w)`` of the sampling mesh.

    Returns:
        ``F.grid_sample(input, grid)`` where ``grid`` is the regular
        ``[-1, 1]`` mesh plus the flow divided by ``(out_w, out_h)``.
    """
    out_h, out_w = size
    n, _, _, _ = input.shape

    # (out_w, out_h) shaped to broadcast over the last axis of the grid.
    norm = paddle.to_tensor(np.array([[[[out_w, out_h]]]]), dtype='float32')

    # Row coordinates repeated across columns, column coordinates across rows.
    row_coords = paddle.linspace(-1.0, 1.0, out_h).reshape([-1, 1])
    row_coords = paddle.concat([row_coords for _ in range(out_w)], axis=1)
    col_coords = paddle.linspace(-1.0, 1.0, out_w).reshape([1, -1])
    col_coords = paddle.concat([col_coords for _ in range(out_h)], axis=0)

    mesh = paddle.concat(
        [col_coords.unsqueeze(2), row_coords.unsqueeze(2)], axis=2)
    mesh = paddle.concat([mesh.unsqueeze(0) for _ in range(n)], axis=0)

    grid = mesh + paddle.transpose(flow, (0, 2, 3, 1)) / norm
    return F.grid_sample(input, grid)
def static_functional(self, place):
    """Build and run a static-graph program that calls ``F.grid_sample``.

    Args:
        place: Device passed to ``fluid.Executor``.

    Returns:
        The single fetched result of running the program with ``self.x`` and
        ``self.grid`` as feeds.
    """
    main_prog = fluid.Program()
    startup_prog = fluid.Program()

    with fluid.unique_name.guard(), fluid.program_guard(main_prog,
                                                        startup_prog):
        x_var = fluid.data("x", self.x_shape, dtype=self.dtype)
        grid_var = fluid.data("grid", self.grid_shape, dtype=self.dtype)
        out_var = F.grid_sample(x_var,
                                grid_var,
                                mode=self.mode,
                                padding_mode=self.padding_mode,
                                align_corners=self.align_corners)

    executor = fluid.Executor(place)
    executor.run(startup_prog)
    (result, ) = executor.run(main_prog,
                              feed={"x": self.x,
                                    "grid": self.grid},
                              fetch_list=[out_var])
    return result
def create_deformed_source_image(self, source_image, sparse_motions):
    r"""
    Eq 7. in the paper \hat{T}_{s<-d}(z)

    Repeats the source image ``self.num_kp + 1`` times per batch element and
    samples each copy with the matching sparse motion grid.

    Args:
        source_image: 4-D tensor unpacked as ``bs, _, h, w``.
        sparse_motions: Sampling grids, reshaped here to
            ``(bs * (num_kp + 1), h, w, -1)``.

    Returns:
        The sampled images reshaped to ``(bs, num_kp + 1, -1, h, w)``.
    """
    # The docstring is a raw string because ``\h`` is not a valid escape
    # sequence in a normal string literal.
    bs, _, h, w = source_image.shape
    num_copies = self.num_kp + 1

    # Repeat along a new axis 1 so copies of one batch element stay adjacent
    # (counterpart of torch's .repeat(1, num_kp + 1, 1, 1, 1, 1)).
    source_repeat = paddle.tile(
        source_image.unsqueeze(1).unsqueeze(1), [1, num_copies, 1, 1, 1, 1])
    source_repeat = source_repeat.reshape([bs * num_copies, -1, h, w])

    sparse_motions = sparse_motions.reshape((bs * num_copies, h, w, -1))
    sparse_deformed = F.grid_sample(source_repeat,
                                    sparse_motions,
                                    align_corners=False)
    sparse_deformed = sparse_deformed.reshape((bs, num_copies, -1, h, w))
    return sparse_deformed
def paste_mask(self, masks, boxes, im_h, im_w):
    """Paste a mask onto an image of size ``(im_h, im_w)``.

    Args:
        masks: Mask tensor; two leading axes are added before sampling.
        boxes: Tensor split into four columns ``x0, y0, x1, y1`` along axis 1.
        im_h: Image height (number of sampled rows).
        im_w: Image width (number of sampled columns).

    Returns:
        Channel 0 of the sampled masks (``img_masks[:, 0]``).
    """
    x0, y0, x1, y1 = paddle.split(boxes, 4, axis=1)
    masks = paddle.unsqueeze(masks, [0, 1])

    # Pixel centres, expressed relative to each box and mapped to [-1, 1].
    centers_y = paddle.arange(0, im_h, dtype='float32') + 0.5
    centers_x = paddle.arange(0, im_w, dtype='float32') + 0.5
    img_y = (centers_y - y0) / (y1 - y0) * 2 - 1
    img_x = (centers_x - x0) / (x1 - x0) * 2 - 1

    img_x = paddle.unsqueeze(img_x, [1])
    img_y = paddle.unsqueeze(img_y, [2])

    N = boxes.shape[0]
    gx = paddle.expand(img_x, [N, img_y.shape[1], img_x.shape[2]])
    gy = paddle.expand(img_y, [N, img_y.shape[1], img_x.shape[2]])
    # TODO: paddle.expand is converted incorrectly when going from dygraph to
    # static, so the shape is pinned again with reshape as a workaround.
    gx = paddle.reshape(gx, [N, img_y.shape[1], img_x.shape[2]])
    gy = paddle.reshape(gy, [N, img_y.shape[1], img_x.shape[2]])

    sampling_grid = paddle.stack([gx, gy], axis=3)
    pasted = F.grid_sample(masks, sampling_grid, align_corners=False)
    return pasted[:, 0]
def deformable_attention_core_func(value, value_spatial_shapes,
                                   sampling_locations, attention_weights):
    """Multi-level deformable attention built on ``F.grid_sample``.

    Args:
        value (Tensor): [bs, value_length, n_head, c]
        value_spatial_shapes (Tensor): [n_levels, 2]
        sampling_locations (Tensor): [bs, query_length, n_head, n_levels, n_points, 2]
        attention_weights (Tensor): [bs, query_length, n_head, n_levels, n_points]

    Returns:
        output (Tensor): [bs, Length_{query}, C]
    """
    bs, _, _, c = value.shape
    _, Len_q, n_head, n_levels, n_points, _ = sampling_locations.shape

    level_shapes = value_spatial_shapes.tolist()
    # Split the flattened value sequence into one chunk of h*w per level.
    per_level_values = value.split(
        value_spatial_shapes.prod(1).tolist(), axis=1)

    # Map locations from [0, 1] to the [-1, 1] range used by grid_sample.
    grids = 2 * sampling_locations - 1

    sampled_per_level = []
    for level, (level_value, (h, w)) in enumerate(
            zip(per_level_values, level_shapes)):
        # [bs, h*w, n_head, c] -> [bs, h*w, n_head*c] -> [bs, n_head*c, h*w]
        # -> [bs*n_head, c, h, w]
        feature_map = level_value.flatten(2).transpose([0, 2, 1]).reshape(
            [bs * n_head, c, h, w])
        # [bs, Lq, n_head, P, 2] -> [bs, n_head, Lq, P, 2]
        # -> [bs*n_head, Lq, P, 2]
        level_grid = grids[:, :, :, level].transpose([0, 2, 1, 3, 4]).flatten(
            0, 1)
        # [bs*n_head, c, Lq, P]
        sampled_per_level.append(
            F.grid_sample(feature_map,
                          level_grid,
                          mode='bilinear',
                          padding_mode='zeros',
                          align_corners=False))

    # [bs, Lq, n_head, L, P] -> [bs, n_head, Lq, L, P]
    # -> [bs*n_head, 1, Lq, L*P]
    weights = attention_weights.transpose([0, 2, 1, 3, 4]).reshape(
        [bs * n_head, 1, Len_q, n_levels * n_points])

    stacked = paddle.stack(sampled_per_level, axis=-2).flatten(-2)
    weighted = (stacked * weights).sum(-1)
    output = weighted.reshape([bs, n_head * c, Len_q])
    return output.transpose([0, 2, 1])
def create_deformed_source_image(self, source_image, sparse_motions):
    r"""
    Eq 7. in the paper \hat{T}_{s<-d}(z)

    Repeats the source image ``self.num_kp + 1`` times per batch element and
    samples each copy with the matching sparse motion grid.

    Args:
        source_image: 4-D tensor unpacked as ``bs, _, h, w``.
        sparse_motions: Sampling grids, reshaped here to
            ``(bs * (num_kp + 1), h, w, -1)``.
            NOTE(review): assumed to be ordered batch-major, i.e. row
            ``b * (num_kp + 1) + k``, which is what the final reshape to
            ``(bs, num_kp + 1, ...)`` implies — confirm against the caller.

    Returns:
        The sampled images reshaped to ``(bs, num_kp + 1, -1, h, w)``.
    """
    # The docstring is a raw string because ``\h`` is not a valid escape
    # sequence in a normal string literal.
    bs, _, h, w = source_image.shape
    num_copies = self.num_kp + 1

    # Repeat along a NEW axis 1 so the copies of one batch element stay
    # adjacent. The previous code flattened batch and channels and tiled
    # along axis 0, which orders rows as (copy, batch) and therefore paired
    # images with the wrong motion grids whenever bs > 1. For bs == 1 both
    # layouts are identical.
    source_repeat = paddle.tile(
        source_image.unsqueeze(1), (1, num_copies, 1, 1, 1))
    source_repeat = source_repeat.reshape((bs * num_copies, -1, h, w))

    sparse_motions = sparse_motions.reshape((bs * num_copies, h, w, -1))
    # Important: grid_sampler(..., align_corners=True) in pytorch 1.0
    sparse_deformed = F.grid_sample(source_repeat,
                                    sparse_motions,
                                    mode='bilinear',
                                    padding_mode='zeros',
                                    align_corners=True)
    sparse_deformed = sparse_deformed.reshape((bs, num_copies, -1, h, w))
    return sparse_deformed
def forward(self, x, offset, mask):
    """Modulated deformable convolution implemented with ``F.grid_sample``.

    Steps: pad ``x``; build one sampling position per output location and
    kernel tap (kernel-centre position + in-kernel offset + predicted
    ``offset``); sample the padded input bilinearly; multiply by ``mask``;
    apply the layer weight as a 1x1 convolution.

    Args:
        x: Input tensor unpacked as ``N, _, H, W``.
        offset: Predicted offsets; transposed to channels-last and reshaped
            to ``(N, out_H, out_W, kH * kW, 2)`` with (y, x) in the last axis.
        mask: Per-tap importance weights; per the shape comments it is
            ``[N, kH * kW, out_H, out_W]`` before an axis is inserted.

    Returns:
        The convolution output; the shape comments give
        ``[N, out_C, out_H, out_W]``.

    Note:
        The shape annotations in the trailing comments are the original
        author's and are not checked by the code.
    """
    in_C = self.in_channels
    out_C = self.out_channels
    stride = self.stride
    padding = self.padding
    # dilation = self.dilation
    groups = self.groups
    N, _, H, W = x.shape
    _, w_in, kH, kW = self.weight.shape
    out_W = (W + 2 * padding - (kW - 1)) // stride
    out_H = (H + 2 * padding - (kH - 1)) // stride

    # ================== 1. Pad the image x to obtain pad_x ==================
    # One extra row/column is padded at the bottom/right (hence the "+ 1").
    pad_x_H = H + padding * 2 + 1
    pad_x_W = W + padding * 2 + 1
    pad_x = F.pad(x, pad=[0, 0, 0, 0, padding, padding + 1, padding, padding + 1], value=0.0)

    # ================== 2. Compute the coordinates of all sampling points ==================
    # Position of the kernel centre inside pad_x
    y_outer, x_outer = paddle.meshgrid([paddle.arange(out_H), paddle.arange(out_W)])
    y_outer = y_outer * stride + padding
    x_outer = x_outer * stride + padding
    start_pos_yx = paddle.stack((y_outer, x_outer), 2).cast(dtype='float32')  # [out_H, out_W, 2]         kernel-centre position in pad_x only
    start_pos_yx = paddle.unsqueeze(start_pos_yx, axis=[0, 3])  # [1, out_H, out_W, 1, 2]   kernel-centre position in pad_x only
    start_pos_yx = paddle.tile(start_pos_yx, [N, 1, 1, kH * kW, 1])  # [N, out_H, out_W, kH*kW, 2]   kernel-centre position in pad_x only
    start_pos_y = start_pos_yx[:, :, :, :, :1]  # [N, out_H, out_W, kH*kW, 1]   kernel-centre position in pad_x only
    start_pos_x = start_pos_yx[:, :, :, :, 1:]  # [N, out_H, out_W, kH*kW, 1]   kernel-centre position in pad_x only
    start_pos_y.stop_gradient = True
    start_pos_x.stop_gradient = True

    # Offsets of each tap inside the kernel, relative to the kernel centre
    half_W = (kW - 1) // 2
    half_H = (kH - 1) // 2
    y_inner, x_inner = paddle.meshgrid([paddle.arange(kH), paddle.arange(kW)])
    y_inner -= half_H
    x_inner -= half_W
    filter_inner_offset_yx = paddle.stack((y_inner, x_inner), 2).cast(dtype='float32')  # [kH, kW, 2]       in-kernel offset
    filter_inner_offset_yx = paddle.reshape(filter_inner_offset_yx, (1, 1, 1, kH * kW, 2))  # [1, 1, 1, kH*kW, 2]   in-kernel offset
    filter_inner_offset_yx = paddle.tile(filter_inner_offset_yx, [N, out_H, out_W, 1, 1])  # [N, out_H, out_W, kH*kW, 2]   in-kernel offset
    filter_inner_offset_y = filter_inner_offset_yx[:, :, :, :, :1]  # [N, out_H, out_W, kH*kW, 1]   in-kernel offset
    filter_inner_offset_x = filter_inner_offset_yx[:, :, :, :, 1:]  # [N, out_H, out_W, kH*kW, 1]   in-kernel offset
    filter_inner_offset_y.stop_gradient = True
    filter_inner_offset_x.stop_gradient = True

    # Predicted offsets
    offset = paddle.transpose(offset, [0, 2, 3, 1])  # [N, out_H, out_W, kH*kW*2]
    offset_yx = paddle.reshape(offset, (N, out_H, out_W, kH * kW, 2))  # [N, out_H, out_W, kH*kW, 2]
    offset_y = offset_yx[:, :, :, :, :1]  # [N, out_H, out_W, kH*kW, 1]
    offset_x = offset_yx[:, :, :, :, 1:]  # [N, out_H, out_W, kH*kW, 1]

    # Final sampling positions.
    pos_y = start_pos_y + filter_inner_offset_y + offset_y  # [N, out_H, out_W, kH*kW, 1]
    pos_x = start_pos_x + filter_inner_offset_x + offset_x  # [N, out_H, out_W, kH*kW, 1]
    pos_y = paddle.clip(pos_y, 0.0, H + padding * 2 - 1.0)  # keep the final sampling positions inside pad_x
    pos_x = paddle.clip(pos_x, 0.0, W + padding * 2 - 1.0)  # keep the final sampling positions inside pad_x

    # ================== 3. Sample with F.grid_sample() (bilinear interpolation) ==================
    # Map pixel coordinates to [-1, 1] (align_corners=True convention).
    pos_x = pos_x / (pad_x_W - 1) * 2.0 - 1.0
    pos_y = pos_y / (pad_x_H - 1) * 2.0 - 1.0
    xtyt = paddle.concat([pos_x, pos_y], -1)  # [N, out_H, out_W, kH*kW, 2]
    xtyt = paddle.reshape(xtyt, (N, out_H, out_W * kH * kW, 2))  # [N, out_H, out_W*kH*kW, 2]
    value = F.grid_sample(pad_x, xtyt, mode='bilinear', padding_mode='zeros', align_corners=True)  # [N, in_C, out_H, out_W*kH*kW]
    value = paddle.reshape(value, (N, in_C, out_H, out_W, kH * kW))  # [N, in_C, out_H, out_W, kH * kW]
    value = value.transpose((0, 1, 4, 2, 3))  # [N, in_C, kH * kW, out_H, out_W]

    # ================== 4. Multiply by the importance weights ==================
    # Multiply by the importance weights
    mask = paddle.unsqueeze(mask, [1])  # [N,    1, kH * kW, out_H, out_W]
    value = value * mask  # [N, in_C, kH * kW, out_H, out_W]
    new_x = paddle.reshape(value, (N, in_C * kH * kW, out_H, out_W))  # [N, in_C * kH * kW, out_H, out_W]

    # ================== 5. Multiply by this layer's weight and add the bias ==================
    # 1x1 convolution
    rw = paddle.reshape(self.weight, (out_C, w_in * kH * kW, 1, 1))  # [out_C, w_in, kH, kW] -> [out_C, w_in*kH*kW, 1, 1]  reshaped into a 1x1 kernel
    out = F.conv2d(new_x, rw, bias=self.bias, stride=1, groups=groups)  # [N, out_C, out_H, out_W]
    return out