def forward(self, inputs):
    """
    Get SOLOv2MaskHead output.

    Level 0 is passed through ``convs_all_levels[0]`` and ReLU; every further
    level is passed through its own entry of ``convs_all_levels`` and added
    to the running sum. The last level gets two coordinate channels
    (linspace over [-1, 1] along width and height) concatenated first.

    Args:
        inputs(list[Tensor]): feature map from each necks with shape of [N, C, H, W]
    Returns:
        ins_pred(Tensor): Output of SOLOv2MaskHead head
    """
    fused = F.relu(self.convs_all_levels[0](inputs[0]))
    last_level = self.range_level - 1

    for level in range(1, self.range_level):
        level_feat = inputs[level]
        if level == last_level:
            # CoordConv-style channels, built from the runtime shape.
            feat_shape = paddle.shape(level_feat)
            xs = paddle.linspace(-1, 1, feat_shape[-1], dtype='float32')
            ys = paddle.linspace(-1, 1, feat_shape[-2], dtype='float32')
            grid_y, grid_x = paddle.meshgrid([ys, xs])
            grid_x = paddle.unsqueeze(grid_x, [0, 1])
            grid_y = paddle.unsqueeze(grid_y, [0, 1])
            grid_y = paddle.expand(grid_y, shape=[feat_shape[0], 1, -1, -1])
            grid_x = paddle.expand(grid_x, shape=[feat_shape[0], 1, -1, -1])
            coords = paddle.concat([grid_x, grid_y], axis=1)
            level_feat = paddle.concat([level_feat, coords], axis=1)
        fused = paddle.add(fused, self.convs_all_levels[level](level_feat))

    return F.relu(self.conv_pred(fused))
def get_output_and_grid(self, output, k, stride):
    """Decode the raw predictions of level ``k`` and return them with the grid.

    Args:
        output: raw head output; its last two axes are read as (height, width)
            and it is reshaped to (batch, n_anchors, 5 + num_classes, h, w).
        k: index into ``self.grids`` used to cache the cell grid.
        stride: multiplier applied to the decoded xy and wh.

    Returns:
        (output, grid): predictions reshaped to (batch, n_anchors * h * w, -1)
        with the first four channels decoded, and the grid reshaped to
        (1, -1, 2).
    """
    cached_grid = self.grids[k]
    num_batch = output.shape[0]
    channels_per_anchor = 5 + self.num_classes
    height, width = output.shape[-2:]

    # Rebuild (and re-cache) the grid when the spatial size has changed.
    if cached_grid.shape[2:4] != output.shape[2:4]:
        rows, cols = paddle.meshgrid(
            [paddle.arange(height), paddle.arange(width)])
        cached_grid = paddle.stack((cols, rows), 2)
        cached_grid = paddle.reshape(cached_grid, (1, 1, height, width, 2))
        cached_grid = paddle.cast(cached_grid, dtype=output.dtype)
        self.grids[k] = cached_grid

    # (batch, anchors, ch, h, w) -> (batch, anchors, h, w, ch) -> flat cells.
    output = paddle.reshape(
        output,
        (num_batch, self.n_anchors, channels_per_anchor, height, width))
    output = paddle.transpose(output, [0, 1, 3, 4, 2])
    output = paddle.reshape(
        output, (num_batch, self.n_anchors * height * width, -1))
    flat_grid = paddle.reshape(cached_grid, (1, -1, 2))

    # Decode centre (cell offset + grid) and size (exp), both scaled by stride.
    decoded_xy = (output[:, :, :2] + flat_grid) * stride
    decoded_wh = paddle.exp(output[:, :, 2:4]) * stride
    # Put the decoded xywh back in front of the remaining channels.
    output = paddle.concat([decoded_xy, decoded_wh, output[:, :, 4:]], 2)
    return output, flat_grid
def __init__(self, channels, scale):
    """Build a normalized 2-D Gaussian kernel and register it as ``weight``.

    Args:
        channels: number of channels; the kernel is tiled this many times
            along axis 0 and the value is stored as ``self.groups``.
        scale: scale factor; sigma is derived as ``(1 / scale - 1) / 2``.
    """
    super(AntiAliasInterpolation2d, self).__init__()

    std_1d = (1 / scale - 1) / 2
    size_1d = 2 * round(std_1d * 4) + 1

    self.ka = size_1d // 2
    if size_1d % 2 == 0:
        self.kb = self.ka - 1
    else:
        self.kb = self.ka

    sizes = [size_1d, size_1d]
    stds = [std_1d, std_1d]

    # The gaussian kernel is the product of the gaussian function of each
    # dimension.
    axes = [paddle.arange(size, dtype='float32') for size in sizes]
    grids = paddle.meshgrid(axes)
    kernel = 1
    for size, std, grid in zip(sizes, stds, grids):
        center = (size - 1) / 2
        kernel *= paddle.exp(-(grid - center)**2 / (2 * std**2 + 1e-9))

    # Make sure sum of values in gaussian kernel equals 1.
    kernel = kernel / paddle.sum(kernel)

    # Reshape to depthwise convolutional weight: [channels, 1, k, k].
    kernel = kernel.reshape([1, 1, *kernel.shape])
    repeat_times = [channels, *[1] * (kernel.dim() - 1)]
    kernel = paddle.tile(kernel, repeat_times)

    self.register_buffer('weight', kernel)
    self.groups = channels
    self.scale = scale
def rand_cutout(x, ratio=0.5):
    """Zero out one randomly placed rectangle per sample.

    Args:
        x: 4-D tensor; axes 2 and 3 are treated as the spatial axes.
        ratio: side length of the cutout as a fraction of each spatial axis.

    Returns:
        ``x`` multiplied by a detached 0/1 mask broadcast over axis 1.
    """
    batch, height, width = x.shape[0], x.shape[2], x.shape[3]
    cut_h = int(height * ratio + 0.5)
    cut_w = int(width * ratio + 0.5)

    # One random offset per sample and per spatial axis.
    offset_x = paddle.randint(
        0, height + (1 - cut_h % 2), shape=[batch, 1, 1])
    offset_y = paddle.randint(
        0, width + (1 - cut_w % 2), shape=[batch, 1, 1])

    # TODO: Current version paddle doesn't support int64 Tensors indices, so
    # the mask is built from comparisons on a full-size grid instead of by
    # index assignment.
    _, grid_x, grid_y = paddle.meshgrid(
        paddle.arange(batch, dtype='int64'),
        paddle.arange(height, dtype='int64'),
        paddle.arange(width, dtype='int64'))
    grid_x = grid_x + offset_x - cut_h // 2
    grid_y = grid_y + offset_y - cut_w // 2

    inside = ((grid_x >= 0).astype(x.dtype) *
              (grid_x < cut_h).astype(x.dtype) *
              (grid_y >= 0).astype(x.dtype) *
              (grid_y < cut_w).astype(x.dtype))
    mask = 1 - inside.astype(x.dtype)
    return x * mask.unsqueeze(1).detach()
def rand_translation(x, ratio=0.125):
    """Shift every sample by a random integer offset along axes 2 and 3.

    Args:
        x: 4-D tensor; axes 2 and 3 are treated as the spatial axes.
        ratio: maximum shift as a fraction of each spatial axis.

    Returns:
        Tensor gathered from a padded copy of ``x`` at the shifted positions.
    """
    batch, height, width = x.shape[0], x.shape[2], x.shape[3]
    max_dx = int(height * ratio + 0.5)
    max_dy = int(width * ratio + 0.5)

    translation_x = paddle.randint(-max_dx, max_dx + 1, shape=[batch, 1, 1])
    translation_y = paddle.randint(-max_dy, max_dy + 1, shape=[batch, 1, 1])

    grid_batch, grid_x, grid_y = paddle.meshgrid(
        paddle.arange(batch, dtype='int64'),
        paddle.arange(height, dtype='int64'),
        paddle.arange(width, dtype='int64'))

    # +1 compensates for the padding added below; clip keeps indices inside
    # the padded tensor.
    shifted_x = (grid_x + translation_x + 1).astype(x.dtype)
    shifted_y = (grid_y + translation_y + 1).astype(x.dtype)
    grid_x = paddle.clip(shifted_x, 0, height + 1).astype('int64')
    grid_y = paddle.clip(shifted_y, 0, width + 1).astype('int64')

    x_pad = F.pad(x, [1, 1, 1, 1])

    # TODO: Current version paddle doesn't support int64 Tensors indices, so
    # gather_nd on a channel-last view is used instead of fancy indexing.
    indices = paddle.stack([grid_batch, grid_x, grid_y], -1)
    channel_last = x_pad.transpose([0, 2, 3, 1])
    return channel_last.gather_nd(indices).transpose([0, 3, 1, 2])
def __init__(self,
             dim,
             window_size,
             num_heads,
             qkv_bias=True,
             qk_scale=None,
             attn_drop=0.,
             proj_drop=0.):
    """Set up window attention with a relative position bias table.

    Args:
        dim: channel count fed to the qkv and output projections.
        window_size: indexable pair (Wh, Ww).
        num_heads: number of attention heads.
        qkv_bias: passed as ``bias_attr`` of the qkv linear layer.
        qk_scale: overrides the default ``head_dim ** -0.5`` when truthy.
        attn_drop: dropout probability stored in ``self.attn_drop``.
        proj_drop: dropout probability stored in ``self.proj_drop``.
    """
    super().__init__()
    self.dim = dim
    self.window_size = window_size  # Wh, Ww
    self.num_heads = num_heads
    self.scale = qk_scale or (dim // num_heads) ** -0.5

    win_h = self.window_size[0]
    win_w = self.window_size[1]

    # Parameter table of relative position bias: (2*Wh-1) * (2*Ww-1), nH.
    table_shape = ((2 * window_size[0] - 1) * (2 * window_size[1] - 1),
                   num_heads)
    bias_table = self.create_parameter(
        shape=table_shape,
        default_initializer=nn.initializer.Constant(value=0))
    self.add_parameter("relative_position_bias_table", bias_table)

    # Pair-wise relative position index for each token inside the window.
    grid = paddle.stack(
        paddle.meshgrid([paddle.arange(win_h),
                         paddle.arange(win_w)]))  # 2, Wh, Ww
    flat = paddle.flatten(grid, 1)  # 2, Wh*Ww
    relative_coords = flat.unsqueeze(-1) - flat.unsqueeze(1)  # 2, N, N
    relative_coords = relative_coords.transpose([1, 2, 0])  # N, N, 2
    # Shift both axes to start from 0, then fold (row, col) into one index.
    relative_coords[:, :, 0] += win_h - 1
    relative_coords[:, :, 1] += win_w - 1
    relative_coords[:, :, 0] *= 2 * win_w - 1
    self.relative_position_index = relative_coords.sum(-1)  # N, N
    self.register_buffer("relative_position_index",
                         self.relative_position_index)

    self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
    self.attn_drop = nn.Dropout(attn_drop)
    self.proj = nn.Linear(dim, dim)
    self.proj_drop = nn.Dropout(proj_drop)
    self.softmax = nn.Softmax(axis=-1)
def _meshgrid(self, x, y, row_major=True):
    """Return the flattened mesh of ``x`` and ``y``.

    Args:
        x: 1-D tensor of x coordinates.
        y: 1-D tensor of y coordinates.
        row_major: when true return ``(xx, yy)``, otherwise ``(yy, xx)``.

    Returns:
        Two 1-D tensors, both flattened from ``paddle.meshgrid(y, x)``.
    """
    grid_y, grid_x = paddle.meshgrid(y, x)
    flat_y = grid_y.reshape([-1])
    flat_x = grid_x.reshape([-1])
    return (flat_x, flat_y) if row_major else (flat_y, flat_x)
def _create_grid_offsets(self, size, stride, offset):
    """Create flattened x/y shifts for every cell of a feature map.

    Args:
        size: indexable (grid_height, grid_width).
        stride: step between neighbouring cells.
        offset: fraction of ``stride`` at which the first shift starts.

    Returns:
        (shift_x, shift_y): two 1-D float32 tensors.
    """
    rows, cols = size[0], size[1]
    start = offset * stride

    xs = paddle.arange(start, cols * stride, step=stride, dtype='float32')
    ys = paddle.arange(start, rows * stride, step=stride, dtype='float32')

    mesh_y, mesh_x = paddle.meshgrid(ys, xs)
    return paddle.reshape(mesh_x, [-1]), paddle.reshape(mesh_y, [-1])
def build_P_paddle(self, I_r_size):
    """Build the flattened grid of normalized sampling coordinates.

    Args:
        I_r_size: pair unpacked as (I_r_height, I_r_width).

    Returns:
        Tensor reshaped to (-1, 2) holding the (x, y) grid values.
    """
    I_r_height, I_r_width = I_r_size

    # Odd integers from -(size - 1) up to size - 1, divided by size.
    x_steps = paddle.arange(-I_r_width, I_r_width, 2).astype('float32')
    I_r_grid_x = (x_steps + 1.0) / I_r_width  # self.I_r_width
    y_steps = paddle.arange(-I_r_height, I_r_height, 2).astype('float32')
    I_r_grid_y = (y_steps + 1.0) / I_r_height  # self.I_r_height

    # P: self.I_r_width x self.I_r_height x 2
    mesh = paddle.meshgrid(I_r_grid_x, I_r_grid_y)
    P = paddle.transpose(paddle.stack(mesh, axis=2), perm=[1, 0, 2])
    # n (= self.I_r_width x self.I_r_height) x 2
    return P.reshape([-1, 2])
def build_P_paddle(self, I_r_size):
    """Build the flattened grid of normalized sampling coordinates.

    Args:
        I_r_size: pair unpacked as (I_r_width, I_r_height).
            NOTE(review): confirm callers pass width first.

    Returns:
        Tensor reshaped to (-1, 2) holding the (x, y) grid values.
    """
    I_r_width, I_r_height = I_r_size

    # Odd integers from -(size - 1) up to size - 1, divided by size with a
    # tensor divisor.
    x_numer = paddle.arange(-I_r_width, I_r_width, 2).astype('float32') + 1.0
    x_denom = paddle.to_tensor(I_r_width).astype('float32')
    I_r_grid_x = paddle.divide(x_numer, x_denom)

    y_numer = paddle.arange(-I_r_height, I_r_height,
                            2).astype('float32') + 1.0
    y_denom = paddle.to_tensor(I_r_height).astype('float32')
    I_r_grid_y = paddle.divide(y_numer, y_denom)

    mesh = paddle.meshgrid(I_r_grid_x, I_r_grid_y)
    P = paddle.transpose(paddle.stack(mesh, axis=2), perm=[1, 0, 2])
    return P.reshape([-1, 2])
def get_coord_features(self, points, batchsize, rows, cols):
    """Turn click points into per-pixel distance or disk maps.

    Args:
        points: tensor of points; in the non-CPU path it is reshaped to
            (-1, points.shape[2]) and split into 2 coordinates + 1 extra
            column per point.
        batchsize: number of samples iterated over in CPU mode.
        rows: height of the produced maps.
        cols: width of the produced maps.

    Returns:
        float32 maps: a binary disk mask when ``self.use_disks`` is set,
        otherwise ``tanh(2 * sqrt(d))`` of the computed squared distances.
    """
    if self.cpu_mode:
        per_sample = []
        for sample_idx in range(batchsize):
            if self.use_disks:
                norm_delimeter = 1.0
            else:
                norm_delimeter = self.spatial_scale * self.norm_radius
            sample_points = points[sample_idx].numpy().astype("float32")
            per_sample.append(
                self._get_dist_maps(sample_points, rows, cols,
                                    norm_delimeter))
        coords = paddle.to_tensor(np.stack(per_sample,
                                           axis=0)).astype("float32")
    else:
        num_points = points.shape[1] // 2
        points = points.reshape([-1, points.shape[2]])
        points, _ = paddle.split(points, [2, 1], axis=1)

        # A point whose largest coordinate is negative is a placeholder.
        invalid_points = paddle.max(points, axis=1, keepdim=False) < 0

        row_array = paddle.arange(
            start=0, end=rows, step=1, dtype="float32")
        col_array = paddle.arange(
            start=0, end=cols, step=1, dtype="float32")
        coord_rows, coord_cols = paddle.meshgrid(row_array, col_array)
        pixel_grid = paddle.stack([coord_rows, coord_cols], axis=0)
        coords = paddle.unsqueeze(
            pixel_grid, axis=0).tile([points.shape[0], 1, 1, 1])

        # Offset of every pixel from every (scaled) point.
        add_xy = (points * self.spatial_scale).reshape(
            [points.shape[0], points.shape[1], 1, 1])
        coords = coords - add_xy
        if not self.use_disks:
            coords = coords / (self.norm_radius * self.spatial_scale)

        # Squared distance: sum of squared row and column offsets.
        coords = coords * coords
        coords[:, 0] += coords[:, 1]
        coords = coords[:, :1]

        # Placeholder points get a huge distance so they never win the min.
        invalid_points = invalid_points.numpy()
        coords[invalid_points, :, :, :] = 1e6

        coords = coords.reshape([-1, num_points, 1, rows, cols])
        coords = paddle.min(coords, axis=1)
        coords = coords.reshape([-1, 2, rows, cols])

    if self.use_disks:
        radius_sq = (self.norm_radius * self.spatial_scale)**2
        coords = (coords <= radius_sq).astype("float32")
    else:
        coords = paddle.tanh(paddle.sqrt(coords) * 2)
    return coords
def get_reference_points(spatial_shapes, valid_ratios):
    """Compute reference points for every level of a multi-scale feature map.

    Args:
        spatial_shapes: tensor whose ``tolist()`` yields one (H, W) pair per
            level.
        valid_ratios: tensor indexed as ``[:, :, level, 0/1]`` after an axis
            is inserted at position 1; index 0 scales x, index 1 scales y.

    Returns:
        Concatenated per-level (x, y) points with an axis inserted at
        position 2, multiplied by ``valid_ratios``.
    """
    valid_ratios = valid_ratios.unsqueeze(1)
    per_level = []
    for level, (height, width) in enumerate(spatial_shapes.tolist()):
        # Cell centres at 0.5, 1.5, ... along each axis.
        ys = paddle.linspace(0.5, height - 0.5, height)
        xs = paddle.linspace(0.5, width - 0.5, width)
        grid_y, grid_x = paddle.meshgrid(ys, xs)

        y_scale = valid_ratios[:, :, level, 1] * height
        x_scale = valid_ratios[:, :, level, 0] * width
        norm_y = grid_y.flatten().unsqueeze(0) / y_scale
        norm_x = grid_x.flatten().unsqueeze(0) / x_scale
        per_level.append(paddle.stack((norm_x, norm_y), axis=-1))

    reference_points = paddle.concat(per_level, 1).unsqueeze(2)
    return reference_points * valid_ratios
def build_P_paddle(self, I_r_size):
    """Build the flattened grid of normalized sampling coordinates.

    Args:
        I_r_size: pair unpacked as (I_r_height, I_r_width).

    Returns:
        Tensor reshaped to (-1, 2) holding the (x, y) grid values; the
        numerators are created as float64.
    """
    I_r_height, I_r_width = I_r_size

    # Odd integers from -(size - 1) up to size - 1, divided by size with a
    # one-element tensor divisor.
    x_numer = paddle.arange(
        -I_r_width, I_r_width, 2, dtype='float64') + 1.0
    I_r_grid_x = x_numer / paddle.to_tensor(np.array([I_r_width]))

    y_numer = paddle.arange(
        -I_r_height, I_r_height, 2, dtype='float64') + 1.0
    I_r_grid_y = y_numer / paddle.to_tensor(np.array([I_r_height]))

    # P: self.I_r_width x self.I_r_height x 2
    mesh = paddle.meshgrid(I_r_grid_x, I_r_grid_y)
    P = paddle.transpose(paddle.stack(mesh, axis=2), perm=[1, 0, 2])
    # n (= self.I_r_width x self.I_r_height) x 2
    return P.reshape([-1, 2])
def generate_anchors_for_grid_cell(feats,
                                   fpn_strides,
                                   grid_cell_size=5.0,
                                   grid_cell_offset=0.5):
    r"""
    Like ATSS, generate anchors based on grid size.

    Args:
        feats (List[Tensor]): shape[s, (b, c, h, w)]
        fpn_strides (tuple|list): shape[s], stride for each scale feature
        grid_cell_size (float): anchor size
        grid_cell_offset (float): The range is between 0 and 1.
    Returns:
        anchors (Tensor): shape[l, 4], "xmin, ymin, xmax, ymax" format.
        anchor_points (Tensor): shape[l, 2], "x, y" format.
        num_anchors_list (List[int]): shape[s], contains [s_1, s_2, ...].
        stride_tensor (Tensor): shape[l, 1], contains the stride for each scale.
    """
    assert len(feats) == len(fpn_strides)

    level_anchors = []
    level_points = []
    num_anchors_list = []
    level_strides = []

    for feat, stride in zip(feats, fpn_strides):
        _, _, h, w = feat.shape
        half = grid_cell_size * stride * 0.5

        xs = (paddle.arange(end=w) + grid_cell_offset) * stride
        ys = (paddle.arange(end=h) + grid_cell_offset) * stride
        cy, cx = paddle.meshgrid(ys, xs)

        # Square box of side grid_cell_size * stride around each centre.
        boxes = paddle.stack(
            [cx - half, cy - half, cx + half, cy + half],
            axis=-1).astype(feat.dtype)
        centers = paddle.stack([cx, cy], axis=-1).astype(feat.dtype)

        level_anchors.append(boxes.reshape([-1, 4]))
        level_points.append(centers.reshape([-1, 2]))
        num_anchors_list.append(len(level_anchors[-1]))
        level_strides.append(
            paddle.full(
                [num_anchors_list[-1], 1], stride, dtype=feat.dtype))

    anchors = paddle.concat(level_anchors)
    anchors.stop_gradient = True
    anchor_points = paddle.concat(level_points)
    anchor_points.stop_gradient = True
    stride_tensor = paddle.concat(level_strides)
    stride_tensor.stop_gradient = True
    return anchors, anchor_points, num_anchors_list, stride_tensor
def _get_output_single(self, input, idx):
    """Run the kernel and category branches on one feature level.

    Args:
        input: feature map of one level; indexed as a 4-D tensor.
        idx: index into ``self.seg_num_grids`` selecting the grid size the
            features are resized to.

    Returns:
        (cate_pred, kernel_pred). Outside training, ``cate_pred`` is passed
        through sigmoid and ``_points_nms`` and moved to channel-last.
    """
    # CoordConv: append x / y coordinate channels in [-1, 1].
    feat_shape = paddle.shape(input)
    xs = paddle.linspace(-1, 1, feat_shape[-1], dtype='float32')
    ys = paddle.linspace(-1, 1, feat_shape[-2], dtype='float32')
    grid_y, grid_x = paddle.meshgrid([ys, xs])
    grid_x = paddle.unsqueeze(grid_x, [0, 1])
    grid_y = paddle.unsqueeze(grid_y, [0, 1])
    grid_y = paddle.expand(grid_y, shape=[feat_shape[0], 1, -1, -1])
    grid_x = paddle.expand(grid_x, shape=[feat_shape[0], 1, -1, -1])
    coord_feat = paddle.concat([grid_x, grid_y], axis=1)
    ins_kernel_feat = paddle.concat([input, coord_feat], axis=1)

    # Kernel branch: resize to the level's grid resolution.
    num_grid = self.seg_num_grids[idx]
    kernel_feat = F.interpolate(
        ins_kernel_feat,
        size=[num_grid, num_grid],
        mode='bilinear',
        align_corners=False,
        align_mode=0)
    # The category branch drops the two coordinate channels.
    cate_feat = kernel_feat[:, :-2, :, :]

    for conv in self.kernel_pred_convs:
        kernel_feat = F.relu(conv(kernel_feat))
    if self.drop_block and self.training:
        kernel_feat = self.drop_block_fun(kernel_feat)
    kernel_pred = self.solo_kernel(kernel_feat)

    # Category branch.
    for conv in self.cate_pred_convs:
        cate_feat = F.relu(conv(cate_feat))
    if self.drop_block and self.training:
        cate_feat = self.drop_block_fun(cate_feat)
    cate_pred = self.solo_cate(cate_feat)

    if not self.training:
        cate_pred = self._points_nms(F.sigmoid(cate_pred), kernel_size=2)
        cate_pred = paddle.transpose(cate_pred, [0, 2, 3, 1])
    return cate_pred, kernel_pred
def dft_matrix(n: int, return_complex: bool = False,
               dtype: str = 'float64') -> Tensor:
    """Compute discrete Fourier transform matrix.

    Parameters:
        n(int): the size of dft matrix.
        return_complex(bool): whether to return complex matrix. If True, the
            matrix will be complex type. Otherwise, the real and image part
            will be stored in the last axis of returned tensor.
        dtype(str): the datatype of the returned dft matrix.

    Shape:
        output: [n, n] or [n,n,2]

    Returns:
        Complex tensor of shape (n,n) if return_complex=True, and of shape
        (n,n,2) otherwise.

    Raises:
        ValueError: if ``return_complex`` is True and ``dtype`` is 'float64'.

    Examples:
        .. code-block:: python

            import paddle
            import paddleaudio.functional as F
            m = F.dft_matrix(512)
            print(m.shape)
            >> [512, 512, 2]
            m = F.dft_matrix(512, return_complex=True)
            print(m.shape)
            >> [512, 512]
    """
    # This is due to a bug in paddle in lacking support for complex128, as of
    # paddle 2.1.0
    if return_complex and dtype == 'float64':
        raise ValueError('not implemented')

    rows, cols = paddle.meshgrid(paddle.arange(0, n), paddle.arange(0, n))
    # Phase of entry (j, k) is -2 * pi * j * k / n.
    unit_phase = paddle.to_tensor((-2 * math.pi / n), dtype)
    phase = rows.astype(dtype) * cols.astype(dtype) * unit_phase
    real = paddle.cos(phase)
    imag = paddle.sin(phase)

    if return_complex:
        return real + paddle.to_tensor([1j]) * imag
    return paddle.concat([real.unsqueeze(-1), imag.unsqueeze(-1)], -1)
def get_single_level_center_point(self, featmap_size, stride, cell_offset=0):
    """
    Generate pixel centers of a single stage feature map.

    Args:
        featmap_size: height and width of the feature map
        stride: down sample stride of the feature map
        cell_offset: value added to every cell index before it is
            multiplied by ``stride``
    Returns:
        y and x of the center points
    """
    height, width = featmap_size
    xs = (paddle.arange(width, dtype='float32') + cell_offset) * stride
    ys = (paddle.arange(height, dtype='float32') + cell_offset) * stride
    grid_y, grid_x = paddle.meshgrid(ys, xs)
    return grid_y.flatten(), grid_x.flatten()
def generate_anchor(self, nGh, nGw, anchor_wh):
    """Build the anchor mesh (cell x, cell y, anchor w, anchor h).

    Args:
        nGh: grid height.
        nGw: grid width.
        anchor_wh: array of anchor sizes indexed as ``[anchor, 2]``.
            NOTE(review): ``.repeat(..., axis=)`` and ``.astype(np.float32)``
            suggest a numpy array — confirm.

    Returns:
        Tensor transposed to (nA, nGh, nGw, 4).
    """
    num_anchors = len(anchor_wh)

    rows, cols = paddle.meshgrid([paddle.arange(nGh), paddle.arange(nGw)])
    # 2 x nGh x nGw, x first.
    cell_xy = paddle.stack((cols, rows), axis=0).cast(dtype='float32')
    cell_xy = paddle.tile(cell_xy, [num_anchors, 1, 1, 1])

    # Broadcast every anchor size over the whole grid: nA x 2 x nGh x nGw.
    wh_grid = anchor_wh[:, :, None][:, :, :, None]
    wh_grid = wh_grid.repeat(int(nGh), axis=-2).repeat(int(nGw), axis=-1)
    wh_grid = paddle.to_tensor(wh_grid.astype(np.float32))

    anchor_mesh = paddle.concat([cell_xy, wh_grid], axis=1)
    # (nA x nGh x nGw) x 4
    return paddle.transpose(anchor_mesh, [0, 2, 3, 1])
def _generate_anchors(self, feats):
    """Generate per-level anchor boxes for the given feature maps.

    Args:
        feats: feature maps, one per entry of ``self.fpn_strides``; each is
            unpacked as a 4-D shape ending in (h, w).

    Returns:
        (anchors, num_anchors_list, stride_tensor_list): three lists with one
        entry per level — boxes reshaped to (-1, 4), their count, and a
        (count, 1) tensor filled with the level's stride.
    """
    anchors = []
    num_anchors_list = []
    stride_tensor_list = []

    for feat, stride in zip(feats, self.fpn_strides):
        _, _, h, w = feat.shape
        half = self.grid_cell_scale * stride * 0.5

        xs = (paddle.arange(end=w) + self.grid_cell_offset) * stride
        ys = (paddle.arange(end=h) + self.grid_cell_offset) * stride
        cy, cx = paddle.meshgrid(ys, xs)

        boxes = paddle.stack(
            [cx - half, cy - half, cx + half, cy + half], axis=-1)
        anchors.append(boxes.reshape([-1, 4]))
        num_anchors_list.append(len(anchors[-1]))
        stride_tensor_list.append(
            paddle.full([num_anchors_list[-1], 1], stride))

    return anchors, num_anchors_list, stride_tensor_list
def _generate_anchor_point(self, feat_sizes, strides, offset=0.):
    """Generate anchor points and matching strides for every level.

    Args:
        feat_sizes: iterable of (h, w) pairs, one per level.
        strides: iterable of strides, one per level.
        offset: value added to each cell index before scaling by the stride.

    Returns:
        (anchor_points, stride_tensor, num_anchors_list): concatenated (x, y)
        points cast to ``self._dtype``, the concatenated per-point strides,
        and the number of points per level.
    """
    level_points = []
    level_strides = []
    num_anchors_list = []

    for (h, w), stride in zip(feat_sizes, strides):
        xs = (paddle.arange(w) + offset) * stride
        ys = (paddle.arange(h) + offset) * stride
        grid_y, grid_x = paddle.meshgrid(ys, xs)

        level_points.append(
            paddle.stack([grid_x, grid_y], axis=-1).reshape([-1, 2]))
        count = len(level_points[-1])
        level_strides.append(
            paddle.full([count, 1], stride, dtype=self._dtype))
        num_anchors_list.append(count)

    anchor_points = paddle.concat(level_points).astype(self._dtype)
    anchor_points.stop_gradient = True
    stride_tensor = paddle.concat(level_strides)
    stride_tensor.stop_gradient = True
    return anchor_points, stride_tensor, num_anchors_list
def decode_outputs(self, outputs):
    """Decode xy / wh of the concatenated predictions in place.

    Args:
        outputs: tensor indexed as ``[:, :, channel]``; channels 0-1 are
            offsets and 2-3 are log sizes. It is modified in place.

    Returns:
        The same ``outputs`` tensor with channels 0-3 decoded using the grids
        and strides built from ``self.hw`` and ``self.strides``.
    """
    level_grids = []
    level_strides = []
    for (height, width), stride in zip(self.hw, self.strides):
        rows, cols = paddle.meshgrid(
            [paddle.arange(height), paddle.arange(width)])
        cells = paddle.reshape(paddle.stack((cols, rows), 2), (1, -1, 2))
        level_grids.append(cells)
        lead_shape = cells.shape[:2]
        level_strides.append(paddle.full((*lead_shape, 1), stride))

    grids = paddle.cast(paddle.concat(level_grids, axis=1), outputs.dtype)
    strides = paddle.cast(
        paddle.concat(level_strides, axis=1), outputs.dtype)

    outputs[:, :, :2] = (outputs[:, :, :2] + grids) * strides
    outputs[:, :, 2:4] = paddle.exp(outputs[:, :, 2:4]) * strides
    return outputs
def test_api_with_dygraph_list_input(self):
    """Check ``paddle.meshgrid`` with a list input in dygraph mode.

    Two random int32 vectors of length 100 and 200 are meshed and compared
    against the numpy reshape + broadcast reference.
    """
    paddle.disable_static(paddle.NPUPlace(0))
    try:
        input_3 = np.random.randint(0, 100, [100, ]).astype('int32')
        input_4 = np.random.randint(0, 100, [200, ]).astype('int32')

        # Reference: column vector / row vector broadcast to 100 x 200.
        out_3 = np.reshape(input_3, [100, 1])
        out_3 = np.broadcast_to(out_3, [100, 200])
        out_4 = np.reshape(input_4, [1, 200])
        out_4 = np.broadcast_to(out_4, [100, 200])

        tensor_3 = paddle.to_tensor(input_3)
        tensor_4 = paddle.to_tensor(input_4)
        res_3, res_4 = paddle.meshgrid([tensor_3, tensor_4])

        self.assertTrue(np.allclose(res_3.numpy(), out_3))
        self.assertTrue(np.allclose(res_4.numpy(), out_4))
    finally:
        # Restore static mode even when an assertion fails, so the mode
        # switch does not leak into other tests.
        paddle.enable_static()
def masks_to_boxes(masks):
    """Compute the bounding boxes around the provided masks

    The masks should be in format [N, H, W] where N is the number of masks,
    (H, W) are the spatial dimensions.

    Returns a [N, 4] tensors, with the boxes in xyxy format
    """
    if masks.numel() == 0:
        empty = paddle.zeros((0, 4))
        empty.stop_gradient = True
        return empty

    h, w = masks.shape[-2:]
    y = paddle.arange(0, h, dtype='float32')
    y.stop_gradient = True
    x = paddle.arange(0, w, dtype='float32')
    x.stop_gradient = True
    y, x = paddle.meshgrid(y, x)

    foreground = masks.astype('bool')

    # paddle.max / paddle.min with an axis return only the values, so no
    # "[0]" indexing is applied to the result.
    x_mask = masks * x.unsqueeze(0)
    x_max = paddle.max(x_mask.flatten(1), axis=-1)
    # Background pixels get a huge coordinate so they never win the min.
    x_min = paddle.min(
        paddle.where(foreground, x_mask,
                     paddle.full_like(x_mask, 100000000.0)).flatten(1),
        axis=-1)

    y_mask = masks * y.unsqueeze(0)
    y_max = paddle.max(y_mask.flatten(1), axis=-1)
    y_min = paddle.min(
        paddle.where(foreground, y_mask,
                     paddle.full_like(y_mask, 100000000.0)).flatten(1),
        axis=-1)

    return paddle.stack([x_min, y_min, x_max, y_max], 1)
def _generate_anchors(self, feats=None):
    """Generate anchor points and strides for every FPN level.

    Just use in eval time.

    Args:
        feats: optional list of feature maps, one per ``self.fpn_stride``
            entry. When omitted, sizes are derived from ``self.eval_size``
            with ceil division by the stride.

    Returns:
        (anchor_points, stride_tensor): concatenated float32 (x, y) points
        and the matching (h * w, 1) stride values per level.
    """
    level_points = []
    level_strides = []

    for level, stride in enumerate(self.fpn_stride):
        if feats is None:
            h = math.ceil(self.eval_size[0] / stride)
            w = math.ceil(self.eval_size[1] / stride)
        else:
            _, _, h, w = feats[level].shape

        xs = paddle.arange(end=w) + self.cell_offset
        ys = paddle.arange(end=h) + self.cell_offset
        grid_y, grid_x = paddle.meshgrid(ys, xs)

        points = paddle.cast(
            paddle.stack([grid_x, grid_y], axis=-1), dtype='float32')
        level_points.append(points.reshape([-1, 2]))
        level_strides.append(
            paddle.full([h * w, 1], stride, dtype='float32'))

    anchor_points = paddle.concat(level_points)
    stride_tensor = paddle.concat(level_strides)
    return anchor_points, stride_tensor
def __init__(self, channels, scale):
    """Build a normalized 2-D Gaussian kernel and a fixed grouped conv.

    Args:
        channels: number of channels; used as in/out channels and groups of
            ``self.conv``.
        scale: scale factor; sigma is derived as ``(1 / scale - 1) / 2``.
    """
    super(AntiAliasInterpolation2d, self).__init__()

    std_1d = (1 / scale - 1) / 2
    size_1d = 2 * round(std_1d * 4) + 1

    self.ka = size_1d // 2
    if size_1d % 2 == 0:
        self.kb = self.ka - 1
    else:
        self.kb = self.ka

    sizes = [size_1d, size_1d]
    stds = [std_1d, std_1d]

    # The gaussian kernel is the product of the gaussian function of each
    # dimension.
    axes = [paddle.arange(size, dtype='float32') for size in sizes]
    grids = paddle.meshgrid(axes)
    kernel = 1
    for size, std, grid in zip(sizes, stds, grids):
        center = (size - 1) / 2
        kernel *= paddle.exp(-(grid - center)**2 / (2 * std**2 + 1e-9))

    # Make sure sum of values in gaussian kernel equals 1.
    kernel = kernel / paddle.sum(kernel)

    # Reshape to depthwise convolutional weight:
    # [1, 1, *kernel.shape] -> [channels, 1, *kernel.shape]
    kernel = kernel.reshape((1, 1, *kernel.shape))
    repeat_times = (channels, *((1, ) * (len(kernel.shape) - 1)))
    kernel = kernel.tile(repeat_times)

    self.kernel_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.Assign(kernel), trainable=False)
    self.groups = channels
    self.scale = scale
    self.conv = nn.Conv2D(
        channels,
        channels,
        kernel_size=kernel.shape[-1],
        groups=self.groups,
        weight_attr=self.kernel_attr,
        bias_attr=False)
    # Write the kernel into the conv weight explicitly as well.
    self.conv.weight.set_value(kernel)
def forward(self, inputs, _inputs, _input):
    """
    forward

    Mesh the three inputs with ``paddle.meshgrid`` and return the sum of the
    three resulting grids.
    """
    grid_a, grid_b, grid_c = paddle.meshgrid([inputs, _inputs, _input])
    total = grid_a + grid_b + grid_c
    return total
def forward(self, inputs, _inputs):
    """
    forward

    Mesh the two inputs with ``paddle.meshgrid`` and return the sum of the
    two resulting grids.
    """
    grid_a, grid_b = paddle.meshgrid([inputs, _inputs])
    total = grid_a + grid_b
    return total
def make_grid(h, w, dtype):
    """Return an (h, w, 2) grid of (x, y) cell indices cast to ``dtype``."""
    rows, cols = paddle.meshgrid([paddle.arange(h), paddle.arange(w)])
    grid_xy = paddle.stack((cols, rows), 2)
    return grid_xy.cast(dtype=dtype)
def get_offset(self, anchors, featmap_size, stride):
    """Compute sampling offsets that align a conv kernel with rotated anchors.

    Args:
        anchors: [M,5] xc,yc,w,h,angle
        featmap_size: (feat_h, feat_w)
        stride: 8
    Returns:
        Offset tensor reshaped to
        [1, kernel_size * kernel_size * 2, feat_h, feat_w], with (y, x)
        interleaved per kernel tap.
    """
    anchors = paddle.reshape(anchors, [-1, 5])  # (NA,5)
    dtype = anchors.dtype
    feat_h, feat_w = featmap_size[0], featmap_size[1]
    ksize = self.kernel_size

    # Relative tap positions of the kernel, flattened.
    pad = (ksize - 1) // 2
    taps = paddle.arange(-pad, pad + 1, dtype=dtype)
    tap_y, tap_x = paddle.meshgrid(taps, taps)
    tap_x = paddle.reshape(tap_x, [-1])
    tap_y = paddle.reshape(tap_y, [-1])

    # Sampling locations of the default conv.
    cols = paddle.arange(0, feat_w, dtype=dtype)
    rows = paddle.arange(0, feat_h, dtype=dtype)
    cell_y, cell_x = paddle.meshgrid(rows, cols)
    cell_x = paddle.reshape(cell_x, [-1, 1])
    cell_y = paddle.reshape(cell_y, [-1, 1])
    x_conv = cell_x + tap_x
    y_conv = cell_y + tap_y

    # Sampling locations of the anchors, in feature-map units.
    x_ctr = paddle.reshape(anchors[:, 0], [-1, 1]) / stride
    y_ctr = paddle.reshape(anchors[:, 1], [-1, 1]) / stride
    w_s = paddle.reshape(anchors[:, 2], [-1, 1]) / stride
    h_s = paddle.reshape(anchors[:, 3], [-1, 1]) / stride
    angle = paddle.reshape(anchors[:, 4], [-1, 1])

    cos, sin = paddle.cos(angle), paddle.sin(angle)
    dw, dh = w_s / ksize, h_s / ksize
    local_x, local_y = dw * tap_x, dh * tap_y
    # Rotate the taps by the anchor angle, then move to the anchor centre.
    rot_x = cos * local_x - sin * local_y
    rot_y = sin * local_x + cos * local_y
    x_anchor, y_anchor = rot_x + x_ctr, rot_y + y_ctr

    # Offset field: anchor sampling point minus regular sampling point.
    offset_x = x_anchor - x_conv
    offset_y = y_anchor - y_conv
    offset = paddle.stack([offset_y, offset_x], axis=-1)
    offset = paddle.reshape(offset, [feat_h * feat_w, ksize * ksize * 2])
    offset = paddle.transpose(offset, [1, 0])
    offset = paddle.reshape(offset, [1, ksize * ksize * 2, feat_h, feat_w])
    return offset
def forward(self, x, offset, mask):
    """Modulated deformable convolution built from ``F.grid_sample``.

    Args:
        x: input unpacked as (N, _, H, W).
        offset: predicted offsets; moved to channel-last and reshaped to
            (N, out_H, out_W, kH * kW, 2) with (y, x) in the last axis.
        mask: per-tap weights; an axis is inserted at position 1 before it
            multiplies the sampled values.

    Returns:
        Result of a 1x1 ``F.conv2d`` over the sampled, weighted values.
    """
    in_C = self.in_channels
    out_C = self.out_channels
    stride = self.stride
    padding = self.padding
    # dilation = self.dilation
    groups = self.groups

    N, _, H, W = x.shape
    _, w_in, kH, kW = self.weight.shape
    num_taps = kH * kW
    out_W = (W + 2 * padding - (kW - 1)) // stride
    out_H = (H + 2 * padding - (kH - 1)) // stride

    # ========== 1. Pad the input image x to obtain pad_x ==========
    pad_x_H = H + padding * 2 + 1
    pad_x_W = W + padding * 2 + 1
    pad_x = F.pad(
        x,
        pad=[0, 0, 0, 0, padding, padding + 1, padding, padding + 1],
        value=0.0)

    # ========== 2. Coordinates of every sampling point ==========
    # Position of each kernel centre inside pad_x.
    y_outer, x_outer = paddle.meshgrid(
        [paddle.arange(out_H), paddle.arange(out_W)])
    y_outer = y_outer * stride + padding
    x_outer = x_outer * stride + padding
    # [out_H, out_W, 2]
    start_pos_yx = paddle.stack((y_outer, x_outer), 2).cast(dtype='float32')
    # [1, out_H, out_W, 1, 2]
    start_pos_yx = paddle.unsqueeze(start_pos_yx, axis=[0, 3])
    # [N, out_H, out_W, kH*kW, 2]
    start_pos_yx = paddle.tile(start_pos_yx, [N, 1, 1, num_taps, 1])
    start_pos_y = start_pos_yx[:, :, :, :, :1]  # [N, out_H, out_W, kH*kW, 1]
    start_pos_x = start_pos_yx[:, :, :, :, 1:]  # [N, out_H, out_W, kH*kW, 1]
    start_pos_y.stop_gradient = True
    start_pos_x.stop_gradient = True

    # Offsets of the taps inside the kernel.
    half_W = (kW - 1) // 2
    half_H = (kH - 1) // 2
    y_inner, x_inner = paddle.meshgrid(
        [paddle.arange(kH), paddle.arange(kW)])
    y_inner -= half_H
    x_inner -= half_W
    # [kH, kW, 2]
    inner_yx = paddle.stack((y_inner, x_inner), 2).cast(dtype='float32')
    # [1, 1, 1, kH*kW, 2]
    inner_yx = paddle.reshape(inner_yx, (1, 1, 1, num_taps, 2))
    # [N, out_H, out_W, kH*kW, 2]
    inner_yx = paddle.tile(inner_yx, [N, out_H, out_W, 1, 1])
    inner_y = inner_yx[:, :, :, :, :1]  # [N, out_H, out_W, kH*kW, 1]
    inner_x = inner_yx[:, :, :, :, 1:]  # [N, out_H, out_W, kH*kW, 1]
    inner_y.stop_gradient = True
    inner_x.stop_gradient = True

    # Predicted offsets.
    offset = paddle.transpose(offset, [0, 2, 3, 1])  # [N, out_H, out_W, kH*kW*2]
    offset_yx = paddle.reshape(offset, (N, out_H, out_W, num_taps, 2))
    offset_y = offset_yx[:, :, :, :, :1]  # [N, out_H, out_W, kH*kW, 1]
    offset_x = offset_yx[:, :, :, :, 1:]  # [N, out_H, out_W, kH*kW, 1]

    # Final sampling positions, limited to the inside of pad_x.
    pos_y = start_pos_y + inner_y + offset_y  # [N, out_H, out_W, kH*kW, 1]
    pos_x = start_pos_x + inner_x + offset_x  # [N, out_H, out_W, kH*kW, 1]
    pos_y = paddle.clip(pos_y, 0.0, H + padding * 2 - 1.0)
    pos_x = paddle.clip(pos_x, 0.0, W + padding * 2 - 1.0)

    # ========== 3. Sample with bilinear F.grid_sample() ==========
    # Normalize pixel positions to [-1, 1] as grid_sample expects.
    pos_x = pos_x / (pad_x_W - 1) * 2.0 - 1.0
    pos_y = pos_y / (pad_x_H - 1) * 2.0 - 1.0
    xtyt = paddle.concat([pos_x, pos_y], -1)  # [N, out_H, out_W, kH*kW, 2]
    # [N, out_H, out_W*kH*kW, 2]
    xtyt = paddle.reshape(xtyt, (N, out_H, out_W * kH * kW, 2))
    # [N, in_C, out_H, out_W*kH*kW]
    value = F.grid_sample(
        pad_x,
        xtyt,
        mode='bilinear',
        padding_mode='zeros',
        align_corners=True)
    # [N, in_C, out_H, out_W, kH * kW]
    value = paddle.reshape(value, (N, in_C, out_H, out_W, num_taps))
    value = value.transpose((0, 1, 4, 2, 3))  # [N, in_C, kH * kW, out_H, out_W]

    # ========== 4. Multiply by the importance mask ==========
    mask = paddle.unsqueeze(mask, [1])  # [N, 1, kH * kW, out_H, out_W]
    value = value * mask  # [N, in_C, kH * kW, out_H, out_W]
    # [N, in_C * kH * kW, out_H, out_W]
    new_x = paddle.reshape(value, (N, in_C * kH * kW, out_H, out_W))

    # ========== 5. Apply this layer's weight and bias ==========
    # The weight [out_C, w_in, kH, kW] becomes a 1x1 kernel
    # [out_C, w_in*kH*kW, 1, 1].
    rw = paddle.reshape(self.weight, (out_C, w_in * kH * kW, 1, 1))
    # [N, out_C, out_H, out_W]
    out = F.conv2d(new_x, rw, bias=self.bias, stride=1, groups=groups)
    return out