def conv_tri(self, input, r): """ Convolves an image by a 2D triangle filter (the 1D triangle filter f is [1:r r+1 r:-1:1]/(r+1)^2, the 2D version is simply conv2(f,f')) """ if r <= 1: raise ValueError( '`r` should be greater than 1, but it is {}.'.format(r)) kernel = [ list(range(1, r + 1)) + [r + 1] + list(reversed(range(1, r + 1))) ] kernel = paddle.to_tensor(kernel).astype('float32') kernel = kernel / (r + 1)**2 input_ = F.pad(input, [1, 1, 0, 0], mode='replicate') input_ = F.pad(input_, [r, r, 0, 0], mode='reflect') input_ = [input_[:, :, :, :r], input, input_[:, :, :, -r:]] input_ = paddle.concat(input_, axis=3) tem = input_.clone() input_ = F.pad(input_, [0, 0, 1, 1], mode='replicate') input_ = F.pad(input_, [0, 0, r, r], mode='reflect') input_ = [input_[:, :, :r, :], tem, input_[:, :, -r:, :]] input_ = paddle.concat(input_, axis=2) c = input.shape[1] kernel_x = paddle.concat([kernel.unsqueeze((0, 1))] * c, axis=0) output = F.conv2d(input_, kernel_x, padding=0, groups=c) kernel_y = paddle.concat([kernel.t().unsqueeze((0, 1))] * c, axis=0) output = F.conv2d(output, kernel_y, padding=0, groups=c) return output
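# A minimal, self-contained sketch of the separable triangle filtering that conv_tri above
# performs, written directly with F.conv2d. The radius r = 2 and the input sizes here are
# illustrative, not taken from the class; zero padding is used for brevity, whereas conv_tri
# pads with replicate/reflect modes instead.
import paddle
import paddle.nn.functional as F

r = 2
x = paddle.randn([1, 3, 8, 8])
c = x.shape[1]
f = paddle.to_tensor([list(range(1, r + 1)) + [r + 1] + list(range(r, 0, -1))],
                     dtype='float32') / (r + 1) ** 2        # 1D triangle filter, shape [1, 2r+1]
kx = paddle.concat([f.unsqueeze((0, 1))] * c, axis=0)       # [c, 1, 1, 2r+1], one copy per channel
ky = paddle.concat([f.t().unsqueeze((0, 1))] * c, axis=0)   # [c, 1, 2r+1, 1]
y = F.conv2d(F.conv2d(x, kx, padding=[0, r], groups=c), ky, padding=[r, 0], groups=c)
print(y.shape)  # [1, 3, 8, 8]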
def forward(self, logits, label): """ Args: logits (Tensor): Logit tensor, the data type is float32, float64. Shape is (N, C), where C is number of classes, and if shape is more than 2D, this is (N, C, D1, D2,..., Dk), k >= 1. label (Tensor): Label tensor, the data type is int64. Shape is (N), where each value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is (N, D1, D2,..., Dk), k >= 1. Returns: loss """ boundary_targets = F.conv2d(paddle.unsqueeze(label, axis=1).astype('float32'), self.laplacian_kernel, padding=1) boundary_targets = paddle.clip(boundary_targets, min=0) boundary_targets = boundary_targets > 0.1 boundary_targets = boundary_targets.astype('float32') boundary_targets_x2 = F.conv2d(paddle.unsqueeze(label, axis=1).astype('float32'), self.laplacian_kernel, stride=2, padding=1) boundary_targets_x2 = paddle.clip(boundary_targets_x2, min=0) boundary_targets_x4 = F.conv2d(paddle.unsqueeze(label, axis=1).astype('float32'), self.laplacian_kernel, stride=4, padding=1) boundary_targets_x4 = paddle.clip(boundary_targets_x4, min=0) boundary_targets_x8 = F.conv2d(paddle.unsqueeze(label, axis=1).astype('float32'), self.laplacian_kernel, stride=8, padding=1) boundary_targets_x8 = paddle.clip(boundary_targets_x8, min=0) boundary_targets_x8_up = F.interpolate(boundary_targets_x8, boundary_targets.shape[2:], mode='nearest') boundary_targets_x4_up = F.interpolate(boundary_targets_x4, boundary_targets.shape[2:], mode='nearest') boundary_targets_x2_up = F.interpolate(boundary_targets_x2, boundary_targets.shape[2:], mode='nearest') boundary_targets_x2_up = boundary_targets_x2_up > 0.1 boundary_targets_x2_up = boundary_targets_x2_up.astype('float32') boundary_targets_x4_up = boundary_targets_x4_up > 0.1 boundary_targets_x4_up = boundary_targets_x4_up.astype('float32') boundary_targets_x8_up = boundary_targets_x8_up > 0.1 boundary_targets_x8_up = boundary_targets_x8_up.astype('float32') boudary_targets_pyramids = paddle.stack((boundary_targets, boundary_targets_x2_up, boundary_targets_x4_up), axis=1) boudary_targets_pyramids = paddle.squeeze(boudary_targets_pyramids, axis=2) boudary_targets_pyramid = F.conv2d(boudary_targets_pyramids, self.fuse_kernel) boudary_targets_pyramid = boudary_targets_pyramid > 0.1 boudary_targets_pyramid = boudary_targets_pyramid.astype('float32') if logits.shape[-1] != boundary_targets.shape[-1]: logits = F.interpolate( logits, boundary_targets.shape[2:], mode='bilinear', align_corners=True) bce_loss = F.binary_cross_entropy_with_logits(logits, boudary_targets_pyramid) dice_loss = self.fixed_dice_loss_func(F.sigmoid(logits), boudary_targets_pyramid) detail_loss = bce_loss + dice_loss label.stop_gradient = True return detail_loss
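# Hedged sketch of the buffers the detail-aggregate forward above assumes on `self`. The
# values follow the common STDC detail-loss setup and should be treated as an assumption
# rather than the exact definition used here.
import paddle

laplacian_kernel = paddle.to_tensor(
    [-1., -1., -1., -1., 8., -1., -1., -1., -1.]).reshape((1, 1, 3, 3))   # 3x3 Laplacian edge detector
fuse_kernel = paddle.to_tensor(
    [[6. / 10.], [3. / 10.], [1. / 10.]]).reshape((1, 3, 1, 1))           # 1x1 conv fusing the three boundary maps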
def relprop(self, R, alpha): if self.X.shape[1] == 3: pw = paddle.clip(self.weight, min=0) nw = paddle.clip(self.weight, max=0) X = self.X # print(X.shape) # [1, 3, 224, 224] L = self.X * 0 + \ paddle.min(paddle.min(paddle.min(self.X, axis=1, keepdim=True), axis=2, keepdim=True), axis=3, keepdim=True) H = self.X * 0 + \ paddle.max(paddle.max(paddle.max(self.X, axis=1, keepdim=True), axis=2, keepdim=True), axis=3, keepdim=True) Za = F.conv2d(X, self.weight, bias=None, stride=self._stride, padding=self._padding) - \ F.conv2d(L, pw, bias=None, stride=self._stride, padding=self._padding) - \ F.conv2d(H, nw, bias=None, stride=self._stride, padding=self._padding) + 1e-9 S = R / Za C = X * self.gradprop2(S, self.weight) - L * \ self.gradprop2(S, pw) - H * self.gradprop2(S, nw) R = C else: beta = alpha - 1 pw = paddle.clip(self.weight, min=0) nw = paddle.clip(self.weight, max=0) px = paddle.clip(self.X, min=0) nx = paddle.clip(self.X, max=0) def f(w1, w2, x1, x2): Z1 = F.conv2d(x1, w1, bias=None, stride=self._stride, padding=self._padding) Z2 = F.conv2d(x2, w2, bias=None, stride=self._stride, padding=self._padding) S1 = safe_divide(R, Z1) S2 = safe_divide(R, Z2) C1 = x1 * self.gradprop(Z1, x1, S1)[0] C2 = x2 * self.gradprop(Z2, x2, S2)[0] return C1 + C2 activator_relevances = f(pw, nw, px, nx) inhibitor_relevances = f(nw, pw, px, nx) R = alpha * activator_relevances - beta * inhibitor_relevances return R
def forward(self, input, expand_ratio=None, channel=None): self.cur_config = {'expand_ratio': expand_ratio, 'channel': channel} in_nc = int(input.shape[1]) assert ( expand_ratio == None or channel == None ), "expand_ratio and channel CANNOT both be set (not None) at the same time." if expand_ratio != None: out_nc = int(expand_ratio * self.base_output_dim) elif channel != None: out_nc = int(channel) else: out_nc = self.conv[0]._out_channels weight = self.conv[0].weight[:in_nc] ### conv1 if self.conv[0].bias is not None: bias = self.conv[0].bias[:in_nc] else: bias = self.conv[0].bias conv0_out = F.conv2d( input, weight, bias, stride=self.conv[0]._stride, padding=self.conv[0]._padding, dilation=self.conv[0]._dilation, groups=in_nc, data_format=self.conv[0]._data_format) norm_out = self.conv[1](conv0_out) weight = self.conv[2].weight[:out_nc, :in_nc, :, :] if self.conv[2].bias is not None: bias = self.conv[2].bias[:out_nc] else: bias = self.conv[2].bias conv1_out = F.conv2d( norm_out, weight, bias, stride=self.conv[2]._stride, padding=self.conv[2]._padding, dilation=self.conv[2]._dilation, groups=self.conv[2]._groups, data_format=self.conv[2]._data_format) return conv1_out
def f(w1, w2, x1, x2): Z1 = F.conv2d(x1, w1, bias=None, stride=self._stride, padding=self._padding) Z2 = F.conv2d(x2, w2, bias=None, stride=self._stride, padding=self._padding) S1 = safe_divide(R, Z1) S2 = safe_divide(R, Z2) C1 = x1 * self.gradprop(Z1, x1, S1)[0] C2 = x2 * self.gradprop(Z2, x2, S2)[0] return C1 + C2
def static_graph_case_2(self): main = fluid.Program() start = fluid.Program() with fluid.unique_name.guard(): with fluid.program_guard(main, start): if self.channel_last: x = fluid.data("input", (-1, -1, -1, self.in_channels), dtype=self.dtype) else: x = fluid.data("input", (-1, self.in_channels, -1, -1), dtype=self.dtype) weight = fluid.data("weight", self.weight.shape, dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias.shape, dtype=self.dtype) y = F.conv2d(x, weight, None if self.no_bias else bias, padding=self.padding, stride=self.stride, dilation=self.dilation, groups=self.groups, act=self.act, data_format=self.data_format, use_cudnn=self.use_cudnn) exe = fluid.Executor(self.place) exe.run(start) feed_dict = {"input": self.input, "weight": self.weight} if not self.no_bias: feed_dict["bias"] = self.bias out, = exe.run(main, feed=feed_dict, fetch_list=[y]) return out
def functional(self, place): main = fluid.Program() start = fluid.Program() with fluid.unique_name.guard(): with fluid.program_guard(main, start): input_shape = (-1, -1, -1,self.num_channels) \ if self.channel_last else (-1, self.num_channels, -1, -1) x_var = fluid.data("input", input_shape, dtype=self.dtype) w_var = fluid.data("weight", self.weight_shape, dtype=self.dtype) b_var = fluid.data("bias", (self.num_filters, ), dtype=self.dtype) y_var = F.conv2d(x_var, w_var, b_var if not self.no_bias else None, padding=self.padding, stride=self.stride, dilation=self.dilation, groups=self.groups, act=self.act, use_cudnn=self.use_cudnn, data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: feed_dict["bias"] = self.bias exe = fluid.Executor(place) exe.run(start) y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var]) return y_np
def forward(self, input): if self._act_preprocess is not None: input = self._act_preprocess(input) quant_input = self._fake_quant_input(input) weight = self.weight if self._weight_preprocess is not None: weight = self._weight_preprocess(self.weight) quant_weight = self._fake_quant_weight(weight) if self._padding_mode != 'zeros': quant_input = F.pad(quant_input, self._reversed_padding_repeated_twice, mode=self._padding_mode, data_format=self._data_format) self._padding = 0 return F.conv2d(quant_input, quant_weight, bias=self.bias, padding=self._padding, stride=self._stride, dilation=self._dilation, groups=self._groups, data_format=self._data_format)
def forward(self, x): """Compute the stft transform. Parameters ------------ x : Tensor [shape=(B, T)] The input waveform. Returns ------------ real : Tensor [shape=(B, C, 1, frames)] The real part of the spectrogram. imag : Tensor [shape=(B, C, 1, frames)] The imaginary part of the spectrogram. """ # x(batch_size, time_steps) # pad it first with reflect mode # TODO(chenfeiyu): report an issue on paddle.flip pad_start = paddle.reverse(x[:, 1:1 + self.n_fft // 2], axis=[1]) pad_stop = paddle.reverse(x[:, -(1 + self.n_fft // 2):-1], axis=[1]) x = paddle.concat([pad_start, x, pad_stop], axis=-1) # to BC1T, C=1 x = paddle.unsqueeze(x, axis=[1, 2]) out = F.conv2d(x, self.weight, stride=(1, self.hop_length)) real, imag = paddle.chunk(out, 2, axis=1) # BC1T return real, imag
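# Hedged sketch of how a conv2d-based STFT weight like `self.weight` above can be built:
# a windowed DFT basis of shape [2 * (n_fft // 2 + 1), 1, 1, n_fft], cosine rows first and
# negated sine rows second, so that paddle.chunk(out, 2, axis=1) splits real and imaginary
# parts. The window choice and sizes here are assumptions, not the class's exact values.
import numpy as np
import paddle

n_fft = 512
n_bin = n_fft // 2 + 1
n = np.arange(n_fft)
k = np.arange(n_bin).reshape((-1, 1))
window = np.hanning(n_fft)
basis = np.concatenate([np.cos(2 * np.pi * k * n / n_fft),
                        -np.sin(2 * np.pi * k * n / n_fft)], axis=0) * window
weight = paddle.to_tensor(basis.reshape((2 * n_bin, 1, 1, n_fft)), dtype='float32')
print(weight.shape)  # [514, 1, 1, 512]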
def static_graph_case(self): main = fluid.Program() start = fluid.Program() with fluid.unique_name.guard(): with fluid.program_guard(main, start): self.channel_last = self.data_format == "NHWC" if self.channel_last: x = fluid.data("input", (-1, -1, -1, self.in_channels), dtype=self.dtype) else: x = fluid.data("input", (-1, self.in_channels, -1, -1), dtype=self.dtype) weight = fluid.data("weight", self.weight_shape, dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias_shape, dtype=self.dtype) y = F.conv2d(x, weight, None if self.no_bias else bias, padding=self.padding, stride=self.stride, dilation=self.dilation, groups=self.groups, data_format=self.data_format)
def forward(self, input): if self.scale == 1.0: return input out = F.pad(input, [self.ka, self.kb, self.ka, self.kb]) out = F.conv2d(out, weight=self.weight, groups=self.groups) out = F.interpolate(out, scale_factor=[self.scale, self.scale]) return out
def forward(self, input, style): batch, in_channel, height, width = input.shape style = self.modulation(style).reshape((batch, 1, in_channel, 1, 1)) weight = self.scale * self.weight * style if self.demodulate: demod = paddle.rsqrt((weight * weight).sum([2, 3, 4]) + 1e-8) weight = weight * demod.reshape((batch, self.out_channel, 1, 1, 1)) weight = weight.reshape((batch * self.out_channel, in_channel, self.kernel_size, self.kernel_size)) if self.upsample: input = input.reshape((1, batch * in_channel, height, width)) weight = weight.reshape((batch, self.out_channel, in_channel, self.kernel_size, self.kernel_size)) weight = weight.transpose((0, 2, 1, 3, 4)).reshape( (batch * in_channel, self.out_channel, self.kernel_size, self.kernel_size)) out = F.conv2d_transpose(input, weight, padding=0, stride=2, groups=batch) _, _, height, width = out.shape out = out.reshape((batch, self.out_channel, height, width)) out = self.blur(out) elif self.downsample: input = self.blur(input) _, _, height, width = input.shape input = input.reshape((1, batch * in_channel, height, width)) out = F.conv2d(input, weight, padding=0, stride=2, groups=batch) _, _, height, width = out.shape out = out.reshape((batch, self.out_channel, height, width)) else: input = input.reshape((1, batch * in_channel, height, width)) out = F.conv2d(input, weight, padding=self.padding, groups=batch) _, _, height, width = out.shape out = out.reshape((batch, self.out_channel, height, width)) return out
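# Minimal sketch of the batch-as-groups trick used by the modulated convolution above:
# per-sample kernels are folded into a single grouped F.conv2d call. All names and sizes
# here are illustrative, not taken from the class.
import paddle
import paddle.nn.functional as F

batch, in_c, out_c, k = 2, 4, 8, 3
x = paddle.randn([batch, in_c, 16, 16])
w = paddle.randn([batch, out_c, in_c, k, k])        # one kernel per sample
x = x.reshape((1, batch * in_c, 16, 16))            # fold the batch into the channel axis
w = w.reshape((batch * out_c, in_c, k, k))          # stack the per-sample kernels
y = F.conv2d(x, w, padding=k // 2, groups=batch)    # each sample only sees its own kernel
y = y.reshape((batch, out_c, 16, 16))
print(y.shape)  # [2, 8, 16, 16]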
def f(R, w1, w2, x1, x2): R_nonzero = R.not_equal(ZERO_TENSOR).astype(R.dtype) Za1 = F.conv2d( x1, w1, bias=None, stride=self._stride, padding=self.padding) * R_nonzero Za2 = -F.conv2d( x1, w2, bias=None, stride=self._stride, padding=self.padding) * R_nonzero Zb1 = -F.conv2d( x2, w1, bias=None, stride=self._stride, padding=self.padding) * R_nonzero Zb2 = F.conv2d( x2, w2, bias=None, stride=self._stride, padding=self.padding) * R_nonzero C1 = pos_prop(R, Za1, Za2, x1) C2 = pos_prop(R, Zb1, Zb2, x2) return C1 + C2
def forward(self, input): out = F.conv2d( input, self.weight * self.scale, bias=self.bias, stride=self.stride, padding=self.padding, ) return out
def forward(self, inputs): conv_out = F.conv2d(inputs, self.weight_conv, padding=self._padding, stride=self._stride, dilation=self._dilation, groups=self._in_channels, data_format=self._data_format) out = F.conv2d(conv_out, self.weight_pointwise, bias=self.bias_pointwise, padding=0, stride=1, dilation=1, groups=1, data_format=self._data_format) return out
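# Quick, self-contained shape check for the depthwise-separable pattern used in the
# forward above (sizes are illustrative):
import paddle
import paddle.nn.functional as F

x = paddle.randn([1, 8, 32, 32])
dw = paddle.randn([8, 1, 3, 3])     # depthwise: one 3x3 filter per input channel (groups = in_channels)
pw = paddle.randn([16, 8, 1, 1])    # pointwise: 1x1 conv that mixes channels
y = F.conv2d(F.conv2d(x, dw, padding=1, groups=8), pw)
print(y.shape)  # [1, 16, 32, 32]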
def compute_grad_mag(self, x): eps = 1e-6 n, c, h, w = x.shape if h <= 1 or w <= 1: raise ValueError( 'The width and height of tensor to compute grad must be greater than 1, but the shape is {}.' .format(x.shape)) x = self.conv_tri(x, r=4) kernel = [[-1, 0, 1]] kernel = paddle.to_tensor(kernel).astype('float32') kernel = 0.5 * kernel kernel_x = paddle.concat([kernel.unsqueeze((0, 1))] * c, axis=0) grad_x = F.conv2d(x, kernel_x, padding='same', groups=c) kernel_y = paddle.concat([kernel.t().unsqueeze((0, 1))] * c, axis=0) grad_y = F.conv2d(x, kernel_y, padding='same', groups=c) mag = paddle.sqrt(grad_x * grad_x + grad_y * grad_y + eps) return mag / mag.max()
def final_backward(R_p, pw, nw, X1): X = X1 L = X * 0 + \ paddle.min(paddle.min(paddle.min(X, axis=1, keepdim=True), axis=2, keepdim=True), axis=3, keepdim=True) H = X * 0 + \ paddle.max(paddle.max(paddle.max(X, axis=1, keepdim=True), axis=2, keepdim=True), axis=3, keepdim=True) Za = F.conv2d(X, self.weight, bias=None, stride=self._stride, padding=self._padding) - \ F.conv2d(L, pw, bias=None, stride=self._stride, padding=self._padding) - \ F.conv2d(H, nw, bias=None, stride=self._stride, padding=self._padding) Sp = safe_divide(R_p, Za) Rp = X * self.gradprop2(Sp, self.weight) - L * \ self.gradprop2(Sp, pw) - H * self.gradprop2(Sp, nw) return Rp
def dygraph_case(self): with dg.guard(): x = dg.to_variable(self.input, dtype=paddle.float32) w = dg.to_variable(self.filter, dtype=paddle.float32) b = None if self.bias is None else dg.to_variable( self.bias, dtype=paddle.float32) y = F.conv2d(x, w, b, padding=self.padding, stride=self.stride, dilation=self.dilation, groups=self.groups, data_format=self.data_format)
def forward(self, input): if self.scale == 1.0: return input out = F.pad(input, [self.ka, self.kb, self.ka, self.kb]) out = F.conv2d(out, weight=self.weight, groups=self.groups) out.stop_gradient = False inv_scale = 1 / self.scale int_inv_scale = int(inv_scale) assert (inv_scale == int_inv_scale) # out = out[:, :, ::int_inv_scale, ::int_inv_scale] # patch end out = paddle.fluid.layers.resize_nearest(out, scale=self.scale) return out
def forward(self, x): generated_filter = self.filter_gen_conv(self.avg_pool(x)) x = self.input_redu_conv(x) b, c, h, w = x.shape x = x.reshape([1, b * c, h, w]) generated_filter = generated_filter.reshape( [b * c, 1, self.filter_size, self.filter_size]) x = F.pad(x, self.pad, mode='constant', value=0) output = F.conv2d(x, weight=generated_filter, groups=b * c) output = output.reshape([b, self.channels, h, w]) output = self.norm(output) output = self.act(output) if self.fusion: output = self.fusion_conv(output) return output
def forward(self, x): ih, iw = x.shape[-2:] kh, kw = self.weight.shape[-2:] sh, sw = self.stride oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) pad_h = max( (oh - 1) * self.stride[0] + (kh - 1) * self._dilation[0] + 1 - ih, 0) pad_w = max( (ow - 1) * self.stride[1] + (kw - 1) * self._dilation[1] + 1 - iw, 0) if pad_h > 0 or pad_w > 0: x = F.pad(x, [ pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2 ]) return F.conv2d(x, self.weight, self.bias, self.stride, self._padding, self._dilation, self._groups)
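# The manual padding above reproduces TensorFlow-style "same" padding for strided convs.
# For the stride-1 case it matches what padding="SAME" gives directly, as this small
# hedged check illustrates (shapes are arbitrary):
import paddle
import paddle.nn.functional as F

x = paddle.randn([1, 3, 10, 10])
w = paddle.randn([4, 3, 3, 3])
y_same = F.conv2d(x, w, padding="SAME")
y_manual = F.conv2d(F.pad(x, [1, 1, 1, 1]), w, padding=0)
print(paddle.allclose(y_same, y_manual).numpy())  # expect: True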
def dygraph_case(self): with dg.guard(self.place): x = dg.to_variable(self.input) weight = dg.to_variable(self.weight) bias = None if self.no_bias else dg.to_variable(self.bias) y = F.conv2d(x, weight, bias, padding=self.padding, stride=self.stride, dilation=self.dilation, act=self.act, groups=self.groups, data_format=self.data_format, use_cudnn=self.use_cudnn) out = y.numpy() return out
def forward(self, input, kernel_size=None, expand_ratio=None, channel=None): self.cur_config = { 'kernel_size': kernel_size, 'expand_ratio': expand_ratio, 'channel': channel } in_nc = int(input.shape[1]) assert ( expand_ratio == None or channel == None ), "expand_ratio and channel CANNOT both be set (not None) at the same time." if expand_ratio != None: out_nc = int(expand_ratio * self.base_channel) elif channel != None: out_nc = int(channel) else: out_nc = self._out_channels ks = int(self._kernel_size[0]) if kernel_size == None else int( kernel_size) groups, weight_in_nc, weight_out_nc = self.get_groups_in_out_nc(in_nc, out_nc) weight = self.get_active_filter(weight_in_nc, weight_out_nc, ks) if kernel_size != None or 'kernel_size' in self.candidate_config.keys(): padding = convert_to_list(get_same_padding(ks), 2) else: padding = self._padding if self.bias is not None: bias = self.bias[:out_nc] else: bias = self.bias out = F.conv2d( input, weight, bias=bias, stride=self._stride, padding=padding, dilation=self._dilation, groups=self._groups, data_format=self._data_format) return out
def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1): _, channel, in_h, in_w = input.shape input = input.reshape((-1, in_h, in_w, 1)) _, in_h, in_w, minor = input.shape kernel_h, kernel_w = kernel.shape out = input.reshape((-1, in_h, 1, in_w, 1, minor)) out = out.transpose((0, 1, 3, 5, 2, 4)) out = out.reshape((-1, 1, 1, 1)) out = F.pad(out, [0, up_x - 1, 0, up_y - 1]) out = out.reshape((-1, in_h, in_w, minor, up_y, up_x)) out = out.transpose((0, 3, 1, 4, 2, 5)) out = out.reshape((-1, minor, in_h * up_y, in_w * up_x)) out = F.pad( out, [max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]) out = out[:, :, max(-pad_y0, 0):out.shape[2] - max(-pad_y1, 0), max(-pad_x0, 0):out.shape[3] - max(-pad_x1, 0), ] out = out.reshape( ([-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1])) w = paddle.flip(kernel, [0, 1]).reshape((1, 1, kernel_h, kernel_w)) out = F.conv2d(out, w) out = out.reshape(( -1, minor, in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1, )) out = out.transpose((0, 2, 3, 1)) out = out[:, ::down_y, ::down_x, :] out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 return out.reshape((-1, channel, out_h, out_w))
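# Illustrative call of upfirdn2d_native above: with up = down = 1 and symmetric padding
# of 1, it reduces to filtering with the (flipped) 3x3 kernel. paddle is assumed to be
# imported as in the surrounding snippets.
import paddle

x = paddle.randn([1, 3, 8, 8])
k = paddle.ones([3, 3]) / 9.0   # simple box filter
y = upfirdn2d_native(x, k, up_x=1, up_y=1, down_x=1, down_y=1,
                     pad_x0=1, pad_x1=1, pad_y0=1, pad_y1=1)
print(y.shape)  # [1, 3, 8, 8]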
def add_input(self, x_row, condition_row): """Compute the output for a row and update the buffer. Parameters ---------- x_row : Tensor [shape=(batch_size, channel, 1, width)] A row of the input. condition_row : Tensor [shape=(batch_size, condition_channel, 1, width)] A row of the condition. Returns ------- res : Tensor [shape=(batch_size, channel, 1, width)] A row of the residual output. skip : Tensor [shape=(batch_size, channel, 1, width)] A row of the skip output. """ x_row_in = x_row if self._conv_buffer is None: self._init_buffer(x_row) self._update_buffer(x_row) rw = self.rw x_row = F.conv2d( self._conv_buffer, self.conv.weight, self.conv.bias, padding=[0, 0, rw // 2, (rw - 1) // 2], dilation=self.dilations) x_row += self.condition_proj(condition_row) content, gate = paddle.chunk(x_row, 2, axis=1) x_row = paddle.tanh(content) * F.sigmoid(gate) x_row = self.out_proj(x_row) res, skip = paddle.chunk(x_row, 2, axis=1) res = x_row_in + res return res, skip
def forward(self, input): if self.scale == 1.0: return input out = F.pad(input, [self.ka, self.kb, self.ka, self.kb]) out = F.conv2d(out, weight=self.weight, groups=self.groups) out.stop_gradient = False # Recent versions of pytorch have a bug that affects the convergence of this model # original code # out = F.interpolate(out, scale_factor=[self.scale, self.scale]) # original code end # a patch that might work around this bug; # see https://github.com/AliaksandrSiarohin/first-order-model/issues/146#issue-624354694 inv_scale = 1 / self.scale int_inv_scale = int(inv_scale) assert (inv_scale == int_inv_scale) out = out[:, :, ::int_inv_scale, ::int_inv_scale] # patch end return out
def get_seg_single(self, cate_preds, seg_preds, kernel_preds, featmap_size, im_shape, scale_factor): h = paddle.cast(im_shape[0], 'int32')[0] w = paddle.cast(im_shape[1], 'int32')[0] upsampled_size_out = [featmap_size[0] * 4, featmap_size[1] * 4] y = paddle.zeros(shape=paddle.shape(cate_preds), dtype='float32') inds = paddle.where(cate_preds > self.score_threshold, cate_preds, y) inds = paddle.nonzero(inds) if paddle.shape(inds)[0] == 0: out = paddle.full(shape=[1], fill_value=-1) return out, out, out cate_preds = paddle.reshape(cate_preds, shape=[-1]) # Prevent empty and increase fake data ind_a = paddle.cast(paddle.shape(kernel_preds)[0], 'int64') ind_b = paddle.zeros(shape=[1], dtype='int64') inds_end = paddle.unsqueeze(paddle.concat([ind_a, ind_b]), 0) inds = paddle.concat([inds, inds_end]) kernel_preds_end = paddle.ones(shape=[1, self.kernel_out_channels], dtype='float32') kernel_preds = paddle.concat([kernel_preds, kernel_preds_end]) cate_preds = paddle.concat( [cate_preds, paddle.zeros(shape=[1], dtype='float32')]) # cate_labels & kernel_preds cate_labels = inds[:, 1] kernel_preds = paddle.gather(kernel_preds, index=inds[:, 0]) cate_score_idx = paddle.add(inds[:, 0] * 80, cate_labels) cate_scores = paddle.gather(cate_preds, index=cate_score_idx) size_trans = np.power(self.seg_num_grids, 2) strides = [] for _ind in range(len(self.segm_strides)): strides.append( paddle.full(shape=[int(size_trans[_ind])], fill_value=self.segm_strides[_ind], dtype="int32")) strides = paddle.concat(strides) strides = paddle.gather(strides, index=inds[:, 0]) # mask encoding. kernel_preds = paddle.unsqueeze(kernel_preds, [2, 3]) seg_preds = F.conv2d(seg_preds, kernel_preds) seg_preds = F.sigmoid(paddle.squeeze(seg_preds, [0])) seg_masks = seg_preds > self.mask_threshold seg_masks = paddle.cast(seg_masks, 'float32') sum_masks = paddle.sum(seg_masks, axis=[1, 2]) y = paddle.zeros(shape=paddle.shape(sum_masks), dtype='float32') keep = paddle.where(sum_masks > strides, sum_masks, y) keep = paddle.nonzero(keep) keep = paddle.squeeze(keep, axis=[1]) # Prevent empty and increase fake data keep_other = paddle.concat( [keep, paddle.cast(paddle.shape(sum_masks)[0] - 1, 'int64')]) keep_scores = paddle.concat( [keep, paddle.cast(paddle.shape(sum_masks)[0], 'int64')]) cate_scores_end = paddle.zeros(shape=[1], dtype='float32') cate_scores = paddle.concat([cate_scores, cate_scores_end]) seg_masks = paddle.gather(seg_masks, index=keep_other) seg_preds = paddle.gather(seg_preds, index=keep_other) sum_masks = paddle.gather(sum_masks, index=keep_other) cate_labels = paddle.gather(cate_labels, index=keep_other) cate_scores = paddle.gather(cate_scores, index=keep_scores) # mask scoring. seg_mul = paddle.cast(seg_preds * seg_masks, 'float32') seg_scores = paddle.sum(seg_mul, axis=[1, 2]) / sum_masks cate_scores *= seg_scores # Matrix NMS seg_preds, cate_scores, cate_labels = self.mask_nms( seg_preds, seg_masks, cate_labels, cate_scores, sum_masks=sum_masks) ori_shape = im_shape[:2] / scale_factor + 0.5 ori_shape = paddle.cast(ori_shape, 'int32') seg_preds = F.interpolate(paddle.unsqueeze(seg_preds, 0), size=upsampled_size_out, mode='bilinear', align_corners=False, align_mode=0) seg_preds = paddle.slice(seg_preds, axes=[2, 3], starts=[0, 0], ends=[h, w]) seg_masks = paddle.squeeze(F.interpolate(seg_preds, size=ori_shape[:2], mode='bilinear', align_corners=False, align_mode=0), axis=[0]) # TODO: support bool type seg_masks = paddle.cast(seg_masks > self.mask_threshold, 'int32') return seg_masks, cate_labels, cate_scores
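# Sketch of the dynamic mask convolution used in get_seg_single above: each predicted
# instance contributes one 1x1 kernel that is applied to the shared mask features
# (shapes here are illustrative):
import paddle
import paddle.nn.functional as F

seg_feats = paddle.randn([1, 32, 40, 40])                            # shared mask feature map
kernel_preds = paddle.randn([5, 32])                                 # one 32-d kernel per instance
masks = F.conv2d(seg_feats, paddle.unsqueeze(kernel_preds, [2, 3]))  # weight [5, 32, 1, 1] -> [1, 5, 40, 40]
print(masks.shape)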
def forward(self, x, offset, mask): in_C = self.in_channels out_C = self.out_channels stride = self.stride padding = self.padding # dilation = self.dilation groups = self.groups N, _, H, W = x.shape _, w_in, kH, kW = self.weight.shape out_W = (W + 2 * padding - (kW - 1)) // stride out_H = (H + 2 * padding - (kH - 1)) // stride # ================== 1. Pad the input image x to get the padded image pad_x ================== pad_x_H = H + padding * 2 + 1 pad_x_W = W + padding * 2 + 1 pad_x = F.pad(x, pad=[0, 0, 0, 0, padding, padding + 1, padding, padding + 1], value=0.0) # ================== 2. Compute the coordinates of all sampling points ================== # position of the kernel center inside pad_x y_outer, x_outer = paddle.meshgrid([paddle.arange(out_H), paddle.arange(out_W)]) y_outer = y_outer * stride + padding x_outer = x_outer * stride + padding start_pos_yx = paddle.stack((y_outer, x_outer), 2).cast(dtype='float32') # [out_H, out_W, 2] kernel-center position inside pad_x start_pos_yx = paddle.unsqueeze(start_pos_yx, axis=[0, 3]) # [1, out_H, out_W, 1, 2] kernel-center position inside pad_x start_pos_yx = paddle.tile(start_pos_yx, [N, 1, 1, kH * kW, 1]) # [N, out_H, out_W, kH*kW, 2] kernel-center position inside pad_x start_pos_y = start_pos_yx[:, :, :, :, :1] # [N, out_H, out_W, kH*kW, 1] kernel-center position inside pad_x start_pos_x = start_pos_yx[:, :, :, :, 1:] # [N, out_H, out_W, kH*kW, 1] kernel-center position inside pad_x start_pos_y.stop_gradient = True start_pos_x.stop_gradient = True # offsets of the kernel taps relative to its center half_W = (kW - 1) // 2 half_H = (kH - 1) // 2 y_inner, x_inner = paddle.meshgrid([paddle.arange(kH), paddle.arange(kW)]) y_inner -= half_H x_inner -= half_W filter_inner_offset_yx = paddle.stack((y_inner, x_inner), 2).cast(dtype='float32') # [kH, kW, 2] intra-kernel offsets filter_inner_offset_yx = paddle.reshape(filter_inner_offset_yx, (1, 1, 1, kH * kW, 2)) # [1, 1, 1, kH*kW, 2] intra-kernel offsets filter_inner_offset_yx = paddle.tile(filter_inner_offset_yx, [N, out_H, out_W, 1, 1]) # [N, out_H, out_W, kH*kW, 2] intra-kernel offsets filter_inner_offset_y = filter_inner_offset_yx[:, :, :, :, :1] # [N, out_H, out_W, kH*kW, 1] intra-kernel offsets filter_inner_offset_x = filter_inner_offset_yx[:, :, :, :, 1:] # [N, out_H, out_W, kH*kW, 1] intra-kernel offsets filter_inner_offset_y.stop_gradient = True filter_inner_offset_x.stop_gradient = True # predicted offsets offset = paddle.transpose(offset, [0, 2, 3, 1]) # [N, out_H, out_W, kH*kW*2] offset_yx = paddle.reshape(offset, (N, out_H, out_W, kH * kW, 2)) # [N, out_H, out_W, kH*kW, 2] offset_y = offset_yx[:, :, :, :, :1] # [N, out_H, out_W, kH*kW, 1] offset_x = offset_yx[:, :, :, :, 1:] # [N, out_H, out_W, kH*kW, 1] # final sampling positions pos_y = start_pos_y + filter_inner_offset_y + offset_y # [N, out_H, out_W, kH*kW, 1] pos_x = start_pos_x + filter_inner_offset_x + offset_x # [N, out_H, out_W, kH*kW, 1] pos_y = paddle.clip(pos_y, 0.0, H + padding * 2 - 1.0) # clamp the sampling positions inside pad_x pos_x = paddle.clip(pos_x, 0.0, W + padding * 2 - 1.0) # clamp the sampling positions inside pad_x # ================== 3. Sampling: bilinear interpolation with F.grid_sample() ================== pos_x = pos_x / (pad_x_W - 1) * 2.0 - 1.0 pos_y = pos_y / (pad_x_H - 1) * 2.0 - 1.0 xtyt = paddle.concat([pos_x, pos_y], -1) # [N, out_H, out_W, kH*kW, 2] xtyt = paddle.reshape(xtyt, (N, out_H, out_W * kH * kW, 2)) # [N, out_H, out_W*kH*kW, 2] value = F.grid_sample(pad_x, xtyt, mode='bilinear', padding_mode='zeros', align_corners=True) # [N, in_C, out_H, out_W*kH*kW] value = paddle.reshape(value, (N, in_C, out_H, out_W, kH * kW)) # [N, in_C, out_H, out_W, kH * kW] value = value.transpose((0, 1, 4, 2, 3)) # [N, in_C, kH * kW, out_H, out_W] # ================== 4. Weight each sample by the modulation mask ================== mask = paddle.unsqueeze(mask, [1]) # [N, 1, kH * kW, out_H, out_W] value = value * mask # [N, in_C, kH * kW, out_H, out_W] new_x = paddle.reshape(value, (N, in_C * kH * kW, out_H, out_W)) # [N, in_C * kH * kW, out_H, out_W] # ================== 5. Multiply by this layer's weight and add the bias ================== # 1x1 convolution rw = paddle.reshape(self.weight, (out_C, w_in * kH * kW, 1, 1)) # [out_C, w_in, kH, kW] -> [out_C, w_in*kH*kW, 1, 1] turned into a 1x1 kernel out = F.conv2d(new_x, rw, bias=self.bias, stride=1, groups=groups) # [N, out_C, out_H, out_W] return out
def filter2D(input: paddle.Tensor, kernel: paddle.Tensor, border_type: str = 'reflect', normalized: bool = False) -> paddle.Tensor: r"""Convolve a tensor with a 2d kernel. The function applies a given kernel to a tensor. The kernel is applied independently at each depth channel of the tensor. Before applying the kernel, the function applies padding according to the specified mode so that the output remains in the same shape. Args: input (paddle.Tensor): the input tensor with shape of :math:`(B, C, H, W)`. kernel (paddle.Tensor): the kernel to be convolved with the input tensor. The kernel shape must be :math:`(1, kH, kW)` or :math:`(B, kH, kW)`. border_type (str): the padding mode to be applied before convolving. The expected modes are: ``'constant'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'reflect'``. normalized (bool): If True, kernel will be L1 normalized. Return: paddle.Tensor: the convolved tensor of same size and number of channels as the input with shape :math:`(B, C, H, W)`. Example: >>> input = paddle.to_tensor([[[ ... [0., 0., 0., 0., 0.], ... [0., 0., 0., 0., 0.], ... [0., 0., 5., 0., 0.], ... [0., 0., 0., 0., 0.], ... [0., 0., 0., 0., 0.],]]]) >>> kernel = paddle.ones([1, 3, 3]) >>> filter2D(input, kernel) Tensor([[[[0., 0., 0., 0., 0.] [0., 5., 5., 5., 0.] [0., 5., 5., 5., 0.] [0., 5., 5., 5., 0.] [0., 0., 0., 0., 0.]]]]) """ testing.check_is_tensor(input) testing.check_is_tensor(kernel) if not isinstance(border_type, str): raise TypeError("Input border_type is not string. Got {}".format( type(border_type))) if not len(input.shape) == 4: raise ValueError( "Invalid input shape, we expect BxCxHxW. Got: {}".format( input.shape)) if not len(kernel.shape) == 3 and kernel.shape[0] != 1: raise ValueError( "Invalid kernel shape, we expect 1xHxW. Got: {}".format( kernel.shape)) # prepare kernel b, c, h, w = input.shape tmp_kernel: paddle.Tensor = kernel.unsqueeze(1) if normalized: tmp_kernel = normalize_kernel2d(tmp_kernel) tmp_kernel = tmp_kernel.expand([-1, c, -1, -1]) # pad the input tensor height, width = tmp_kernel.shape[-2:] padding_shape: List[int] = compute_padding([height, width]) # TODO: The Op pad3d_grad doesn't have any grad op for gradient penalty in the current paddle version # input_pad: paddle.Tensor = F.pad(input, padding_shape, mode=border_type) input_pad = input # kernel and input tensor reshape to align element-wise or batch-wise params tmp_kernel = tmp_kernel.reshape([-1, 1, height, width]) input_pad = input_pad.reshape( [-1, tmp_kernel.shape[0], input_pad.shape[-2], input_pad.shape[-1]]) # convolve the tensor with the kernel. # TODO: The Ops depthwise_conv2d_grad and pad3d_grad don't have any grad op for gradient penalty in the current paddle version # output = F.conv2d(input_pad, tmp_kernel, groups=tmp_kernel.shape[0], padding=0, stride=1) input_pad = input_pad.reshape( [-1, 1, input_pad.shape[-2], input_pad.shape[-1]]) tmp_kernel = tmp_kernel[:1] output = F.conv2d(input_pad, tmp_kernel, padding=padding_shape, stride=1) return output.reshape([b, c, h, w])
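# Usage sketch for filter2D above, reproducing its docstring example (assumes the helper
# functions it references, e.g. compute_padding and the testing module, are importable
# from the same module):
import paddle

inp = paddle.zeros([1, 1, 5, 5])
inp[0, 0, 2, 2] = 5.0
kernel = paddle.ones([1, 3, 3])   # unnormalized 3x3 box filter
out = filter2D(inp, kernel)
print(out.shape)  # [1, 1, 5, 5]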
print('================= element-wise multiply, 1x1 convolution, and the matrix-multiply trick used in YOLACT ==================') N = 2 in_C = 3 out_C = 8 H = 28 W = 28 kH = 1 kW = 1 C = out_C # w = paddle.randn((C, in_C, kH, kW)) x = paddle.randn((N, in_C, H, W)) w = paddle.randn((C, in_C, 1, 1)) y = F.conv2d(x, w) x_in = L.reshape(x, (N, 1, in_C, H, W)) w_r = L.reshape(w, (1, C, in_C, 1, 1)) y2 = x_in * w_r # [N, C, in_C, H, W] y2 = L.reduce_sum(y2, dim=[ 2, ]) x_in2 = L.transpose(x, [0, 2, 3, 1]) # [N, H, W, in_C] w_r2 = L.reshape(w, (C, in_C)) w_r2 = L.transpose(w_r2, [1, 0]) # [in_C, C] y3 = L.matmul(x_in2, w_r2) # [N, H, W, C] y3 = L.transpose(y3, [0, 3, 1, 2]) # [N, C, H, W] y = y.numpy()
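# Completing the comparison above: the conv2d, element-wise, and matmul routes should
# agree numerically. This check is an addition, not part of the original script.
import numpy as np

y2 = y2.numpy()
y3 = y3.numpy()
print(np.allclose(y, y2, atol=1e-5), np.allclose(y, y3, atol=1e-5))  # expect: True True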