def _get_output_single(self, input, idx):
    """Run the category and kernel branches on one feature map.

    Two coordinate channels (x and y, each spanning [-1, 1]) are appended to
    ``input`` (CoordConv). The result is bilinearly resized to the square
    grid ``self.seg_num_grids[idx]`` and fed to the kernel branch; the
    category branch consumes the same resized tensor with its last two
    channels (the coordinates) sliced off.

    Args:
        input: Feature map, indexed here as a 4-D tensor with the batch
            axis first and the two spatial axes last.
        idx: Position of this feature map in ``self.seg_num_grids``.

    Returns:
        ``(cate_pred, kernel_pred)``. Outside training mode ``cate_pred`` is
        sigmoid-activated, filtered by ``self._points_nms`` and permuted
        with ``[0, 2, 3, 1]``; in training mode it is the raw output of
        ``self.solo_cate``.
    """
    feat_shape = paddle.shape(input)
    batch = feat_shape[0]

    # CoordConv: build normalized x/y coordinate maps and broadcast them
    # over the batch axis.
    xs = paddle.linspace(-1, 1, feat_shape[-1], dtype='float32')
    ys = paddle.linspace(-1, 1, feat_shape[-2], dtype='float32')
    grid_y, grid_x = paddle.meshgrid([ys, xs])
    grid_y = paddle.expand(
        paddle.unsqueeze(grid_y, [0, 1]), shape=[batch, 1, -1, -1])
    grid_x = paddle.expand(
        paddle.unsqueeze(grid_x, [0, 1]), shape=[batch, 1, -1, -1])
    coords = paddle.concat([grid_x, grid_y], axis=1)
    feat_with_coords = paddle.concat([input, coords], axis=1)

    # Both branches start from the same grid-sized tensor.
    grid = self.seg_num_grids[idx]
    resized = F.interpolate(
        feat_with_coords,
        size=[grid, grid],
        mode='bilinear',
        align_corners=False,
        align_mode=0)

    # Kernel branch (keeps the coordinate channels).
    kernel_feat = resized
    for conv in self.kernel_pred_convs:
        kernel_feat = F.relu(conv(kernel_feat))
    if self.drop_block and self.training:
        kernel_feat = self.drop_block_fun(kernel_feat)
    kernel_pred = self.solo_kernel(kernel_feat)

    # Category branch (coordinate channels removed).
    cate_feat = resized[:, :-2, :, :]
    for conv in self.cate_pred_convs:
        cate_feat = F.relu(conv(cate_feat))
    if self.drop_block and self.training:
        cate_feat = self.drop_block_fun(cate_feat)
    cate_pred = self.solo_cate(cate_feat)

    if not self.training:
        cate_pred = self._points_nms(F.sigmoid(cate_pred), kernel_size=2)
        cate_pred = paddle.transpose(cate_pred, [0, 2, 3, 1])
    return cate_pred, kernel_pred
def forward(self, x):
    """Fuse four backbone levels and return logits at the input resolution.

    Each backbone output goes through its own conv chain and is resized to
    the spatial size of the first backbone output; levels are then summed
    cumulatively from the last towards the first before being passed to
    ``self.mlahead`` and ``self.cls``.

    Args:
        x: Input tensor; its spatial size (axes 2 onward) is the size every
            returned logit is resized to.

    Returns:
        List of logits. It holds the main logit and, in training mode, the
        output of ``self.aux_head`` applied to the third backbone output.
    """
    feats = self.backbone(x)
    ref_hw = feats[0].shape[2:]

    def to_ref(t):
        # Resize to the spatial size of the first backbone level.
        return F.interpolate(
            t, size=ref_hw, mode='bilinear', align_corners=True)

    def double(t):
        # Upsample by a fixed factor of two.
        return F.interpolate(
            t, scale_factor=2, mode='bilinear', align_corners=True)

    lvl0 = self.conv0(feats[0])
    lvl1 = to_ref(self.conv1(feats[1]))

    lvl2 = double(self.conv21(feats[2]))
    lvl2 = to_ref(self.conv22(lvl2))

    lvl3 = double(self.conv31(feats[3]))
    lvl3 = double(self.conv32(lvl3))
    lvl3 = to_ref(self.conv33(lvl3))

    # Cumulative sums, deepest level first.
    lvl2 = lvl2 + lvl3
    lvl1 = lvl1 + lvl2
    lvl0 = lvl0 + lvl1

    fused = self.mlahead(lvl0, lvl1, lvl2, lvl3)
    logits = [self.cls(fused)]
    if self.training:
        logits.append(self.aux_head(feats[2]))

    out_hw = paddle.shape(x)[2:]
    return [
        F.interpolate(item, out_hw, mode='bilinear', align_corners=True)
        for item in logits
    ]
def forward(self, body_feats):
    """Aggregate the upsampled features and return the last aggregated map.

    Args:
        body_feats: Backbone features handed to ``self.dla_up``.

    Returns:
        The last entry of the aggregated feature list, optionally passed
        through ``self.sge_attention`` and, when ``self.down_ratio`` is not
        4, bilinearly rescaled by ``self.down_ratio // 4``.
    """
    up_feats = self.dla_up(body_feats)

    # Work on clones so that the aggregation step does not touch the
    # tensors returned by dla_up.
    num_levels = self.last_level - self.first_level
    agg_feats = [up_feats[i].clone() for i in range(num_levels)]

    # The return value is ignored and the list is read afterwards, so
    # ida_up presumably updates the list in place.
    self.ida_up(agg_feats, 0, len(agg_feats))

    result = agg_feats[-1]
    if self.with_sge:
        result = self.sge_attention(result)
    if self.down_ratio != 4:
        result = F.interpolate(
            result,
            scale_factor=self.down_ratio // 4,
            mode="bilinear",
            align_corners=True)
    return result
def forward(self, x):
    """Return the head logit (and optional auxiliary logit) at input size.

    Args:
        x: Input tensor whose last two shape entries are taken as the
            output height and width.

    Returns:
        List of logits, each bilinearly resized to the input height/width.
        The auxiliary logit is included only when ``self.aux`` is truthy.
    """
    _, _, h, w = paddle.shape(x)
    # Only the last two outputs of base_forward are used here.
    _, _, c3, c4 = self.base_forward(x)

    main_logit, _ = self.head(c4)
    logits = [main_logit]
    if self.aux:
        logits.append(self.auxlayer(c3))

    resized = []
    for item in logits:
        resized.append(
            F.interpolate(
                item, (h, w),
                mode='bilinear',
                align_corners=self.align_corners))
    return resized
def _get_loss_mask(self, masks, gt_mask, match_indices, num_gts):
    """Compute the focal and dice mask losses for matched predictions.

    Args:
        masks: Predicted masks, [b, query, h, w] per the original comment.
        gt_mask: Ground-truth masks, a list of [n, H, W] entries per the
            original comment.
        match_indices: Matching passed on to ``self._get_src_target_assign``.
        num_gts: Ground-truth count; forwarded to both loss terms.

    Returns:
        Dict with keys ``'loss_mask'`` and ``'loss_dice'``. Both are a
        zero tensor when ``gt_mask`` contains no entries at all.
    """
    # No ground truth in the whole batch: nothing to match against.
    if not any(len(entry) for entry in gt_mask):
        return {
            'loss_mask': paddle.to_tensor([0.]),
            'loss_dice': paddle.to_tensor([0.]),
        }

    src_masks, target_masks = self._get_src_target_assign(
        masks, gt_mask, match_indices)

    # Resize predictions to the target resolution; a leading axis is added
    # for interpolate and stripped again afterwards.
    src_masks = F.interpolate(
        src_masks.unsqueeze(0),
        size=target_masks.shape[-2:],
        mode="bilinear")[0]

    focal = F.sigmoid_focal_loss(
        src_masks, target_masks,
        paddle.to_tensor([num_gts], dtype='float32'))
    dice = self._dice_loss(src_masks, target_masks, num_gts)
    return {
        'loss_mask': self.loss_coeff['mask'] * focal,
        'loss_dice': self.loss_coeff['dice'] * dice,
    }
def test_case(self):
    """Check bilinear ``interpolate`` with a tensor ``scale_factor``.

    A random (2, 3, 6, 6) input is upsampled with a scale tensor of
    ``[2, 2]`` and compared with the NumPy reference at 12x12.
    """
    import paddle

    place = (core.CUDAPlace(0)
             if core.is_compiled_with_cuda() else core.CPUPlace())
    with fluid.dygraph.guard(place):
        input_data = np.random.random((2, 3, 6, 6)).astype("float32")
        scale_np = np.array([2, 2]).astype("int64")
        input_x = paddle.to_tensor(input_data)
        scale = paddle.to_tensor(scale_np)

        expected = bilinear_interp_np(
            input_data, out_h=12, out_w=12, align_corners=False)
        actual = interpolate(
            x=input_x,
            scale_factor=scale,
            mode="bilinear",
            align_corners=False)

        self.assertTrue(np.allclose(actual.numpy(), expected))
def forward(self, x):
    """Return the main logit and four auxiliary logits at input resolution.

    Args:
        x: Input tensor; ``x.shape[2:]`` is the target size of every logit.

    Returns:
        List ``[logit, logit1, logit2, logit3, logit4]``, each bilinearly
        resized to the input's spatial size. The auxiliary heads are
        evaluated regardless of training mode.
    """
    dfm = self.db(x)
    feat1, feat2, feat3, feat4, sfm = self.sb(x)

    aux_logits = [
        self.aux_head1(feat1),
        self.aux_head2(feat2),
        self.aux_head3(feat3),
        self.aux_head4(feat4),
    ]
    main_logit = self.head(self.bga(dfm, sfm))

    resized = []
    for item in [main_logit] + aux_logits:
        resized.append(
            F.interpolate(
                item,
                x.shape[2:],
                mode='bilinear',
                align_corners=True,
                align_mode=1))
    return resized
def _upsample_add(self, x, y):
    """Resize ``x`` to the spatial size of ``y`` and add the two maps.

    Args:
        x: Top feature map to be upsampled.
        y: Lateral feature map; its last two shape entries give the
            target height and width.

    Returns:
        The element-wise sum of the resized ``x`` and ``y``.

    Note:
        With an odd input size, upsampling by a fixed factor such as
        ``F.upsample(..., scale_factor=2, mode='nearest')`` may not
        reproduce the lateral map's size, e.g.
        original input size: [N,_,15,15] ->
        conv2d feature map size: [N,_,8,8] ->
        upsampled feature map size: [N,_,16,16].
        Bilinear resizing to an explicit size avoids the mismatch.
    """
    _, _, height, width = y.shape
    resized = F.interpolate(
        x, size=(height, width), mode='bilinear', align_corners=True)
    return resized + y
def forward(self, input):
    """Pad, convolve and rescale ``input`` by ``self.scale``.

    Args:
        input: Tensor to be rescaled.

    Returns:
        ``input`` itself when ``self.scale`` equals 1.0; otherwise the
        padded and convolved tensor resized with nearest-neighbour
        interpolation by ``self.scale``.
    """
    if self.scale == 1.0:
        return input

    padded = fluid.layers.pad2d(
        input=input,
        paddings=[self.ka, self.kb, self.ka, self.kb],
        mode='constant')
    filtered = self.conv(padded)

    # TODO: fluid.layers.interpolate is NOT the same as F.interpolate
    # because of align_corners==True, so fluid.layers.resize_nearest is
    # used on the legacy path instead.
    if PP_v2:
        return F.interpolate(
            filtered,
            scale_factor=self.scale,
            mode='NEAREST',
            align_corners=False)
    return fluid.layers.resize_nearest(
        filtered, scale=self.scale, align_corners=False)
def forward(self, source_image, kp_driving, kp_source):
    """Encode the source image, warp its features, and decode a prediction.

    Args:
        source_image: Image fed to the encoder and, when a dense motion
            network is configured, warped directly as well.
        kp_driving: Driving keypoints, forwarded to the dense motion
            network.
        kp_source: Source keypoints, forwarded to the dense motion network.

    Returns:
        Dict that always contains ``"prediction"``. When
        ``self.dense_motion_network`` is set it also contains ``'mask'``,
        ``'sparse_deformed'``, ``"deformed"`` and, if the motion network
        produced one, ``'occlusion_map'``.
    """
    # Encoding (downsampling) part
    out = self.first(source_image)
    for idx in range(len(self.down_blocks)):
        out = self.down_blocks[idx](out)

    # Transforming feature representation according to deformation and
    # occlusion
    output_dict = {}
    if self.dense_motion_network is not None:
        dense_motion = self.dense_motion_network(
            source_image=source_image,
            kp_driving=kp_driving,
            kp_source=kp_source)
        output_dict['mask'] = dense_motion['mask']
        output_dict['sparse_deformed'] = dense_motion['sparse_deformed']

        occlusion_map = None
        if 'occlusion_map' in dense_motion:
            occlusion_map = dense_motion['occlusion_map']
            output_dict['occlusion_map'] = occlusion_map

        deformation = dense_motion['deformation']
        out = self.deform_input(out, deformation)

        if occlusion_map is not None:
            same_hw = (out.shape[2] == occlusion_map.shape[2] and
                       out.shape[3] == occlusion_map.shape[3])
            if not same_hw:
                # Bring the occlusion map to the feature resolution.
                occlusion_map = F.interpolate(
                    occlusion_map,
                    size=out.shape[2:],
                    mode='BILINEAR',
                    align_corners=False)
            out = out * occlusion_map

        output_dict["deformed"] = self.deform_input(source_image,
                                                    deformation)

    # Decoding part
    out = self.bottleneck(out)
    for idx in range(len(self.up_blocks)):
        out = self.up_blocks[idx](out)
    out = F.sigmoid(self.final(out))

    output_dict["prediction"] = out
    return output_dict
def forward(self, blocks):
    """Build top-down features from the given blocks.

    The blocks are processed in reverse order. From the second block on,
    the upsampled route of the previous step is concatenated along the
    channel axis (axis 1 for 'NCHW', the last axis otherwise).

    Args:
        blocks: Sequence of ``self.num_blocks`` feature maps.

    Returns:
        List with the ``tip`` output of every stage, in processing order.
    """
    assert len(blocks) == self.num_blocks
    channel_axis = 1 if self.data_format == 'NCHW' else -1
    last_idx = self.num_blocks - 1

    yolo_feats = []
    for i, block in enumerate(blocks[::-1]):
        if i > 0:
            # `route` was produced by the previous iteration.
            block = paddle.concat([route, block], axis=channel_axis)
        route, tip = self.yolo_blocks[i](block)
        yolo_feats.append(tip)

        if i < last_idx:
            route = F.interpolate(
                self.routes[i](route),
                scale_factor=2.,
                data_format=self.data_format)
    return yolo_feats
def forward(self, x):
    """Run the five-branch pipeline and return one map at input size.

    Args:
        x: Input tensor; ``x.shape[2:]`` is the size of the returned map.

    Returns:
        Single-element list holding the output of ``self.linear`` resized
        bilinearly to the input's spatial size.
    """
    f1, f2, f3, f4, f5 = self.backbone(x)

    # Three per-branch stages, applied stage by stage.
    f1, f2, f3, f4, f5 = (self.squeeze1(f1), self.squeeze2(f2),
                          self.squeeze3(f3), self.squeeze4(f4),
                          self.squeeze5(f5))
    f1, f2, f3, f4, f5 = (self.se1(f1), self.se2(f2), self.se3(f3),
                          self.se4(f4), self.se5(f5))
    f1, f2, f3, f4, f5 = (self.fa1(f1), self.fa2(f2), self.fa3(f3),
                          self.fa4(f4), self.fa5(f5))

    # Three cross-branch fusion stages, each mapping five maps to five.
    for fuse in (self.FMF1, self.FMF2, self.FMF3):
        f1, f2, f3, f4, f5 = fuse(f1, f2, f3, f4, f5)

    merged = self.mso(f1, f2, f3, f4, f5)
    result = F.interpolate(
        self.linear(merged),
        size=x.shape[2:],
        mode='bilinear',
        align_corners=True)
    return [result]
def save(self, img_path, save_path=None):
    """Run the network over ``self.loader`` and optionally write the results.

    For every sample the sigmoid prediction is resized to the shape of the
    mask image read from ``img_path + '/mask/' + name + '.png'``. When
    ``save_path`` is truthy the resized prediction, scaled by 255, is
    written with ``cv2.imwrite`` into
    ``save_path/<last component of self.cfg.datapath>``.

    Args:
        img_path: Dataset root that contains the ``mask`` directory.
        save_path: Output root. Nothing is written when it is falsy.
    """
    # The output directory does not depend on the sample, so resolve and
    # create it once. exist_ok avoids the exists()/makedirs() race.
    out_dir = None
    if save_path:
        out_dir = os.path.join(save_path, self.cfg.datapath.split('/')[-1])
        os.makedirs(out_dir, exist_ok=True)

    with paddle.no_grad():
        for image, _mask, (H, _W), maskpath in self.loader:
            pred = F.sigmoid(self.net(image))

            # One entry per sample in the batch; a separate index name
            # keeps it distinct from any batch-level counter.
            for idx in range(len(H)):
                sample_pred = pred[idx].unsqueeze(0)
                mask_file = img_path + '/mask/' + maskpath[idx] + '.png'
                ref_mask = paddle.to_tensor(
                    self.read_img(mask_file)).unsqueeze(0).unsqueeze(0)
                sample_pred = F.interpolate(
                    sample_pred, size=ref_mask.shape[2:], mode='bilinear')

                if out_dir is None:
                    continue

                sample_pred = sample_pred[0].transpose((1, 2, 0)) * 255
                # NOTE(review): the output name carries no '.png' suffix
                # although the mask is read with one -- confirm that
                # cv2.imwrite can infer the format from maskpath entries.
                cv2.imwrite(out_dir + '/' + maskpath[idx],
                            sample_pred.cpu().numpy())
def forward(self, input):
    """Apply self-attention on a pooled copy of ``input`` and add it back.

    Args:
        input: Input feature map. It is pooled by ``self.pool`` and the
            pooled tensor is unpacked as (N, C, H, W).

    Returns:
        The attention output, resized to ``[H * self.ds, W * self.ds]``
        (H, W being the pooled size), plus ``input``. The attention matrix
        has one row and one column per pooled spatial position.
    """
    x = self.pool(input)
    N, C, H, W = x.shape
    positions = H * W

    query = self.query_conv(x).reshape([N, -1, positions])
    query = query.transpose((0, 2, 1))
    key = self.key_conv(x).reshape([N, -1, positions])

    energy = paddle.bmm(query, key)
    energy = (self.key_channel ** -.5) * energy
    # Subtract the row maximum before the softmax to prevent overflow.
    row_max = paddle.max(energy, axis=-1, keepdim=True)
    attention = self.softmax(energy - row_max)

    value = self.value_conv(x).reshape([N, -1, positions])
    out = paddle.bmm(value, attention.transpose((0, 2, 1)))
    out = out.reshape([N, C, H, W])

    out = F.interpolate(out, [H * self.ds, W * self.ds])
    return out + input
def forward(self, body_feats):
    """Build the feature pyramid and its per-level spatial scales.

    Args:
        body_feats: Backbone features, one per entry of
            ``self.spatial_scale``.

    Returns:
        ``(fpn_output, spatial_scale)``. Extra levels are appended while
        ``self.num_outs`` exceeds the number of pyramid outputs, either by
        a stride-2 max pool or by extra convolutions, and every extra
        level adds half of the previous scale to ``spatial_scale``.
    """
    num_levels = len(self.spatial_scale)
    laterals = [
        self.lateral_convs[i](body_feats[i]) for i in range(num_levels)
    ]

    # Top-down pathway: add each upsampled level into the one below it.
    for i in range(num_levels - 1, 0, -1):
        upsample = F.interpolate(
            laterals[i], scale_factor=2., mode='nearest')
        laterals[i - 1] += upsample

    fpn_output = []
    for lvl in range(self.min_level, self.highest_backbone_level + 1):
        pos = lvl - self.min_level
        fpn_output.append(self.fpn_convs[pos](laterals[pos]))

    spatial_scale = self.spatial_scale
    if self.num_outs <= len(fpn_output):
        return fpn_output, spatial_scale

    if not self.has_extra_convs:
        # use max pool to get more levels on top of outputs
        # (Faster R-CNN, Mask R-CNN)
        fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
        spatial_scale = spatial_scale + [spatial_scale[-1] * 0.5]
        return fpn_output, spatial_scale

    # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
    extra_source = body_feats[-1] if self.use_c5 else fpn_output[-1]
    fpn_output.append(self.fpn_convs[num_levels](extra_source))
    spatial_scale = spatial_scale + [spatial_scale[-1] * 0.5]
    for i in range(num_levels + 1, self.num_outs):
        prev = fpn_output[-1]
        if self.relu_before_extra_convs:
            prev = F.relu(prev)
        fpn_output.append(self.fpn_convs[i](prev))
        spatial_scale = spatial_scale + [spatial_scale[-1] * 0.5]
    return fpn_output, spatial_scale
def forward(self, body_feats):
    """Build the feature pyramid, optionally with extra stages on top.

    Args:
        body_feats: Backbone features; one pyramid level is produced per
            entry.

    Returns:
        List of pyramid outputs. When ``self.extra_stage`` is positive,
        extra levels are appended: a single stride-2 max pool if the
        module has no extra convs, otherwise ``self.extra_stage``
        additional conv outputs.
    """
    num_levels = len(body_feats)
    laterals = [
        self.lateral_convs[i](feat) for i, feat in enumerate(body_feats)
    ]

    # Top-down pathway, from the last level down to the first.
    for lvl in range(num_levels - 1, 0, -1):
        upsample = F.interpolate(
            laterals[lvl], scale_factor=2., mode='nearest')
        laterals[lvl - 1] += upsample

    fpn_output = [
        self.fpn_convs[lvl](laterals[lvl]) for lvl in range(num_levels)
    ]

    if self.extra_stage <= 0:
        return fpn_output

    if not self.has_extra_convs:
        # use max pool to get more levels on top of outputs
        # (Faster R-CNN, Mask R-CNN)
        assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs'
        fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
        return fpn_output

    # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
    extra_source = body_feats[-1] if self.use_c5 else fpn_output[-1]
    fpn_output.append(self.fpn_convs[num_levels](extra_source))
    for i in range(1, self.extra_stage):
        prev = fpn_output[-1]
        if self.relu_before_extra_convs:
            prev = F.relu(prev)
        fpn_output.append(self.fpn_convs[num_levels + i](prev))
    return fpn_output
def forward(self, x, proxy):
    """Attend from every pixel of ``x`` to the entries of ``proxy``.

    When ``self.scale > 1`` the pixel features are pooled first, attention
    runs at the pooled resolution, and the result is bilinearly resized
    back to the original height and width.

    Args:
        x: Pixel feature map, unpacked as (b, c, h, w).
        proxy: Tensor the keys and values are computed from (via
            ``self.f_object`` and ``self.f_down``).

    Returns:
        Context tensor produced by ``self.f_up``, with the same spatial
        size as the incoming ``x``.
    """
    b, _, h, w = x.shape
    if self.scale > 1:
        x = self.pool(x)

    # Attention runs on the (possibly pooled) map, so the context must be
    # reshaped to *its* spatial size. Using the pre-pool h/w here gives a
    # reshape with the wrong element count whenever scale > 1.
    attn_h, attn_w = x.shape[2], x.shape[3]

    query = self.f_pixel(x).reshape([b, self.key_channels, -1])
    query = query.transpose([0, 2, 1])
    key = self.f_object(proxy).reshape([b, self.key_channels, -1])
    value = self.f_down(proxy).reshape([b, self.key_channels, -1])
    value = value.transpose([0, 2, 1])

    sim_map = paddle.matmul(query, key)
    sim_map = (self.key_channels ** -.5) * sim_map
    sim_map = F.softmax(sim_map, axis=-1)

    context = paddle.matmul(sim_map, value)
    context = context.transpose([0, 2, 1])
    context = context.reshape([b, self.key_channels, attn_h, attn_w])
    context = self.f_up(context)

    if self.scale > 1:
        # Restore the resolution of the incoming feature map.
        context = F.interpolate(
            context,
            size=[h, w],
            mode='bilinear',
            align_corners=self.align_corners)
    return context
def forward(self, body_feats):
    """Build the feature pyramid plus one max-pooled extra level.

    Args:
        body_feats: Backbone features, indexed by level.

    Returns:
        ``(fpn_output, spatial_scale)`` where the last output is a
        stride-2 max pool of the previous one and ``spatial_scale`` is
        ``self.spatial_scale`` extended by half of its last entry.
    """
    levels = range(self.min_level, self.max_level)

    # NOTE(review): `laterals` is filled by position but indexed by level
    # below, which only lines up when min_level == 0 -- confirm.
    laterals = [self.lateral_convs[lvl](body_feats[lvl]) for lvl in levels]

    # Top-down pathway, highest level first.
    for step in range(self.min_level + 1, self.max_level):
        lvl = self.max_level + self.min_level - step
        upsample = F.interpolate(
            laterals[lvl], scale_factor=2., mode='nearest')
        laterals[lvl - 1] = laterals[lvl - 1] + upsample

    fpn_output = [self.fpn_convs[lvl](laterals[lvl]) for lvl in levels]

    extension = F.max_pool2d(fpn_output[-1], 1, stride=2)
    spatial_scale = self.spatial_scale + [self.spatial_scale[-1] * 0.5]
    fpn_output.append(extension)
    return fpn_output, spatial_scale
def local_pairwise_distances2(x, y, max_distance=9):
    """Compute squared L2 distances inside a local search window.

    Both inputs are average-pooled by a factor of two before the window
    search. The summed squared differences are squashed with
    ``(sigmoid(d) - 0.5) * 2`` and bilinearly resized back to the original
    height and width.

    Args:
        x: Float32 tensor of shape [height, width, feature_dim].
        y: Float32 tensor of shape [height, width, feature_dim].
        max_distance: Integer, the number of padding cells added on every
            side of ``y`` (at the pooled resolution) to form the search
            window.

    Returns:
        Float32 distances tensor of shape [height, width, window], where
        ``window`` is the number of offsets produced by the unfold step
        (presumably (2 * max_distance + 1) ** 2 -- confirm).
    """
    ori_h, ori_w, _ = x.shape

    def pooled_nchw(t):
        # [H, W, C] -> [1, C, H/2, W/2]
        t = paddle.transpose(t, [2, 0, 1]).unsqueeze(0)
        return F.avg_pool2d(t, (2, 2), (2, 2))

    x = pooled_nchw(x)
    y = pooled_nchw(y)
    _, channels, height, width = x.shape

    # Pad with a huge value; after squaring and the sigmoid squashing
    # below, padded cells end up at the upper end of the output range.
    padding_val = 1e20
    pad_widths = (max_distance, max_distance, max_distance, max_distance)
    padded_y = F.pad(y, pad_widths, mode='constant', value=padding_val)

    offset_y = F.unfold(padded_y, kernel_sizes=[height, width])
    offset_y = offset_y.reshape([1, channels, height, width, -1])
    x = x.reshape([1, channels, height, width, 1])

    diff = x - offset_y
    dists = paddle.sum(paddle.multiply(diff, diff), axis=1)
    dists = dists.reshape([1, height, width, -1]).transpose([0, 3, 1, 2])
    dists = (paddle.nn.functional.sigmoid(dists) - 0.5) * 2

    dists = F.interpolate(
        dists, size=[ori_h, ori_w], mode='bilinear', align_corners=True)
    return dists.squeeze(0).transpose([1, 2, 0])
def forward(self, input):
    """Fuse every branch with all other branches.

    For output branch ``i``, each branch ``j > i`` is transformed by one
    layer and upsampled by ``2 ** (j - i)``, each branch ``j < i`` is
    passed through ``i - j`` consecutive layers, and the results are added
    to ``input[i]`` before a ReLU. Layers are consumed from
    ``self.residual_func_list`` strictly in visiting order.

    Args:
        input: Sequence of branch feature maps.

    Returns:
        List with one fused map per output branch
        (``self._actual_ch`` entries).
    """
    num_branches = len(self._in_channels)
    layer_idx = 0  # running position in self.residual_func_list
    outs = []

    for i in range(self._actual_ch):
        fused = input[i]
        for j in range(num_branches):
            if j == i:
                continue

            if j > i:
                # Branch j: one layer, then upsample to branch i's scale.
                branch = self.residual_func_list[layer_idx](input[j])
                layer_idx += 1
                branch = F.interpolate(branch, scale_factor=2**(j - i))
            else:
                # Branch j: a chain of (i - j) layers.
                branch = input[j]
                for _ in range(i - j):
                    branch = self.residual_func_list[layer_idx](branch)
                    layer_idx += 1

            fused = paddle.add(x=fused, y=branch)

        outs.append(F.relu(fused))
    return outs
def forward(self, x):
    """Collect the outputs of the selected blocks.

    Args:
        x: Input. It is bilinearly resized to 299x299 when
            ``self.resize_input`` is set and mapped through ``x * 2 - 1``
            when ``self.normalize_input`` is set.

    Returns:
        ``(out, aux)``: ``out`` lists the outputs of the blocks whose
        index is in ``self.output_blocks``; ``aux`` is the output of
        ``self.AuxLogits`` taken after block 2 when ``self.aux_logits`` is
        set, otherwise ``None``. Iteration stops after
        ``self.last_needed_block``.
    """
    if self.resize_input:
        x = F.interpolate(
            x,
            size=[299, 299],
            align_corners=False,
            align_mode=0,
            mode='bilinear')
    if self.normalize_input:
        x = x * 2 - 1

    collected = []
    aux = None
    for idx, block in enumerate(self.blocks):
        x = block(x)
        if self.aux_logits and (idx == 2):
            aux = self.AuxLogits(x)
        if idx in self.output_blocks:
            collected.append(x)
        if idx == self.last_needed_block:
            break
    return collected, aux
def forward(self, conv1_logit, x4):
    """Combine ``conv1_logit`` with ``x4`` and sum the parallel conv outputs.

    Args:
        conv1_logit: Tensor resized to the spatial size of ``x4`` before
            being projected by ``self.conv_x1``.
        x4: Tensor projected by ``self.conv_x4``.

    Returns:
        Sum of the outputs of every layer in ``self.conv2d_list`` applied
        to the refined, concatenated features.
    """
    x4_shape = paddle.shape(x4)
    H = x4_shape[2]
    W = x4_shape[3]

    conv1_logit = F.interpolate(
        conv1_logit, size=[H, W], mode='bilinear', align_corners=True)
    conv1_logit = self.conv_x1(conv1_logit)
    x4 = self.conv_x4(x4)  # original note: 1, 512, 81,161

    feats = paddle.concat([conv1_logit, x4], axis=1)
    refined = self.conv1(self.conv0(feats))
    refined = self.relu(self.add(feats, refined))

    out = self.conv2d_list[0](refined)
    for idx in range(1, len(self.conv2d_list)):
        out += self.conv2d_list[idx](refined)
    return out
def resize_pos_embed(self, pos_embed, old_hw, new_hw):
    """Resize a position-embedding weight to a new grid size.

    The first token along axis 1 is kept untouched; the remaining tokens
    are laid out on the ``old_hw`` grid, resized with bicubic
    interpolation and flattened back.

    Args:
        pos_embed (Tensor): the pos_embed weight
        old_hw (list[int]): the height and width of old pos_embed
        new_hw (list[int]): the height and width of new pos_embed

    Returns:
        Tensor: the resized pos_embed weight
    """
    cls_token = pos_embed[:, :1, :]
    grid_tokens = pos_embed[:, 1:, :]

    # tokens-last -> channels-first grid, so interpolate can resize it
    grid = grid_tokens.transpose([0, 2, 1])
    grid = grid.reshape([1, -1, old_hw[0], old_hw[1]])
    grid = F.interpolate(
        grid, new_hw, mode='bicubic', align_corners=False)

    # back to a token sequence
    grid_tokens = grid.flatten(2).transpose([0, 2, 1])
    return paddle.concat([cls_token, grid_tokens], axis=1)
def forward(self, x):
    """Return the main logit and, in training, the auxiliary logits.

    Args:
        x: Input tensor; every logit is resized to its spatial size.

    Returns:
        List of bilinearly resized logits: only the main logit outside
        training mode, and the main logit followed by the four auxiliary
        logits in training mode.
    """
    dfm = self.db(x)
    feat1, feat2, feat3, feat4, sfm = self.sb(x)

    logit_list = [self.head(self.bga(dfm, sfm))]
    if self.training:
        # Auxiliary heads are only evaluated while training.
        logit_list.append(self.aux_head1(feat1))
        logit_list.append(self.aux_head2(feat2))
        logit_list.append(self.aux_head3(feat3))
        logit_list.append(self.aux_head4(feat4))

    out_hw = paddle.shape(x)[2:]
    resized = []
    for item in logit_list:
        resized.append(
            F.interpolate(
                item,
                out_hw,
                mode='bilinear',
                align_corners=self.align_corners))
    return resized
def _transform_inputs(self, inputs): """ Transform inputs for decoder. Args: inputs (list[Tensor]): List of multi-level img features. Returns: Tensor: The transformed inputs """ if self.input_transform == 'resize_concat': inputs = [inputs[i] for i in self.in_index] upsampled_inputs = [ F.interpolate(x, size=paddle.shape(inputs[0])[2:], mode='bilinear', align_corners=self.align_corners) for x in inputs ] inputs = paddle.concat(upsampled_inputs, axis=1) elif self.input_transform == 'multiple_select': inputs = [inputs[i] for i in self.in_index] else: inputs = inputs[self.in_index[0]] return inputs
def forward(self, blocks):
    """Run the top-down (FPN) pass followed by the bottom-up (PAN) pass.

    Args:
        blocks: Sequence of ``self.num_blocks`` feature maps; they are
            processed in reverse order by the FPN pass.

    Returns:
        List of PAN outputs, reversed relative to the order in which the
        PAN pass produced them.
    """
    assert len(blocks) == self.num_blocks
    channel_axis = 1 if self.data_format == 'NCHW' else -1
    last_idx = self.num_blocks - 1

    # fpn
    fpn_feats = []
    for i, block in enumerate(blocks[::-1]):
        if i > 0:
            # `route` comes from the previous iteration.
            block = paddle.concat([route, block], axis=channel_axis)
        route, tip = self.fpn_blocks[i](block)
        fpn_feats.append(tip)

        if i < last_idx:
            route = F.interpolate(
                self.fpn_routes[i](route),
                scale_factor=2.,
                data_format=self.data_format)

    # pan: start from the last FPN output and walk back.
    pan_feats = [fpn_feats[-1]]
    route = fpn_feats[last_idx]
    for i in range(last_idx - 1, -1, -1):
        route = self.pan_routes[i](route)
        block = paddle.concat([route, fpn_feats[i]], axis=channel_axis)
        route, tip = self.pan_blocks[i](block)
        pan_feats.append(tip)

    return pan_feats[::-1]
def forward(self, x):
    """Forward function.

    Args:
        x: Mapping whose ``'image'`` entry is fed to ``self.patch_embed``.

    Returns:
        Tuple with one feature map per layer index listed in
        ``self.out_indices``; each is normalized by the matching
        ``norm{i}`` layer and rearranged with ``transpose((0, 3, 1, 2))``.
    """
    tokens = self.patch_embed(x['image'])
    _, _, Wh, Ww = tokens.shape

    if self.ape:
        # interpolate the position embedding to the corresponding size
        pos = F.interpolate(
            self.absolute_pos_embed, size=(Wh, Ww), mode='bicubic')
        tokens = tokens + pos
    tokens = tokens.flatten(2).transpose([0, 2, 1])
    tokens = self.pos_drop(tokens)

    outs = []
    for i in range(self.num_layers):
        x_out, H, W, tokens, Wh, Ww = self.layers[i](tokens, Wh, Ww)
        if i not in self.out_indices:
            continue

        x_out = getattr(self, f'norm{i}')(x_out)
        feat = x_out.reshape((-1, H, W, self.num_features[i]))
        outs.append(feat.transpose((0, 3, 1, 2)))
    return tuple(outs)
def test_case(self):
    """Check ``resize_trilinear`` against the NumPy reference.

    Five variants (explicit shape with NDHWC layout, shape list holding a
    tensor, shape tensor, ``actual_shape`` override, scale tensor) are
    fetched and each compared with the reference at 12x18x8. Three
    ``interpolate`` calls are also added to the program but are not part
    of the fetch list.
    """
    x = fluid.data(name="x", shape=[2, 3, 6, 9, 4], dtype="float32")
    y = fluid.data(name="y", shape=[2, 6, 9, 4, 3], dtype="float32")

    dim = fluid.data(name="dim", shape=[1], dtype="int32")
    shape_tensor = fluid.data(name="shape_tensor", shape=[3], dtype="int32")
    actual_size = fluid.data(name="actual_size", shape=[3], dtype="int32")
    scale_tensor = fluid.data(
        name="scale_tensor", shape=[1], dtype="float32")

    out1 = fluid.layers.resize_trilinear(
        y, out_shape=[12, 18, 8], data_format='NDHWC')
    out2 = fluid.layers.resize_trilinear(x, out_shape=[12, dim, 8])
    out3 = fluid.layers.resize_trilinear(x, out_shape=shape_tensor)
    out4 = fluid.layers.resize_trilinear(
        x, out_shape=[4, 4, 8], actual_shape=actual_size)
    out5 = fluid.layers.resize_trilinear(x, scale=scale_tensor)
    # Built but never fetched below, so these only check that the graph
    # can be constructed.
    out6 = interpolate(
        x, scale=scale_tensor, resample='TRILINEAR', data_format="NCDHW")
    out7 = interpolate(
        x, out_shape=[4, 4, 8], resample='TRILINEAR', data_format="NCDHW")
    out8 = interpolate(
        x, out_shape=shape_tensor, resample='TRILINEAR',
        data_format="NCDHW")

    x_data = np.random.random((2, 3, 6, 9, 4)).astype("float32")
    dim_data = np.array([18]).astype("int32")
    shape_data = np.array([12, 18, 8]).astype("int32")
    actual_size_data = np.array([12, 18, 8]).astype("int32")
    scale_data = np.array([2.0]).astype("float32")

    place = (core.CUDAPlace(0)
             if core.is_compiled_with_cuda() else core.CPUPlace())
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    feed = {
        "x": x_data,
        "y": np.transpose(x_data, (0, 2, 3, 4, 1)),
        "dim": dim_data,
        "shape_tensor": shape_data,
        "actual_size": actual_size_data,
        "scale_tensor": scale_data
    }
    results = exe.run(fluid.default_main_program(),
                      feed=feed,
                      fetch_list=[out1, out2, out3, out4, out5],
                      return_numpy=True)

    expect_res = trilinear_interp_np(
        x_data, out_d=12, out_h=18, out_w=8, align_mode=1)

    # The first result uses the channels-last layout.
    self.assertTrue(
        np.allclose(results[0], np.transpose(expect_res, (0, 2, 3, 4, 1))))
    for res in results[1:]:
        self.assertTrue(np.allclose(res, expect_res))
def forward(self, x, feat_list, s_input):
    """Run the gated shape stream and return its sigmoid output.

    Three side outputs are computed from the backbone features selected
    by ``self.backbone_indices[1:]``; the stream then alternates residual
    block, resize, 1-step projection and gate, with every intermediate
    resized to the spatial size of ``x``.

    The previous implementation also computed a Canny edge map from
    ``x`` (a device-to-host copy plus a per-image ``cv2.Canny`` call) that
    was never used in the returned value; that dead work has been removed.

    Args:
        x: Input tensor; only its spatial size is used here.
        feat_list: Backbone features, indexed via ``self.backbone_indices``.
        s_input: Tensor that seeds the shape stream.

    Returns:
        Single-element list with the sigmoid-activated stream output.
    """
    target_hw = paddle.shape(x)[2:]

    def to_input_size(t):
        # Every tensor in the stream lives at the input resolution.
        return F.interpolate(
            t,
            target_hw,
            mode='bilinear',
            align_corners=self.align_corners)

    m1f = to_input_size(s_input)

    # The first backbone index is skipped; exactly three must remain.
    l1, l2, l3 = [
        feat_list[self.backbone_indices[i]]
        for i in range(1, len(self.backbone_indices))
    ]
    s1 = to_input_size(self.dsn1(l1))
    s2 = to_input_size(self.dsn2(l2))
    s3 = to_input_size(self.dsn3(l3))

    cs = m1f
    stages = (
        (self.res1, self.d1, self.gate1, s1),
        (self.res2, self.d2, self.gate2, s2),
        (self.res3, self.d3, self.gate3, s3),
    )
    for res, proj, gate, side in stages:
        cs = to_input_size(res(cs))
        cs = gate(proj(cs), side)

    cs = to_input_size(self.fuse(cs))
    cs = F.sigmoid(cs)  # Output of shape stream

    return [cs, ]
def loss(self, logit_dict, label_dict, loss_func_dict=None):
    """Compute the glance, focus, cm, error and optional refine losses.

    Args:
        logit_dict: Network outputs; the keys ``'glance'``, ``'focus'``,
            ``'fusion'``, ``'error'`` and (when ``self.if_refine``)
            ``'refine'`` are read.
        label_dict: Labels; the keys ``'trimap'`` and ``'alpha'`` are read.
        loss_func_dict: Optional mapping from loss name to a list of loss
            callables. When ``None`` a default set is built.

    Returns:
        Dict with the individual losses under ``'glance'``, ``'focus'``,
        ``'cm'``, ``'err'``, optionally ``'refine'``, and their weighted
        sum under ``'all'``.
    """
    if loss_func_dict is None:
        loss_func_dict = defaultdict(list)
        loss_func_dict['glance'].append(nn.NLLLoss())
        loss_func_dict['focus'].append(MRSD())
        loss_func_dict['cm'].append(MRSD())
        loss_func_dict['err'].append(paddleseg.models.MSELoss())
        loss_func_dict['refine'].append(paddleseg.models.L1Loss())

    alpha = label_dict['alpha']
    alpha_hw = alpha.shape[2:]
    loss = {}

    # glance loss computation
    # get glance label: trimap value 128 -> class 1, value 0 -> class 2,
    # everything else -> class 0
    glance_label = F.interpolate(
        label_dict['trimap'],
        logit_dict['glance'].shape[2:],
        mode='nearest',
        align_corners=False)
    glance_label_trans = (glance_label == 128).astype('int64')
    glance_label_bg = (glance_label == 0).astype('int64')
    glance_label = glance_label_trans + glance_label_bg * 2
    loss_glance = loss_func_dict['glance'][0](
        paddle.log(logit_dict['glance'] + 1e-6), glance_label.squeeze(1))
    loss['glance'] = loss_glance

    # focus loss computation
    focus_label = F.interpolate(
        alpha,
        logit_dict['focus'].shape[2:],
        mode='bilinear',
        align_corners=False)
    loss_focus = loss_func_dict['focus'][0](
        logit_dict['focus'], focus_label, glance_label_trans)
    loss['focus'] = loss_focus

    # collaborative matting loss (fusion_sigmoid loss)
    loss_cm = loss_func_dict['cm'][0](logit_dict['fusion'], focus_label)
    loss['cm'] = loss_cm

    # error loss: predicted error map vs. actual |fusion - alpha|
    err = F.interpolate(
        logit_dict['error'],
        alpha_hw,
        mode='bilinear',
        align_corners=False)
    fusion_full = F.interpolate(
        logit_dict['fusion'],
        alpha_hw,
        mode='bilinear',
        align_corners=False)
    err_label = (fusion_full - alpha).abs()
    loss_err = loss_func_dict['err'][0](err, err_label)
    loss['err'] = loss_err

    loss_all = (0.25 * loss_glance + 0.25 * loss_focus + 0.25 * loss_cm +
                loss_err)

    # refine loss
    if self.if_refine:
        loss_refine = loss_func_dict['refine'][0](logit_dict['refine'],
                                                  alpha)
        loss['refine'] = loss_refine
        loss_all = loss_all + loss_refine

    loss['all'] = loss_all
    return loss