def exist_objs_3(keep, masks, classes, scores, upsampled_size_out, resize_shape, ori_shape):
    keep = L.reshape(keep, (-1, ))
    keep.stop_gradient = True
    masks = L.gather(masks, keep)      # [M4, s4, s4]  mask probabilities of the M4 objects
    scores = L.gather(scores, keep)    # [M4, ]        scores of the M4 objects
    classes = L.gather(classes, keep)  # [M4, ]        class ids of the M4 objects

    # Fifth filtering pass: keep only the cfg['max_per_img'] highest-scoring objects.
    _, sort_inds = L.argsort(scores, axis=-1, descending=True)
    sort_inds = sort_inds[:cfg['max_per_img']]
    sort_inds.stop_gradient = True
    masks = L.gather(masks, sort_inds)      # [M5, s4, s4]  mask probabilities of the M5 objects
    scores = L.gather(scores, sort_inds)    # [M5, ]        scores of the M5 objects
    classes = L.gather(classes, sort_inds)  # [M5, ]        class ids of the M5 objects

    masks = L.resize_bilinear(L.unsqueeze(masks, axes=[0]), out_shape=upsampled_size_out,
                              align_corners=False, align_mode=0)[:, :, :resize_shape[0], :resize_shape[1]]  # remove the padded black border
    masks = L.resize_bilinear(masks, out_shape=ori_shape[:2],
                              align_corners=False, align_mode=0)  # interpolate to the original image size
    masks = L.cast(masks > cfg['mask_thr'], 'float32')[0]
    return masks, classes, scores
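# For reference, a minimal NumPy sketch of the same top-k filtering and thresholding
# logic. cfg is not shown above, so max_per_img=100 and mask_thr=0.5 below are
# hypothetical values, not taken from the repository's config.
import numpy as np

def topk_and_threshold(masks, classes, scores, max_per_img=100, mask_thr=0.5):
    order = np.argsort(-scores)[:max_per_img]             # keep the highest-scoring objects
    masks, classes, scores = masks[order], classes[order], scores[order]
    bin_masks = (masks > mask_thr).astype(np.float32)     # binarize the mask probabilities
    return bin_masks, classes, scores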
def test_resize_bilinear(self):
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[3, 9, 6], dtype="float32")
        output = layers.resize_bilinear(x, out_shape=[12, 12])
        self.assertIsNotNone(output)
        output = layers.resize_bilinear(x, scale=3)
        self.assertIsNotNone(output)
        print(str(program))
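# The test above only checks graph construction. A small sketch (assuming Paddle 1.x
# static graph) that actually executes the two ops; the feed shape [1, 3, 9, 6]
# includes the implicit batch dimension added by layers.data.
import numpy as np
import paddle.fluid as fluid

program = fluid.Program()
with fluid.program_guard(program):
    x = fluid.layers.data(name='x', shape=[3, 9, 6], dtype='float32')
    out1 = fluid.layers.resize_bilinear(x, out_shape=[12, 12])  # explicit target size
    out2 = fluid.layers.resize_bilinear(x, scale=3)             # 3x upsampling

exe = fluid.Executor(fluid.CPUPlace())
feed = {'x': np.random.rand(1, 3, 9, 6).astype('float32')}
r1, r2 = exe.run(program, feed=feed, fetch_list=[out1, out2])
print(r1.shape, r2.shape)  # expected: (1, 3, 12, 12) and (1, 3, 27, 18)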
def get_prediction(self, feats, eval=True):
    name_list = list(feats.keys())
    feats2 = [feats[name] for name in name_list]  # [p2, p3, p4, p5]
    feats = feats2
    # There are 5 tensors with strides [8, 8, 16, 32, 32], so the first and last tensors are interpolated first.
    # align_corners=False and align_mode=0 must be set to match the output of the original SOLO.
    new_feats = [L.resize_bilinear(feats[0], out_shape=L.shape(feats[1])[2:], align_corners=False, align_mode=0),
                 feats[1],
                 feats[2],
                 feats[3],
                 L.resize_bilinear(feats[4], out_shape=L.shape(feats[3])[2:], align_corners=False, align_mode=0)]
    kernel_preds, cls_preds = [], []
    for idx in range(len(self.seg_num_grids)):
        krn_feat = new_feats[idx]  # fed to the kernel branch

        # ============ kernel branch ============
        ins_kernel_feat = concat_coord(krn_feat)  # append coordinate channels. [N, c+2, h, w]
        kernel_feat = ins_kernel_feat             # ins_kernel_feat is not used again
        seg_num_grid = self.seg_num_grids[idx]    # number of grid cells per row (and column) of this feature map

        # Interpolate kernel_feat to the grid resolution. [N, c+2, seg_num_grid, seg_num_grid]
        kernel_feat = L.resize_bilinear(kernel_feat, out_shape=[seg_num_grid, seg_num_grid],
                                        align_corners=False, align_mode=0)

        # Drop the 2 inserted coordinate channels to get cls_feat. [N, c, seg_num_grid, seg_num_grid]
        cls_feat = kernel_feat[:, :-2, :, :]

        for kernel_layer in self.krn_convs:
            kernel_feat = kernel_layer(kernel_feat)
        for class_layer in self.cls_convs:
            cls_feat = class_layer(cls_feat)
        kernel_pred = kernel_feat  # [N, 256, seg_num_grid, seg_num_grid]  predicted convolution kernel for each grid cell
        cls_pred = cls_feat        # [N, 80, seg_num_grid, seg_num_grid]   per-cell class scores, before sigmoid() activation

        if eval:
            # [N, seg_num_grid, seg_num_grid, 80]  per-cell class probabilities, after sigmoid() activation
            cls_pred = L.transpose(points_nms(L.sigmoid(cls_pred), kernel=2), perm=[0, 2, 3, 1])

        kernel_preds.append(kernel_pred)
        cls_preds.append(cls_pred)
    return [kernel_preds, cls_preds]
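# points_nms is called above but not defined in this excerpt. The sketch below is the
# usual SOLO-style local-maximum suppression in fluid (an assumption, not necessarily
# this repository's exact implementation): it keeps only grid cells that equal the
# maximum of their 2x2 neighbourhood.
import paddle.fluid.layers as L

def points_nms(heat, kernel=2):
    # Max pooling with stride 1 and padding 1 yields an (H+1, W+1) map.
    hmax = L.pool2d(heat, pool_size=kernel, pool_type='max', pool_stride=1, pool_padding=1)
    # A cell survives only if it is the local maximum of its 2x2 neighbourhood.
    keep = L.cast(L.equal(hmax[:, :, :-1, :-1], heat), 'float32')
    return heat * keep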
def proto_net(x):
    x = P.conv2d(x, 256, filter_size=(3, 3), stride=1, padding=1,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01), name="proto_net.0.weight"),
                 bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0), name="proto_net.0.bias"))
    x = P.relu(x)
    x = P.conv2d(x, 256, filter_size=(3, 3), stride=1, padding=1,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01), name="proto_net.2.weight"),
                 bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0), name="proto_net.2.bias"))
    x = P.relu(x)
    x = P.conv2d(x, 256, filter_size=(3, 3), stride=1, padding=1,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01), name="proto_net.4.weight"),
                 bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0), name="proto_net.4.bias"))
    x = P.relu(x)
    x = P.resize_bilinear(x, scale=float(2))  # 2x bilinear upsampling
    x = P.relu(x)
    x = P.conv2d(x, 256, filter_size=(3, 3), stride=1, padding=1,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01), name="proto_net.8.weight"),
                 bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0), name="proto_net.8.bias"))
    x = P.relu(x)
    x = P.conv2d(x, 32, filter_size=(1, 1), stride=1,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01), name="proto_net.10.weight"),
                 bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0), name="proto_net.10.bias"))
    return x
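# A minimal shape check for proto_net: a sketch assuming a Paddle 1.x static graph and a
# hypothetical 256-channel, 69x69 input feature; ParamAttr and fluid.initializer are
# assumed to be imported as in the snippet's original module.
import numpy as np
import paddle.fluid as fluid

main_prog, start_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, start_prog):
    feat = fluid.layers.data(name='feat', shape=[256, 69, 69], dtype='float32')  # hypothetical input size
    proto = proto_net(feat)  # expected static shape: [-1, 32, 138, 138]

exe = fluid.Executor(fluid.CPUPlace())
exe.run(start_prog)  # initialize the conv weights and biases
out, = exe.run(main_prog,
               feed={'feat': np.zeros((1, 256, 69, 69), dtype='float32')},
               fetch_list=[proto])
print(out.shape)  # (1, 32, 138, 138): 2x upsampled, 32 prototype channels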
def __call__(self, image):
    # Calculate new size. Ensure that it is even so that crop/pad becomes easier
    h_orig, w_orig = image.shape[2:]
    if h_orig != w_orig:
        raise NotImplementedError
    h_new = round(h_orig / self.scale_factor)
    h_new += (h_new - h_orig) % 2
    w_new = round(w_orig / self.scale_factor)
    w_new += (w_new - w_orig) % 2

    if isinstance(image, PTensor):
        image_resized = layers.resize_bilinear(image, [h_new, w_new], align_corners=False)
    else:
        image_resized = cv.resize(image, (w_new, h_new), interpolation=cv.INTER_LINEAR)

    return self.crop_to_output(image_resized)
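# A quick plain-Python check of the rounding arithmetic above, using a hypothetical
# 720x720 input and scale_factor=1.75: the parity correction bumps the odd 411 to 412
# so the difference from the original size stays even and crop/pad is symmetric.
h_orig = 720                            # hypothetical square input height
scale_factor = 1.75                     # hypothetical scale factor

h_new = round(h_orig / scale_factor)    # 720 / 1.75 = 411.43 -> 411 (odd)
h_new += (h_new - h_orig) % 2           # 411 - 720 is odd, so add 1
print(h_new)                            # 412; 720 - 412 = 308 is even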