def __call__(self, net, bboxes):
    """Pool a fixed-size patch out of ``net`` for every box.

    Args:
        net: feature map, unpacked here as [batch_size, H, W, C].
        bboxes: [batch_size, X, 4] boxes in relative coordinates, ordered
            (ymin, xmin, ymax, xmax).

    Returns:
        A tensor with one patch per box and C channels, Y = batch_size * X
        patches in total. ``roi_pooling`` is asked for
        ``output_size * bin_size`` samples per axis and the result is then
        max-pooled with kernel/stride ``bin_size`` to bring it back to
        ``self.output_size``.
    """
    with tf.name_scope("WROIPooling"):
        batch_size, H, W, C = wmlt.combined_static_and_dynamic_shape(net)
        _, box_nr, _ = wmlt.combined_static_and_dynamic_shape(bboxes)

        # One batch index per box, as a float column so it can be
        # concatenated in front of the box coordinates.
        batch_index = _make_batch_index_for_pooler(bboxes)
        batch_index = tf.reshape(batch_index, [-1, 1])
        batch_index = tf.cast(batch_index, tf.float32)

        bboxes = tf.reshape(bboxes, [-1, 4])
        absolute_bboxes = boxes_relative_to_absolute(bboxes, width=W, height=H)
        # Reorder the columns (ymin, xmin, ymax, xmax) -> (xmin, ymin, xmax, ymax).
        absolute_bboxes = tf.transpose(absolute_bboxes)
        absolute_bboxes = tf.gather(absolute_bboxes, [1, 0, 3, 2])
        absolute_bboxes = tf.transpose(absolute_bboxes)
        bboxes_with_batch_index = tf.concat(
            values=[batch_index, absolute_bboxes], axis=1)

        pool_height, pool_width = self.output_size
        # Accept tuples as well as lists: the kernel below is built by list
        # concatenation.
        bin_size = list(self.bin_size)
        width = pool_width * bin_size[1]
        height = pool_height * bin_size[0]
        net = roi_pooling(net,
                          bboxes_with_batch_index,
                          pool_height=height,
                          pool_width=width)
        net = tf.reshape(net, [batch_size * box_nr, height, width, C])
        # Pool whenever either axis was over-sampled. Requiring both axes to
        # be > 1 would leave e.g. bin_size == [1, 2] at the over-sampled size.
        if bin_size[0] > 1 or bin_size[1] > 1:
            window = [1] + bin_size + [1]
            net = tf.nn.max_pool(net,
                                 ksize=window,
                                 strides=window,
                                 padding="SAME")
        return net
def forward(self, features, batched_inputs):
    """Balance a list of feature maps against each other.

    Every entry of ``features`` is resized to the spatial size of the entry
    at index 1, the resized maps are averaged and smoothed with a 3x3 conv,
    and the smoothed map is resized back and added to each original entry.

    Args:
        features: iterable of feature maps; it is consumed with
            ``list(features)``, so the entries themselves must be tensors.
        batched_inputs: unused.

    Returns:
        A list with one merged tensor per input feature map.
    """
    norm_fn, norm_params = odt.get_norm("evo_norm_s0",
                                        is_training=self.is_training)
    outputs = []
    with tf.variable_scope("BalanceBackboneHook"):
        del batched_inputs
        ref_index = 1
        levels = list(features)
        ref_feature = levels[ref_index]
        with tf.name_scope("fusion"):
            ref_shape = wmlt.combined_static_and_dynamic_shape(ref_feature)
            # Bring every level to the reference resolution.
            aligned = [
                level if idx == ref_index else tf.image.resize_bilinear(
                    level, ref_shape[1:3], name=f"resize{idx}")
                for idx, level in enumerate(levels)
            ]
            balanced = tf.add_n(aligned) / float(len(aligned))
            balanced = slim.conv2d(balanced,
                                   balanced.get_shape().as_list()[-1],
                                   [3, 3],
                                   activation_fn=None,
                                   normalizer_fn=norm_fn,
                                   normalizer_params=norm_params,
                                   scope="smooth")
        for idx, level in enumerate(levels):
            with tf.name_scope(f"merge{idx}"):
                level_shape = wmlt.combined_static_and_dynamic_shape(level)
                rescaled = tf.image.resize_bilinear(balanced, level_shape[1:3])
                outputs.append(level + rescaled)
    return outputs
def get_pred_mask_for_inference(pred_coeff, protonet):
    """Combine mask coefficients with the prototype maps.

    ``protonet`` is flattened over its first two axes to [H * W, C] and
    multiplied (transposed) by ``pred_coeff``, so ``pred_coeff`` must have C
    columns.

    Returns:
        A tensor of shape [rows of pred_coeff, H, W].
    """
    height, width, channels = wmlt.combined_static_and_dynamic_shape(
        protonet)[:3]
    flat_proto = tf.reshape(protonet, [height * width, channels])
    flat_mask = tf.matmul(pred_coeff, flat_proto, transpose_b=True)
    instance_nr = wmlt.combined_static_and_dynamic_shape(flat_mask)[0]
    return tf.reshape(flat_mask, [instance_nr, height, width])
def mask_rcnn_loss_old(inputs,
                       pred_mask_logits,
                       proposals: EncodedData,
                       fg_selection_mask,
                       log=True):
    '''
    Mean sigmoid cross-entropy between predicted masks and the ground-truth
    masks cropped to the foreground proposal boxes.

    :param inputs: inputs[GT_MASKS] is [batch_size,N,H,W]
    :param pred_mask_logits: [Y,H,W,C]; C==1 means class-agnostic masks,
    otherwise one channel per class. H,W is the mask size, not a position in
    the original image.
    :param proposals: proposals.indices is [batch_size,M] (background entries
    are -1), proposals.boxes is unpacked as [batch_size,M,box_dim],
    proposals.gt_object_logits is flattened to [batch_size*M]
    :param fg_selection_mask: [X] with X = batch_size*M;
    Y = tf.reduce_sum(fg_selection_mask)
    :param log: unused
    :return: scalar loss tensor
    '''
    is_agnostic = pred_mask_logits.get_shape().as_list()[-1] == 1
    _, mask_H, mask_W, _ = wmlt.combined_static_and_dynamic_shape(
        pred_mask_logits)
    assert mask_H == mask_W, "Mask prediction must be square!"

    gt_masks = inputs[GT_MASKS]  # [batch_size,N,H,W]
    # With high-resolution inputs this can use too much GPU memory, so it
    # runs on the CPU.
    with tf.device("/cpu:0"):
        _, _, H, W = wmlt.combined_static_and_dynamic_shape(gt_masks)
        # Background proposals carry index -1; relu maps them to 0 so the
        # gather stays in range (they are dropped by the mask below).
        gt_masks = wmlt.batch_gather(gt_masks, tf.nn.relu(proposals.indices))
        gt_masks = tf.reshape(gt_masks, [-1, H, W])
        gt_masks = tf.boolean_mask(gt_masks, fg_selection_mask)

    batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
        proposals.boxes)
    fg_boxes = tf.reshape(proposals.boxes, [batch_size * box_nr, box_dim])
    fg_boxes = tf.boolean_mask(fg_boxes, fg_selection_mask)

    # Same memory consideration as above.
    with tf.device("/cpu:0"):
        gt_masks = tf.expand_dims(gt_masks, axis=-1)
        target_masks = wmlt.tf_crop_and_resize(gt_masks, fg_boxes,
                                               [mask_H, mask_W])

    if not is_agnostic:
        gt_classes = tf.reshape(proposals.gt_object_logits, [-1])
        gt_classes = tf.boolean_mask(gt_classes, fg_selection_mask)
        pred_mask_logits = tf.transpose(pred_mask_logits, [0, 3, 1, 2])
        # Predictions do not contain the background class, hence the -1.
        pred_mask_logits = wmlt.batch_gather(pred_mask_logits, gt_classes - 1)
        pred_mask_logits = tf.expand_dims(pred_mask_logits, axis=-1)

    per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=target_masks, logits=pred_mask_logits)
    return tf.reduce_mean(per_pixel_loss)
def get_pred_mask(pred_coeff, protonet, foreground_idxs):
    """Build the masks of the foreground entries of every batch element.

    For each batch element, the coefficients selected by
    ``foreground_idxs`` are multiplied with that element's prototypes
    (flattened to [H * W, C]) and reshaped to [selected, H, W].

    The batch dimension is iterated with Python ``range``, so it has to be
    statically known.

    Returns:
        The per-element masks concatenated along axis 0.
    """
    batch_size, height, width, channels = (
        wmlt.combined_static_and_dynamic_shape(protonet)[:4])
    per_image_masks = []
    for b in range(batch_size):
        fg_coeff = tf.boolean_mask(pred_coeff[b], foreground_idxs[b])
        flat_proto = tf.reshape(protonet[b], [height * width, channels])
        flat_mask = tf.matmul(fg_coeff, flat_proto, transpose_b=True)
        fg_count = wmlt.combined_static_and_dynamic_shape(flat_mask)[0]
        per_image_masks.append(
            tf.reshape(flat_mask, [fg_count, height, width]))
    return tf.concat(per_image_masks, axis=0)
def forward(self, features, batched_inputs):
    """Fuse all feature levels into the first one.

    Every level after the first is resized to the first level's spatial
    size and averaged; the average is added to the first level and smoothed
    with a 3x3 conv. The result is stored in ``features`` under
    ``F<level>``, where ``<level>`` is the integer parsed from the first
    key after its leading character.

    Args:
        features: mapping of level name to tensor; it is modified in place.
        batched_inputs: unused.

    Returns:
        The same ``features`` object, with the extra ``F<level>`` entry.
    """
    norm_fn, norm_params = odt.get_norm(self.cfg.NORM, self.is_training)
    act_fn = odt.get_activation_fn(self.cfg.ACTIVATION_FN)
    with tf.variable_scope("FusionBackboneHookV2"):
        del batched_inputs
        end_points = list(features.items())
        first_key, first_feature = end_points[0]
        target_shape = wmlt.combined_static_and_dynamic_shape(first_feature)
        resized = [
            tf.image.resize_bilinear(feature, target_shape[1:3])
            for _, feature in end_points[1:]
        ]
        fused = tf.add_n(resized) / float(len(resized))
        # Compared with v2, sum is used instead of concat.
        fused = first_feature + fused
        level0 = int(first_key[1:])
        fused = slim.conv2d(fused,
                            first_feature.get_shape().as_list()[-1], [3, 3],
                            activation_fn=act_fn,
                            normalizer_fn=norm_fn,
                            normalizer_params=norm_params,
                            scope=f"smooth{level0}")
        features[f'F{level0}'] = fused
        return features
def smooth_l1_loss(self):
    """
    Compute the box regression loss (Huber loss, summed over the foreground
    proposals and divided by the total number of proposals).

    Returns:
        scalar Tensor, already multiplied by
        cfg.MODEL.ROI_HEADS.BOX_REG_LOSS_SCALE
    """
    with tf.name_scope("box_regression_loss"):
        target_deltas = self.box2box_transform.get_deltas_by_proposals_data(
            self.proposals)
        batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
            target_deltas)
        target_deltas = tf.reshape(target_deltas,
                                   [batch_size * box_nr, box_dim])
        # The head is class-agnostic when it predicts exactly one box per
        # proposal.
        pred_width = self.pred_proposal_deltas.get_shape().as_list()[-1]
        cls_agnostic_bbox_reg = pred_width == box_dim
        num_classes = self.pred_class_logits.get_shape().as_list()[-1]
        fg_num_classes = num_classes - 1

        # Box delta loss is only computed between the prediction for the gt
        # class and the target; there is no loss defined on predictions for
        # non-gt classes and background (class 0 here).
        is_fg = tf.greater(self.gt_classes, 0)
        target_deltas = tf.boolean_mask(target_deltas, is_fg)
        pred_deltas = tf.boolean_mask(self.pred_proposal_deltas, is_fg)
        fg_classes = tf.boolean_mask(self.gt_classes, is_fg)
        if not cls_agnostic_bbox_reg:
            pred_deltas = tf.reshape(pred_deltas,
                                     [-1, fg_num_classes, box_dim])
            # Predictions have no background slot, hence the -1.
            pred_deltas = wmlt.select_2thdata_by_index_v2(
                pred_deltas, fg_classes - 1)

        loss_box_reg = tf.losses.huber_loss(
            predictions=pred_deltas,
            labels=target_deltas,
            loss_collection=None,
            reduction=tf.losses.Reduction.SUM,
        )
        # The loss is normalized using the total number of regions (R), not
        # the number of foreground regions, even though the box regression
        # loss is only defined on foreground regions. Doing so gives equal
        # training influence to each foreground example. Consider two
        # minibatches: (1) contains a single foreground region, (2) contains
        # 100 foreground regions. Normalizing by the number of foreground
        # regions would give the single example in (1) 100 times as much
        # influence as each example in (2); normalizing by R gives all of
        # them equal influence.
        num_samples = wmlt.num_elements(self.gt_classes)
        loss_box_reg = loss_box_reg / num_samples
        wsummary.histogram_or_scalar(loss_box_reg, "fast_rcnn/box_reg_loss")
        return loss_box_reg * self.cfg.MODEL.ROI_HEADS.BOX_REG_LOSS_SCALE
def fusion(self, idx, features, level, scope):
    """Weighted fusion of ``features`` into the resolution of ``features[idx]``.

    Every feature other than ``features[idx]`` gets a learnable scalar
    weight (initialised to one, passed through relu + 1e-8 and
    ``wnnl.scale_gradient``), is resized with ``self.interpolate_op`` and
    scaled. ``features[idx]`` enters the sum unscaled, i.e. with an implicit
    weight of 1.0. The sum is divided by the sum of all weights.
    """
    with tf.variable_scope(scope):
        weights = []
        for pos in range(len(features)):
            if pos == idx:
                # Placeholder keeps ``weights`` aligned with ``features``.
                weights.append(None)
                continue
            src_level = level + idx - pos
            with tf.name_scope(f"get_w{src_level}"):
                w = tf.get_variable(f"w{src_level}",
                                    shape=(),
                                    dtype=tf.float32,
                                    initializer=tf.ones_initializer)
                w = tf.nn.relu(w) + 1e-8
                w = wnnl.scale_gradient(w, 0.1, is_training=self.is_training)
                weights.append(w)

        target_size = wmlt.combined_static_and_dynamic_shape(
            features[idx])[1:3]
        weighted = []
        for w, feature in zip(weights, features):
            if w is None:
                weighted.append(feature)
            else:
                resized = self.interpolate_op(feature, target_size)
                weighted.append(w * resized)

        weights.remove(None)
        # The extra 1.0 is the implicit weight of features[idx].
        return tf.add_n(weighted) / tf.add_n(weights + [1.0])
def forward(self, features, batched_inputs):
    """Produce one feature map per (level, anchor size, aspect ratio).

    ``features`` is first passed through ``self.bh``. For every level in
    P3..P7 and every configured size/ratio, a 32-channel separable conv is
    applied and the result is resized (nearest neighbour) to
    ``self.get_shape(ref_shape, ref_size, size, ratio)``, where the
    reference is the P4 map and the first size of the second level.

    Within one level ``net`` is carried over from one (size, ratio) to the
    next, so each conv/resize is applied on top of the previous output
    rather than on the original level feature.

    Returns:
        OrderedDict with keys "P0", "P1", ... in creation order.
    """
    features = self.bh(features, batched_inputs)
    del batched_inputs
    res = OrderedDict()
    featuremap_keys = ["P3", "P4", "P5", "P6", "P7"]
    anchor_sizes = global_cfg.MODEL.ANCHOR_GENERATOR.SIZES
    anchor_ratios = global_cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS
    norm_fn, norm_params = odt.get_norm("evo_norm_s0",
                                        is_training=self.is_training)
    ref_feature = features[featuremap_keys[1]]
    ref_shape = wmlt.combined_static_and_dynamic_shape(ref_feature)[1:3]
    ref_size = anchor_sizes[1][0]
    with tf.name_scope("MakeAnchorsForRetinaNet"):
        for level_idx, key in enumerate(featuremap_keys):
            net = features[key]
            for size_idx, size in enumerate(anchor_sizes[level_idx]):
                ratios = anchor_ratios[level_idx][size_idx]
                for ratio_idx, ratio in enumerate(ratios):
                    net = slim.separable_conv2d(
                        net,
                        32,
                        kernel_size=3,
                        padding="SAME",
                        depth_multiplier=1,
                        normalizer_fn=norm_fn,
                        normalizer_params=norm_params,
                        scope=f"sep_conv_{level_idx}{size_idx}{ratio_idx}")
                    target_shape = self.get_shape(ref_shape, ref_size, size,
                                                  ratio)
                    net = tf.image.resize_nearest_neighbor(net, target_shape)
                    # Keys are numbered consecutively across all levels.
                    res[f"P{len(res)}"] = net
    return res
def fuse_layer(self, xs):
    """Fuse every branch with all the others.

    For output branch ``i`` each input branch ``j`` is brought to branch
    ``i``'s size and channel count: branches with ``j > i`` are upsampled
    (nearest neighbour) and passed through a 1x1 conv, branches with
    ``j < i`` go through ``self.downsamplev2`` with step ``i - j``, and
    branch ``i`` itself is used unchanged. The results are averaged and, if
    set, ``self.activation_fn`` is applied.

    Returns:
        A list with one fused tensor per input branch.
    """
    with tf.variable_scope("Fuse"):
        fused = []
        for i, target in enumerate(xs):
            channels = get_channel(target)
            target_size = wmlt.combined_static_and_dynamic_shape(target)[1:3]
            branches = []
            for j, src in enumerate(xs):
                if i < j:
                    # upsample
                    src = tf.image.resize_nearest_neighbor(src,
                                                           target_size,
                                                           name="upsample")
                    src = slim.conv2d(src,
                                      channels, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=self.normalizer_fn,
                                      normalizer_params=self.normalizer_params,
                                      scope=f"smooth{i}_{j}")
                elif i > j:
                    src = self.downsamplev2(src, channels, i - j)
                branches.append(src)

            if len(branches) > 1:
                out = tf.add_n(branches) / len(branches)
                if self.activation_fn is not None:
                    out = self.activation_fn(out)
            else:
                out = branches[0]
            fused.append(out)
        return fused
def apply_deltas(self, regression, img_size=None, fm_size=None):
    """Turn per-location regression values into boxes.

    Each feature-map cell (y, x) is mapped to the point
    ((y + 0.5) * img_size[0] / H, (x + 0.5) * img_size[1] / W); the result
    is ``[py, px, py, px] + regression * [-1, -1, 1, 1]``.

    Args:
        regression: rank-4 tensor [B, H, W, 4], or rank-2 tensor whose rows
            correspond to the H * W cells of ``fm_size``.
        img_size: (height, width) the cell centres are scaled to.
        fm_size: (H, W) of the feature map; only read for rank-2 input.

    Raises:
        NotImplementedError: if ``regression`` is neither rank 2 nor rank 4.
    """
    rank = len(regression.get_shape())
    if rank == 2:
        B = 1
        H, W = fm_size[0], fm_size[1]
    elif rank == 4:
        B, H, W, _ = wmlt.combined_static_and_dynamic_shape(regression)
    else:
        raise NotImplementedError("Error")

    x_i, y_i = tf.meshgrid(tf.range(W), tf.range(H))
    if isinstance(img_size, tf.Tensor) and img_size.dtype != tf.float32:
        img_size = tf.to_float(img_size)
    H = tf.to_float(H)
    W = tf.to_float(W)
    # Cell centres, in feature-map units.
    y_f = tf.to_float(y_i) + 0.5
    x_f = tf.to_float(x_i) + 0.5
    # Size of one cell in image units.
    y_delta = img_size[0] / H
    x_delta = img_size[1] / W
    center_y = y_f * y_delta
    center_x = x_f * x_delta
    base_value = tf.stack([center_y, center_x, center_y, center_x], axis=-1)

    if rank == 4:
        base_value = tf.expand_dims(base_value, axis=0)
        base_value = tf.stop_gradient(tf.tile(base_value, [B, 1, 1, 1]))
        multi = tf.convert_to_tensor([[[[-1, -1, 1, 1]]]], dtype=tf.float32)
    elif rank == 2:
        base_value = tf.reshape(base_value, [-1, 4])
        multi = tf.convert_to_tensor([[-1, -1, 1, 1]], dtype=tf.float32)
    return base_value + regression * multi
def pyapply_deltas(self, datas, img_size=None):
    '''
    Decode boxes from centre heatmaps.

    The sigmoid of datas['heatmaps_ct'] is filtered with self.pixel_nms, the
    top self.k peaks are taken with self._topk, and for each peak the
    matching datas['offset'] and datas['hw'] entries are gathered to build
    (ymin, xmin, ymax, xmax), which is then converted to relative
    coordinates using the heatmap width and height.

    img_size is accepted but not used.

    :return: bboxes [B,K,4], classes, scores and the gather indices
    '''
    heat = tf.nn.sigmoid(datas['heatmaps_ct'])
    offset = datas['offset']
    hw = datas['hw']
    B, H, W, _ = wmlt.combined_static_and_dynamic_shape(heat)
    offset = tf.reshape(offset, [B, -1, 2])
    hw = tf.reshape(hw, [B, -1, 2])

    heat = self.pixel_nms(heat, threshold=self.score_threshold)
    ct_scores, ct_inds, ct_clses, ct_ys, ct_xs = self._topk(heat, k=self.k)
    channels = btf.channel(heat)
    hw_inds = ct_inds // channels
    K = self.k
    ct_ys = tf.reshape(ct_ys, [B, K])
    ct_xs = tf.reshape(ct_xs, [B, K])

    # Refine the peak positions with the predicted sub-cell offsets.
    peak_offset = wmlt.batch_gather(offset, hw_inds)
    peak_offset = tf.reshape(peak_offset, [B, K, 2])
    offset_y, offset_x = tf.unstack(peak_offset, axis=-1)
    ct_xs = ct_xs + offset_x
    ct_ys = ct_ys + offset_y

    peak_hw = wmlt.batch_gather(hw, hw_inds)
    peak_hw = tf.reshape(peak_hw, [B, K, 2])
    h, w = tf.unstack(peak_hw, axis=-1)

    ymin = ct_ys - h / 2
    xmin = ct_xs - w / 2
    ymax = ct_ys + h / 2
    xmax = ct_xs + w / 2
    bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=-1)
    bboxes = odb.tfabsolutely_boxes_to_relative_boxes(bboxes,
                                                      width=W,
                                                      height=H)
    return bboxes, ct_clses, ct_scores, hw_inds
def predict_boxes_for_gt_classes(self):
    '''
    When later branches (mask, keypoint, ...) follow, they can work either on
    the same input as the RCNN head (the RPN output) or on the RCNN output.
    This helper supports the second option: the proposal boxes come in as
    [batch_size,N,4] and leave with the same shape.
    No Detectron2 config uses this feature, although in theory it should be
    better (at the price of slower training).
    To avoid poor results early in training, this implementation differs
    from Detectron2 by adding gt_boxes.

    NOTE(review): the per-class boxes are gathered with gt_object_logits as
    is, without the "- 1" used elsewhere for class-specific predictions;
    confirm this matches the layout of self.predict_boxes().

    :return: [batch_size,box_nr,box_dim]
    '''
    with tf.name_scope("predict_boxes_for_gt_classes"):
        predicted_boxes = self.predict_boxes()
        coords_per_box = self.proposals[PD_BOXES].get_shape().as_list()[-1]
        pred_width = predicted_boxes.get_shape().as_list()[-1]
        # A class-agnostic head already yields one box per proposal.
        if not pred_width > coords_per_box:
            return predicted_boxes

        gt_classes = tf.reshape(self.proposals.gt_object_logits, [-1])
        batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
            self.proposals[PD_BOXES])
        per_class_boxes = tf.reshape(predicted_boxes,
                                     [batch_size * box_nr, -1, box_dim])
        selected = wmlt.batch_gather(per_class_boxes, gt_classes)
        return tf.reshape(selected, [batch_size, box_nr, box_dim])
def _make_batch_index_for_pooler(bboxes):
    """Return an int32 [batch_size, box_nr] tensor whose row ``b`` is all ``b``.

    ``bboxes`` must be rank 3; only its first two dimensions are used.
    """
    with tf.name_scope("make_batch_index_for_pooler"):
        batch_size, box_nr, _ = wmlt.combined_static_and_dynamic_shape(bboxes)
        column = tf.expand_dims(tf.range(batch_size), axis=1)
        row_of_ones = tf.ones([1, box_nr], dtype=tf.int32)
        # Broadcasting [batch_size, 1] * [1, box_nr] repeats each index.
        return column * row_of_ones
def reshape_to_N_HWA_K(tensor, K):
    """
    Reshape a rank-4 tensor to (N, -1, K), keeping the first dimension.

    The tensor is unpacked channels-last as (N, H, W, _), so for an input of
    (N, H, W, A x K) the result is (N, (H x W x A), K). No transpose is done.
    """
    assert len(tensor.get_shape()) == 4, tensor.shape
    N, _, _, _ = wmlt.combined_static_and_dynamic_shape(tensor)
    return tf.reshape(tensor, [N, -1, K])
def trans(net):
    """Flatten every dimension after the first; rank <= 2 input is returned as is."""
    if len(net.get_shape()) <= 2:
        return net
    leading, *rest = wmlt.combined_static_and_dynamic_shape(net)
    flat_dim = 1
    for extent in rest:
        flat_dim *= extent
    return tf.reshape(net, [leading, flat_dim])
def _topk(scores, K=100):
    """Pick the K highest entries of a [B, H, W, C] score map per batch element.

    Returns:
        (scores, spatial index into the flattened H * W grid, class index,
        y as float32, x as float32), each of shape [B, K].
    """
    B, _, W, C = wmlt.combined_static_and_dynamic_shape(scores)
    flat_scores = tf.reshape(scores, [B, -1])
    topk_scores, flat_inds = tf.nn.top_k(flat_scores, k=K)
    # The flattened index is (y * W + x) * C + class.
    topk_classes = flat_inds % C
    spatial_inds = flat_inds // C
    topk_ys = tf.cast(spatial_inds // W, tf.float32)
    topk_xs = tf.cast(spatial_inds % W, tf.float32)
    return topk_scores, spatial_inds, topk_classes, topk_ys, topk_xs
def forward(self, features, batched_inputs):
    """Balance the feature levels and refine them with non-local blocks.

    Every level is resized to the spatial size of the level at index 1, the
    resized maps are averaged and smoothed with a 3x3 conv. That balanced
    map is then resized back to each level and added to it; every level
    except the first is additionally passed through
    ``wnnl.non_local_blockv1``.

    Args:
        features: mapping of level name to tensor (iterated with
            ``.items()``).
        batched_inputs: unused.

    Returns:
        OrderedDict with the same keys as ``features``.
    """
    normalizer_fn, normalizer_params = odt.get_norm(
        "evo_norm_s0", is_training=self.is_training)
    res = OrderedDict()
    with tf.variable_scope("BalanceNonLocalBackboneHook"):
        del batched_inputs
        ref_index = 1
        end_points = list(features.items())
        _, ref_feature = end_points[ref_index]
        mfeatures = []
        with tf.name_scope("fusion"):
            shape0 = wmlt.combined_static_and_dynamic_shape(ref_feature)
            for i, (_, v) in enumerate(end_points):
                if i == ref_index:
                    aligned = v
                else:
                    aligned = tf.image.resize_bilinear(v,
                                                       shape0[1:3],
                                                       name=f"resize{i}")
                mfeatures.append(aligned)
            balanced = tf.add_n(mfeatures) / float(len(mfeatures))
            balanced = slim.conv2d(balanced,
                                   balanced.get_shape().as_list()[-1], [3, 3],
                                   activation_fn=None,
                                   normalizer_fn=normalizer_fn,
                                   normalizer_params=normalizer_params,
                                   scope="smooth")
        for i, (k, v) in enumerate(end_points):
            with tf.variable_scope(f"merge{i}"):
                shape = wmlt.combined_static_and_dynamic_shape(v)
                # Always rescale the balanced map itself. Reusing one
                # variable for both the balanced map and the per-level
                # output would make every level after the first merge with
                # the previous level's output instead.
                rescaled = tf.image.resize_bilinear(balanced, shape[1:3])
                merged = v + rescaled
                if i > 0:
                    merged = wnnl.non_local_blockv1(
                        merged,
                        inner_dims_multiplier=[1, 1, 1],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params,
                        activation_fn=None,
                        weighed_sum=False)
                res[k] = merged
    return res
def split_states(x, n):
    '''
    Split the last dimension into n equal parts.

    :param x: [batch_size,M,D]
    :param n: number of splits
    :return: [batch_size,M,n,D//n]
    '''
    *leading, last = wmlt.combined_static_and_dynamic_shape(x)
    return wmlt.reshape(x, leading + [n, last // n])
def forward(self, features, batched_inputs):
    """Balance the feature levels and append projected low-level features.

    Every level is resized to the spatial size of the level at index 1,
    averaged and smoothed with a 3x3 conv. For each level the balanced map
    is resized back and added to it, and the result is concatenated (last
    axis) with ``self.parent.low_features["C<n>"]`` projected by a 1x1 conv
    to the level's channel count, where ``<n>`` is the integer parsed from
    the level key after its leading character.

    Args:
        features: mapping of level name to tensor.
        batched_inputs: unused.

    Returns:
        OrderedDict with the same keys as ``features``.
    """
    low_features = self.parent.low_features
    norm_fn, norm_params = odt.get_norm("evo_norm_s0",
                                        is_training=self.is_training)
    res = OrderedDict()
    with tf.variable_scope("BalanceBackboneHookV2"):
        del batched_inputs
        ref_index = 1
        end_points = list(features.items())
        _, ref_feature = end_points[ref_index]
        with tf.name_scope("fusion"):
            ref_shape = wmlt.combined_static_and_dynamic_shape(ref_feature)
            aligned = [
                feature if idx == ref_index else tf.image.resize_bilinear(
                    feature, ref_shape[1:3], name=f"resize{idx}")
                for idx, (_, feature) in enumerate(end_points)
            ]
            balanced = tf.add_n(aligned) / float(len(aligned))
            balanced = slim.conv2d(balanced,
                                   balanced.get_shape().as_list()[-1], [3, 3],
                                   activation_fn=None,
                                   normalizer_fn=norm_fn,
                                   normalizer_params=norm_params,
                                   scope="smooth")
        for idx, (key, feature) in enumerate(end_points):
            with tf.name_scope(f"smooth_low_feature{idx}"):
                level = int(key[1:])
                low_feature = low_features[f"C{level}"]
                channels = feature.get_shape().as_list()[-1]
                low_feature = slim.conv2d(low_feature,
                                          channels, [1, 1],
                                          activation_fn=None,
                                          normalizer_fn=None)
            with tf.name_scope(f"merge{idx}"):
                level_shape = wmlt.combined_static_and_dynamic_shape(feature)
                rescaled = tf.image.resize_bilinear(balanced,
                                                    level_shape[1:3])
                res[key] = tf.concat([feature + rescaled, low_feature],
                                     axis=-1)
    return res
def __init__(self,
             adj_mt,
             points_data,
             edges_data,
             edges_data_dim=None,
             axis=1):
    """Set up the graph tensors and the point/edge index tables.

    Args:
        adj_mt: adjacency matrix; converted to a tensor if needed and cast
            to bool. Although ``None`` passes the conversion step, it is
            dereferenced right after, so a value is required in practice.
        points_data: per-point data; its first dimension is taken as the
            number of points. Required in practice for the same reason.
        edges_data: per-edge data, or ``None``.
        edges_data_dim: edge feature size; must be given when ``edges_data``
            is ``None`` and is otherwise read from ``edges_data``.
        axis: stored as ``self.axis``.
    """
    print(f"{type(self).__name__}")
    if not isinstance(adj_mt, tf.Tensor) and adj_mt is not None:
        adj_mt = tf.convert_to_tensor(adj_mt)
    if not isinstance(points_data, tf.Tensor) and points_data is not None:
        points_data = tf.convert_to_tensor(points_data)
    if not isinstance(edges_data, tf.Tensor) and edges_data is not None:
        edges_data = tf.convert_to_tensor(edges_data)
    if edges_data is None:
        assert edges_data_dim is not None, "edges_data or edges_data_dim must not be None."
    else:
        edges_data_dim = edges_data.get_shape().as_list()[-1]

    self.adj_mt = adj_mt
    # Compare dtypes by value rather than by object identity.
    if self.adj_mt.dtype != tf.bool:
        self.adj_mt = tf.cast(self.adj_mt, tf.bool)
    # Flattened row-major (rows first, then columns); adj_mt is already bool.
    self.line_mask = tf.reshape(self.adj_mt, [-1])
    self.edges_data = edges_data
    self.points_data = points_data
    p_num = wmlt.combined_static_and_dynamic_shape(points_data)[0]
    self.axis = axis
    self._points_size = p_num
    # "/cpu:0" is the documented device-string form; the previous ":/cpu:0"
    # had a stray leading colon.
    with tf.device("/cpu:0"):
        self.point_indexs = tf.range(self._points_size)
    # [edge_nr,2]: tf.Tensor, (sender_index, receive_index), values in
    # [0, points_nr)
    self.senders_indexs, self.receivers_indexs = (
        self.make_edge_to_points_indexs())
    self.real_edge_nr = tf.shape(self.senders_indexs)[0]
    self.global_attr = None
    self.p2e_offset_index = self.get_offset_index_for_p2e()
    # [points_nr,2] list whose values are scalar tf.Tensors in [0, edge_nr)
    self.points_to_sedges, self.points_to_redges = (
        self.make_points_to_edges_indexs())
    self.edges_reducer_for_points = tf.unsorted_segment_sum
    # Input is [X, edge_hiden_size], output is [1, edge_hiden_size].
    self.edges_reducer_for_global = functools.partial(tf.reduce_mean,
                                                      axis=0,
                                                      keepdims=True)
    # Input is [X, node_hiden_size], output is [1, node_hiden_size].
    self.points_reducer_for_global = functools.partial(tf.reduce_mean,
                                                       axis=0,
                                                       keepdims=True)
    self.use_sent_edges_for_node = True
    self.use_received_edges_for_node = True
def add_full_size_mask(self):
    """Add RD_FULL_SIZE_MASKS to ``self.res_data`` if it can be derived.

    Does nothing when there are no instance masks (RD_MASKS) or when the
    full-size masks are already present. The target size is taken from
    dimensions 1 and 2 of ``self.data``.
    """
    results = self.res_data
    if RD_MASKS not in results or RD_FULL_SIZE_MASKS in results:
        return
    boxes = results[RD_BOXES]
    instance_masks = results[RD_MASKS]
    target_size = wmlt.combined_static_and_dynamic_shape(self.data)[1:3]
    results[RD_FULL_SIZE_MASKS] = imv.batch_tf_get_fullsize_mask(
        boxes=boxes, masks=instance_masks, size=target_size)
def forward(self, net, batched_inputs, reuse=None):
    """Attach box-size information to the classification branch.

    A learned positional embedding of shape [1, H, W, K*C] is modulated, per
    box, by a sigmoid gate computed from the box height, width and the two
    aspect ratios, projected back to C channels and handed to
    ``wnnl.non_local_blockv4`` together with ``net``.

    Args:
        net: feature tensor; dimensions 1 and 2 must be statically known
            because they size the embedding variable.
        batched_inputs: only ``batched_inputs[IMAGE]`` is read, for the image
            height and width.
        reuse: unused.

    Returns:
        (cls_net, reg_net); reg_net is the unchanged input ``net``.
    """
    with tf.variable_scope("AddBBoxesSizeInfo"):
        # The regression branch keeps the original features.
        reg_net = net
        shape = wmlt.combined_static_and_dynamic_shape(net)
        C = shape[-1]
        # Channel multiplier of the positional embedding.
        K = 4
        with tf.variable_scope("pos_embedding"):
            pos_embs_shape = [1, shape[1], shape[2], K * C]
            pos_embedding = tf.get_variable(
                "pos_embs",
                shape=pos_embs_shape,
                dtype=tf.float32,
                initializer=tf.random_normal_initializer(stddev=0.02))
            bboxes = self.parent.t_proposal_boxes
            with tf.name_scope("trans_bboxes"):
                _, H, W, _ = btf.combined_static_and_dynamic_shape(
                    batched_inputs[IMAGE])
                bboxes = odb.tfrelative_boxes_to_absolutely_boxes(bboxes, W, H)
                bymin, bxmin, bymax, bxmax = tf.unstack(bboxes, axis=-1)
                bh = bymax - bymin
                bw = bxmax - bxmin
                # Both aspect ratios; the epsilon guards against empty boxes.
                br0 = bh / (bw + 1e-8)
                br1 = bw / (bh + 1e-8)
                bboxes = tf.stack([bh, bw, br0, br1], axis=-1)
                B, BN, BC = btf.combined_static_and_dynamic_shape(bboxes)
                bboxes = tf.reshape(bboxes, [B * BN, BC])
                # The box geometry is an input only; no gradient flows back.
                bboxes = tf.stop_gradient(bboxes)
            bboxes = slim.fully_connected(bboxes,
                                          C,
                                          activation_fn=self.activation_fn,
                                          normalizer_fn=self.normalizer_fn,
                                          normalizer_params=self.norm_params)
            # Sigmoid output acts as a per-box, per-channel gate.
            bboxes = slim.fully_connected(bboxes,
                                          K * C,
                                          activation_fn=tf.nn.sigmoid,
                                          normalizer_fn=None)
            # Broadcast [B*BN,1,1,K*C] * [1,H,W,K*C] -> [B*BN,H,W,K*C].
            pos_embedding = tf.reshape(bboxes,
                                       [B * BN, 1, 1, K * C]) * pos_embedding
            pos_embedding = tf.layers.dense(
                pos_embedding,
                C,
                kernel_initializer=tf.truncated_normal_initializer(
                    stddev=0.02))
        cls_net = wnnl.non_local_blockv4(net,
                                         scope=f"non_local",
                                         normalizer_fn=wnnl.evo_norm_s0,
                                         activation_fn=None,
                                         n_head=4,
                                         weighed_sum=False,
                                         pos_embedding=pos_embedding)
        return cls_net, reg_net
def get_pred_iou_lossv1(self):
    '''
    Loss for the predicted-IoU branch: the overlap between the predicted
    bboxes and the gt bboxes is used as the target.

    Foreground proposals (gt class > 0) are trained towards the value
    returned by odl.giou for their decoded box; background proposals are
    trained towards 0 with half the weight.

    :return: scalar loss tensor
    '''
    with tf.name_scope("get_pred_iouv1_loss"):
        # Despite the name, these are the matched gt boxes, not deltas.
        # relu maps the background index -1 to 0 so the gather stays in range.
        gt_proposal_deltas = wmlt.batch_gather(
            self.proposals.gt_boxes, tf.nn.relu(self.proposals.indices))
        batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
            gt_proposal_deltas)
        gt_proposal_deltas = tf.reshape(gt_proposal_deltas,
                                        [batch_size * box_nr, box_dim])
        proposal_bboxes = tf.reshape(self.proposals.boxes,
                                     [batch_size * box_nr, box_dim])
        # Class-agnostic when the head predicts exactly one box per proposal.
        cls_agnostic_bbox_reg = self.pred_proposal_deltas.get_shape(
        ).as_list()[-1] == box_dim
        num_classes = self.pred_class_logits.get_shape().as_list()[-1]
        fg_num_classes = num_classes - 1
        pred_iou_logits = self.pred_iou_logits
        fg_inds = tf.greater(self.gt_classes, 0)
        gt_proposal_deltas = tf.boolean_mask(gt_proposal_deltas, fg_inds)
        pred_proposal_deltas = tf.boolean_mask(self.pred_proposal_deltas,
                                               fg_inds)
        proposal_bboxes = tf.boolean_mask(proposal_bboxes, fg_inds)
        gt_logits_i = tf.boolean_mask(self.gt_classes, fg_inds)
        pred_iou_logits_pos = tf.reshape(
            tf.boolean_mask(pred_iou_logits, fg_inds), [-1])
        pred_iou_logits_neg = tf.reshape(
            tf.boolean_mask(pred_iou_logits, tf.logical_not(fg_inds)), [-1])
        if not cls_agnostic_bbox_reg:
            pred_proposal_deltas = tf.reshape(
                pred_proposal_deltas, [-1, fg_num_classes, box_dim])
            # Predictions have no background slot, hence the -1.
            pred_proposal_deltas = wmlt.select_2thdata_by_index_v2(
                pred_proposal_deltas, gt_logits_i - 1)
        pred_bboxes = self.box2box_transform.apply_deltas(
            pred_proposal_deltas, boxes=proposal_bboxes)
        # Used as a soft label below, so no gradient flows into the boxes.
        loss_box_reg = odl.giou(pred_bboxes, gt_proposal_deltas)
        loss_box_reg = tf.stop_gradient(loss_box_reg)
        loss_pos = wnn.sigmoid_cross_entropy_with_logits_FL(
            labels=loss_box_reg, logits=pred_iou_logits_pos)
        loss_pos = tf.reduce_mean(loss_pos)
        loss_neg = wnn.sigmoid_cross_entropy_with_logits_FL(
            labels=tf.zeros_like(pred_iou_logits_neg),
            logits=pred_iou_logits_neg)
        loss_neg = tf.reduce_mean(loss_neg) * 0.5
        tf.summary.scalar("iou_pos_loss", loss_pos)
        tf.summary.scalar("iou_neg_loss", loss_neg)
        loss = loss_pos + loss_neg
        return loss
def inference(self,
              inputs,
              box_cls,
              box_regression,
              center_ness,
              nms=None,
              pad=True):
    """
    Arguments:
        inputs: same as FCOS.forward's batched_inputs
        box_cls: list of Tensor, Tensor's shape is [B,H,W,A*num_classes]
        box_regression: list of Tensor, Tensor's shape is [B,H,W,A*4]
        center_ness: list of Tensor, each reshaped to [B,-1]
        nms, pad: forwarded to self.inference_single_image

    Returns:
        results:
        RD_BOXES: [B,N,4]
        RD_LABELS: [B,N]
        RD_PROBABILITY:[ B,N]
        RD_LENGTH:[B]
    """
    assert len(box_cls[0].get_shape()) == 4, "error box cls dims"
    assert len(box_regression[0].get_shape()) == 4, "error box delta dims"

    B, _, _, _ = wmlt.combined_static_and_dynamic_shape(box_regression[0])
    # Must be captured before the regression maps are flattened below.
    fm_sizes = [tf.shape(level)[1:3] for level in box_regression]

    flat_cls = [reshape_to_N_HWA_K(level, self.num_classes)
                for level in box_cls]
    flat_reg = [reshape_to_N_HWA_K(level, 4) for level in box_regression]
    flat_ctr = [tf.reshape(level, [B, -1]) for level in center_ness]
    # Concatenate all levels along the location axis.
    all_cls = tf.concat(flat_cls, axis=1)
    all_reg = tf.concat(flat_reg, axis=1)
    all_ctr = tf.concat(flat_ctr, axis=1)

    def _per_image(elems):
        return self.inference_single_image(elems[0],
                                           elems[1],
                                           elems[2],
                                           fm_sizes,
                                           nms=nms,
                                           pad=pad)

    results = wmlt.static_or_dynamic_map_fn(
        _per_image,
        elems=[all_cls, all_reg, all_ctr],
        dtype=[tf.float32, tf.int32, tf.float32, tf.int32],
        back_prop=False)
    outdata = {
        RD_BOXES: results[0],
        RD_LABELS: results[1],
        RD_PROBABILITY: results[2],
        RD_LENGTH: results[3],
    }
    if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        wsummary.detection_image_summary(
            images=inputs[IMAGE],
            boxes=outdata[RD_BOXES],
            classes=outdata[RD_LABELS],
            lengths=outdata[RD_LENGTH],
            scores=outdata[RD_PROBABILITY],
            name="FCOSGIou_result",
            category_index=DataLoader.category_index)
    return outdata
def forward(self, inputs, features):
    """Generate the anchors of every feature level.

    Args:
        inputs: dict; ``inputs['image']`` supplies the image size.
        features: one feature map per entry of ``self.sizes``.

    Returns:
        A list with one [N, 4] anchor tensor per feature level.
    """
    anchors = []
    image = inputs['image']
    assert len(features) == len(
        self.sizes
    ), f"Error features len {len(features)} vs {len(self.sizes)}."
    with tf.name_scope("anchor_generator"):
        size = wmlt.combined_static_and_dynamic_shape(image)[1:3]
        for level, feature in enumerate(features):
            fm_size = wmlt.combined_static_and_dynamic_shape(feature)[1:3]
            # anchor_generator returns [N,4]: the anchor boxes for every
            # position, every ratio and every size, i.e.
            # [box0(s0,r0),box1(s1,r0),box2(s0,r1),box3(s1,r1),...]
            ratios_are_nested = isinstance(self.aspect_ratios[level][0],
                                           Iterable)
            if ratios_are_nested:
                # One list of ratios per size: generate per size, then
                # interleave the results.
                assert len(self.sizes[level]) == len(
                    self.aspect_ratios[level]), "error size"
                per_size = []
                for j, scale in enumerate(self.sizes[level]):
                    per_size.append(
                        anchor_generator(
                            shape=fm_size,
                            size=size,
                            scales=[scale],
                            aspect_ratios=self.aspect_ratios[level][j]))
                per_size = [tf.expand_dims(a, axis=1) for a in per_size]
                merged = tf.concat(per_size, axis=1)
                anchors.append(tf.reshape(merged, [-1, 4]))
            else:
                anchors.append(
                    anchor_generator(shape=fm_size,
                                     size=size,
                                     scales=self.sizes[level],
                                     aspect_ratios=self.aspect_ratios[level]))
        if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
            self.show_anchors(anchors, features, img_size=size)
        return anchors
def mask_rcnn_inference(pred_mask_logits, pred_instances):
    """
    Convert pred_mask_logits to estimated foreground probability masks while
    also extracting only the masks for the predicted classes in
    pred_instances. For each predicted box, the mask of the same class is
    attached to the instance by adding a new RD_MASKS field to
    pred_instances.

    Args:
        pred_mask_logits (Tensor): A tensor of shape (B, Hmask, Wmask, C) or
            (B, Hmask, Wmask, 1) for class-specific or class-agnostic, where
            B is the total number of predicted masks in all images, C is the
            number of foreground classes, and Hmask, Wmask are the height
            and width of the mask predictions. The values are logits.
        pred_instances (dict): A dict of prediction results.
            pred_instances[RD_LABELS] is [batch_size, Y]; B must equal
            batch_size * Y because the masks are reshaped to
            [batch_size, Y, Hmask, Wmask].

    Returns:
        None. pred_instances will contain an extra RD_MASKS field storing a
        mask of size [batch_size, Y, Hmask, Wmask] for the predicted class.
        The masks are soft (non-quantized) and at the resolution predicted
        by the network; resizing to the original image resolution and/or
        binarizing is left to the caller.
    """
    cls_agnostic_mask = pred_mask_logits.get_shape().as_list()[-1] == 1
    labels = pred_instances[RD_LABELS]
    batch_size, box_nr = wmlt.combined_static_and_dynamic_shape(labels)

    if cls_agnostic_mask:
        # Drop the singleton class axis so both paths yield [B, H, W];
        # otherwise the three-way unpack below fails on a rank-4 tensor.
        pred_mask_logits = tf.squeeze(pred_mask_logits, axis=[-1])
    else:
        # Select masks corresponding to the predicted classes.
        pred_mask_logits = tf.transpose(pred_mask_logits, [0, 3, 1, 2])
        # Remove the background class.
        labels = tf.reshape(labels, [-1]) - 1
        # When several images are predicted together, labels may be padded
        # with zeros, which become negative after the subtraction above.
        pred_mask_logits = wmlt.batch_gather(pred_mask_logits,
                                             tf.nn.relu(labels))

    _, H, W = wmlt.combined_static_and_dynamic_shape(pred_mask_logits)
    pred_mask_logits = tf.reshape(pred_mask_logits,
                                  [batch_size, box_nr, H, W])
    pred_instances[RD_MASKS] = tf.nn.sigmoid(pred_mask_logits)
def _get_ground_truth(self):
    """
    Build the training targets for every head output.

    Returns:
        A list with one ``self.box2box_transform.get_deltas(...)`` result per
        entry of ``self.head_outputs``, each computed for the spatial size of
        that entry's 'heatmaps_ct' tensor.
    """
    return [
        self.box2box_transform.get_deltas(
            self.gt_boxes,
            self.gt_labels,
            self.gt_length,
            output_size=wmlt.combined_static_and_dynamic_shape(
                outputs['heatmaps_ct'])[1:3])
        for outputs in self.head_outputs
    ]
def get_pred_centerness_loss(self):
    """Loss for the centerness branch, computed on foreground proposals only.

    The target is ``1 - max(2 * |gt_center - box_center| / gt_size)`` over
    the two axes, and ``self.pred_iou_logits`` is trained towards it.

    Returns:
        scalar loss tensor
    """
    with tf.name_scope("get_pred_centerness_loss"):
        # Despite the name, these are the matched gt boxes, not deltas.
        # relu maps the background index -1 to 0 so the gather stays in range.
        gt_proposal_deltas = wmlt.batch_gather(
            self.proposals.gt_boxes, tf.nn.relu(self.proposals.indices))
        batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
            gt_proposal_deltas)
        gt_proposal_deltas = tf.reshape(gt_proposal_deltas,
                                        [batch_size * box_nr, box_dim])
        proposal_bboxes = tf.reshape(self.proposals.boxes,
                                     [batch_size * box_nr, box_dim])
        # Class-agnostic when the head predicts exactly one box per proposal.
        cls_agnostic_bbox_reg = self.pred_proposal_deltas.get_shape(
        ).as_list()[-1] == box_dim
        num_classes = self.pred_class_logits.get_shape().as_list()[-1]
        fg_num_classes = num_classes - 1
        pred_iou_logits = self.pred_iou_logits
        fg_inds = tf.greater(self.gt_classes, 0)
        gt_proposal_deltas = tf.boolean_mask(gt_proposal_deltas, fg_inds)
        pred_proposal_deltas = tf.boolean_mask(self.pred_proposal_deltas,
                                               fg_inds)
        proposal_bboxes = tf.boolean_mask(proposal_bboxes, fg_inds)
        gt_logits_i = tf.boolean_mask(self.gt_classes, fg_inds)
        pred_iou_logits_pos = tf.reshape(
            tf.boolean_mask(pred_iou_logits, fg_inds), [-1])
        if not cls_agnostic_bbox_reg:
            pred_proposal_deltas = tf.reshape(
                pred_proposal_deltas, [-1, fg_num_classes, box_dim])
            # Predictions have no background slot, hence the -1.
            pred_proposal_deltas = wmlt.select_2thdata_by_index_v2(
                pred_proposal_deltas, gt_logits_i - 1)
        pred_bboxes = self.box2box_transform.apply_deltas(
            pred_proposal_deltas, boxes=proposal_bboxes)
        # NOTE(review): the decoded boxes computed just above are discarded
        # here; the centerness target is measured on the proposal boxes, not
        # on the predicted boxes. Confirm this is intended.
        pred_bboxes = odb.to_cxyhw(proposal_bboxes)
        gt_bboxes = odb.to_cxyhw(gt_proposal_deltas)
        # Twice the centre distance, later divided by the gt size.
        deltas = tf.abs(gt_bboxes[..., :2] - pred_bboxes[..., :2]) * 2
        wsummary.histogram_or_scalar(deltas, "centerness_deltas")
        centerness = 1 - tf.reduce_max(
            deltas / (gt_bboxes[..., 2:] + 1e-8), axis=-1, keepdims=False)
        wsummary.histogram_or_scalar(centerness, "centerness")
        loss_pos = wnn.sigmoid_cross_entropy_with_logits_FL(
            labels=centerness, logits=pred_iou_logits_pos)
        wsummary.histogram_or_scalar(tf.nn.sigmoid(pred_iou_logits_pos),
                                     "pred_centerness")
        loss_pos = tf.reduce_mean(loss_pos)
        tf.summary.scalar("centerness_loss", loss_pos)
        loss = loss_pos
        return loss
def _make_transition_layer(self,
                           xs,
                           num_channels_pre_layer,
                           num_channels_cur_layer,
                           scope=None):
    """Fuse the existing branches into the branches of the next stage.

    Output branch ``i`` takes the spatial size of ``xs[i]`` when that branch
    already exists; each additional branch is half the size of the one
    before it. Every input branch ``j`` contributes to every output branch:
    ``j > i`` is upsampled (nearest neighbour) plus 1x1 conv, ``j < i`` goes
    through ``self.downsamplev2`` with step ``i - j``, and ``j == i`` is
    used directly, with a 3x3 projection conv only when its channel count
    differs from the target. Contributions are averaged and, if set,
    ``self.activation_fn`` is applied.

    Args:
        xs: list of input branch tensors.
        num_channels_pre_layer: only its length (number of existing
            branches) is used.
        num_channels_cur_layer: channel count of every output branch.
        scope: optional variable scope name.

    Returns:
        A list with one tensor per entry of ``num_channels_cur_layer``.
    """
    num_branches_cur = len(num_channels_cur_layer)
    num_branches_pre = len(num_channels_pre_layer)
    ys = []
    with tf.variable_scope(scope, default_name="transition_layer"):
        out_shapes = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                out_shapes.append(
                    wmlt.combined_static_and_dynamic_shape(xs[i])[1:3])
            else:
                # New branch: half the resolution of the previous one.
                prev_h, prev_w = out_shapes[-1][0], out_shapes[-1][1]
                h = prev_h // 2
                w = prev_w // 2
                out_shapes.append([h, w])

        with tf.variable_scope("Fuse"):
            for i in range(num_branches_cur):
                channels = num_channels_cur_layer[i]
                target_size = out_shapes[i]
                branches = []
                for j, src in enumerate(xs):
                    if i < j:
                        # upsample
                        src = tf.image.resize_nearest_neighbor(
                            src, target_size, name="upsample")
                        src = slim.conv2d(
                            src,
                            channels, [1, 1],
                            activation_fn=None,
                            normalizer_fn=self.normalizer_fn,
                            normalizer_params=self.normalizer_params,
                            scope=f"smooth{i}_{j}")
                    elif i > j:
                        src = self.downsamplev2(src, channels, i - j)
                    elif channels != get_channel(src):
                        # Same branch, different width: project it.
                        src = slim.conv2d(
                            src,
                            channels, [3, 3],
                            activation_fn=None,
                            normalizer_fn=self.normalizer_fn,
                            normalizer_params=self.normalizer_params,
                            scope=f"project_{i}_{j}")
                    branches.append(src)

                if len(branches) > 1:
                    out = tf.add_n(branches) / len(branches)
                    if self.activation_fn is not None:
                        out = self.activation_fn(out)
                else:
                    out = branches[0]
                ys.append(out)
    return ys