def decode_boxes(boxes, regs, prio_scaling=(0.1, 0.1, 0.2, 0.2)):
    """Apply regression deltas to reference boxes and return the decoded boxes.

    Args:
        boxes: rank-2 tensor whose axis 1 unstacks into
            (ymin, xmin, ymax, xmax).
        regs: regression deltas ordered (dy, dx, dh, dw). Either a rank-1
            tensor that is applied to every box, or a tensor that is
            flattened to [-1, channel] and applied row by row.
        prio_scaling: four scale factors applied to (dy, dx, dh, dw)
            respectively before decoding.

    Returns:
        A tensor with the same shape as `boxes` holding
        (ymin, xmin, ymax, xmax), clipped to the range [0.0, 1.0].
    """
    assert btf.channel(boxes) == btf.channel(regs), \
        "boxes and regs must have the same channel size."
    l_shape = btf.combined_static_and_dynamic_shape(boxes)

    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    cy = (ymin + ymax) / 2.
    cx = (xmin + xmax) / 2.
    h = ymax - ymin
    w = xmax - xmin

    if regs.get_shape().ndims == 1:
        # A single delta vector is broadcast over all boxes.
        dy, dx, dh, dw = regs[0], regs[1], regs[2], regs[3]
    else:
        r_shape = btf.combined_static_and_dynamic_shape(regs)
        regs = tf.reshape(regs, (-1, r_shape[-1]))
        dy, dx, dh, dw = tf.unstack(regs, axis=1)

    # Centre shifts are relative to the box size; sizes are scaled
    # exponentially.
    cy = dy * h * prio_scaling[0] + cy
    cx = dx * w * prio_scaling[1] + cx
    h = h * tf.exp(dh * prio_scaling[2])
    w = w * tf.exp(dw * prio_scaling[3])

    ymin = cy - h / 2.
    xmin = cx - w / 2.
    ymax = cy + h / 2.
    xmax = cx + w / 2.

    # Stack as [4, N], flip to [N, 4], then restore the input box shape.
    bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=0)
    bboxes = tf.transpose(bboxes, perm=[1, 0])
    bboxes = tf.reshape(bboxes, l_shape)
    bboxes = tf.clip_by_value(bboxes, 0.0, 1.0)
    return bboxes
def pyapply_deltas(self, datas, img_size=None):
    """Decode the top-k centre-point predictions into bounding boxes.

    Args:
        datas: dict providing 'heatmaps_ct' (centre heatmap logits),
            'offset' and 'hw'; the latter two are reshaped to [B, -1, 2].
        img_size: not used by this method.

    Returns:
        A tuple (bboxes, classes, scores, hw_inds). `bboxes` is built as
        (ymin, xmin, ymax, xmax) on the heatmap grid and then passed through
        odb.tfabsolutely_boxes_to_relative_boxes with the heatmap width and
        height. `hw_inds` are the per-detection indices used to gather
        `offset` and `hw`.
    """
    heat = tf.nn.sigmoid(datas['heatmaps_ct'])
    raw_offset = datas['offset']
    raw_size = datas['hw']

    batch, height, width, _ = wmlt.combined_static_and_dynamic_shape(heat)
    flat_offset = tf.reshape(raw_offset, [batch, -1, 2])
    flat_size = tf.reshape(raw_size, [batch, -1, 2])

    heat = self.pixel_nms(heat, threshold=self.score_threshold)
    scores, flat_inds, classes, center_y, center_x = self._topk(heat,
                                                                k=self.k)
    # The top-k indices run over position * channel, so integer division by
    # the channel count yields the spatial index.
    hw_inds = flat_inds // btf.channel(heat)
    top_k = self.k

    center_y = tf.reshape(center_y, [batch, top_k])
    center_x = tf.reshape(center_x, [batch, top_k])

    picked_offset = wmlt.batch_gather(flat_offset, hw_inds)
    picked_offset = tf.reshape(picked_offset, [batch, top_k, 2])
    shift_y, shift_x = tf.unstack(picked_offset, axis=-1)
    center_x = center_x + shift_x
    center_y = center_y + shift_y

    picked_size = wmlt.batch_gather(flat_size, hw_inds)
    picked_size = tf.reshape(picked_size, [batch, top_k, 2])
    box_h, box_w = tf.unstack(picked_size, axis=-1)

    corners = [
        center_y - box_h / 2,
        center_x - box_w / 2,
        center_y + box_h / 2,
        center_x + box_w / 2,
    ]
    bboxes = tf.stack(corners, axis=-1)
    bboxes = odb.tfabsolutely_boxes_to_relative_boxes(bboxes,
                                                      width=width,
                                                      height=height)
    return bboxes, classes, scores, hw_inds
def losses(self):
    """Build the semantic segmentation loss using a sigmoid focal loss.

    Logits and labels are first brought to the same spatial size: either the
    logits are bilinearly resized to the label size (`self.upsample_logits`)
    or the labels are nearest-neighbour resized to the logits size.

    Returns:
        A dict {"semantic_loss": scalar tensor}, the sum of the per-element
        focal loss.
    """
    with tf.name_scope("deeplab_loss"):
        if not self.upsample_logits:
            logits = self.logits
            labels = tf.image.resize_nearest_neighbor(
                self.labels, btf.img_size(self.logits), align_corners=True)
        else:
            logits = tf.image.resize_bilinear(
                self.logits, btf.img_size(self.labels), align_corners=True)
            labels = self.labels

        label_channels = btf.channel(labels)
        flat_labels = tf.reshape(labels, shape=[-1, label_channels])
        flat_labels = tf.cast(flat_labels, tf.float32)
        # Drop the first label channel (presumably background -- confirm
        # against the label encoding) before comparing with the logits.
        flat_labels = flat_labels[..., 1:]
        flat_logits = tf.reshape(logits, [-1, self.num_classes])

        focal = wnn.sigmoid_cross_entropy_with_logits_FL(
            labels=flat_labels,
            logits=flat_logits,
            gamma=self.cfg.FOCAL_LOSS_GAMMA,
            alpha=self.cfg.FOCAL_LOSS_ALPHA)
        total = tf.reduce_sum(focal)
        return {"semantic_loss": total}
def forward(self, net, batched_inputs, reuse=None):
    """Modulate `net` with group-norm parameters predicted from box sizes.

    For each proposal box, its height, width and the two aspect ratios
    (h/w and w/h) are fed through two fully connected layers to produce
    `gamma` and `beta`, which are passed to wnnl.group_norm_v2.

    Args:
        net: feature tensor to normalise; its channel count C sets the size
            of gamma and beta.
        batched_inputs: dict whose IMAGE entry supplies the image height and
            width used to convert the proposal boxes.
        reuse: forwarded to tf.variable_scope.

    Returns:
        The output of wnnl.group_norm_v2(net, gamma, beta).
    """
    with tf.variable_scope("AddBBoxesSizeInfoV2", reuse=reuse):
        channels = btf.channel(net)
        boxes = self.parent.t_proposal_boxes

        with tf.name_scope("trans_bboxes"):
            image_shape = btf.combined_static_and_dynamic_shape(
                batched_inputs[IMAGE])
            _, img_h, img_w, _ = image_shape
            boxes = odb.tfrelative_boxes_to_absolutely_boxes(
                boxes, img_w, img_h)
            y0, x0, y1, x1 = tf.unstack(boxes, axis=-1)
            box_h = y1 - y0
            box_w = x1 - x0
            # The small epsilon guards against division by zero for
            # degenerate boxes.
            ratio_hw = box_h / (box_w + 1e-8)
            ratio_wh = box_w / (box_h + 1e-8)
            size_feats = tf.stack([box_h, box_w, ratio_hw, ratio_wh],
                                  axis=-1)
            batch, box_num, feat_dim = \
                btf.combined_static_and_dynamic_shape(size_feats)
            size_feats = tf.reshape(size_feats, [batch * box_num, feat_dim])
            # Box geometry is an input only; no gradient flows back into it.
            size_feats = tf.stop_gradient(size_feats)

        hidden = slim.fully_connected(
            size_feats,
            channels * 2,
            activation_fn=self.activation_fn,
            normalizer_fn=self.normalizer_fn,
            normalizer_params=self.norm_params)
        affine = slim.fully_connected(
            hidden,
            channels * 2,
            activation_fn=None,
            normalizer_fn=None)

        # First half of the output is the scale, second half the shift.
        gamma = affine[..., :channels]
        beta = affine[..., channels:]
        return wnnl.group_norm_v2(net, gamma, beta)
def _dla_upv1(self, features, startp, endp, scope="ida"):
    """Merge each feature level into its predecessor, in place.

    For every index i in [startp, endp), features[i] is projected to the
    channel count of features[i-1], upsampled with `self.upsample_op`, added
    to features[i-1] and refined by a 3x3 conv. The result overwrites
    features[i].

    Args:
        features: mutable sequence of feature tensors; modified in place.
        startp: first index to process.
        endp: one past the last index to process.
        scope: variable scope name.

    Returns:
        None.
    """
    project = self.conv_op
    upsample = self.upsample_op
    with tf.variable_scope(scope):
        for level in range(startp, endp):
            target_channels = btf.channel(features[level - 1])
            projected = project(features[level], target_channels, [3, 3],
                                scope=f"project_{level}")
            upsampled = upsample(projected, target_channels,
                                 scope=f"upsample_{level}")
            features[level] = project(upsampled + features[level - 1],
                                      target_channels, [3, 3],
                                      scope=f"node_{level}")
def _dla_upv2(self, features, startp, endp, scope="ida"):
    """Merge each feature level into its predecessor, in place.

    Same flow as a project -> upsample -> add -> refine step, but the
    upsampling is a slim.conv2d_transpose whose kernel size and stride depend
    on the level index i (kernel 4**i, stride 2**i).

    Args:
        features: mutable sequence of feature tensors; modified in place.
        startp: first index to process.
        endp: one past the last index to process.
        scope: variable scope name.

    Returns:
        None.
    """
    project = self.conv_op
    with tf.variable_scope(scope):
        for level in range(startp, endp):
            # NOTE(review): kernel_size grows as 4**level while stride grows
            # as 2**level -- confirm this is the intended kernel/stride
            # relation.
            upsample = functools.partial(
                slim.conv2d_transpose,
                kernel_size=4 ** level,
                stride=2 ** level,
                normalizer_fn=self.normalizer_fn,
                activation_fn=self.activation_fn,
                normalizer_params=self.norm_params)
            target_channels = btf.channel(features[level - 1])
            projected = project(features[level], target_channels, [3, 3],
                                scope=f"project_{level}")
            upsampled = upsample(projected, target_channels,
                                 scope=f"upsample_{level}")
            features[level] = project(upsampled + features[level - 1],
                                      target_channels, [3, 3],
                                      scope=f"node_{level}")
def pool(self, x, pool1_fn, pool2_fn, dim=128, scope=None):
    """Two-branch pooling block with cross "look" connections and a skip path.

    Each branch adds a dilated-conv feature of `x` to a pooled "look" feature
    produced with the *other* branch's pooling function, refines it and pools
    it with its own function. The two pooled branches are summed, merged with
    a 1x1 projection of `x`, optionally activated, and refined by a final 3x3
    conv.

    Args:
        x: input feature map.
        pool1_fn: pooling callable for the first branch.
        pool2_fn: pooling callable for the second branch.
        dim: channel count used inside the branches.
        scope: optional variable scope name (defaults to "pool").

    Returns:
        A tensor with the same channel count as `x`.
    """
    out_dim = channel(x)
    # Shared settings for the two convs of the merge stage.
    merge_kwargs = dict(biases_initializer=None,
                        normalizer_fn=self.normalizer_fn,
                        normalizer_params=self.norm_params,
                        activation_fn=None)

    with tf.variable_scope(scope, default_name="pool"):
        # NOTE: the order of slim.conv2d calls inside each scope is kept
        # stable because it determines the auto-generated layer names.
        with tf.variable_scope("pool1"):
            look_a = pool2_fn(slim.conv2d(x, dim, 3, rate=2))
            feat_a = slim.conv2d(x, dim, 3, rate=2)
            fused_a = slim.conv2d(feat_a + look_a, dim, 3,
                                  biases_initializer=None)
            branch_a = pool1_fn(fused_a)

        with tf.variable_scope("pool2"):
            look_b = pool1_fn(slim.conv2d(x, dim, 3, rate=2))
            feat_b = slim.conv2d(x, dim, 3, rate=2)
            fused_b = slim.conv2d(feat_b + look_b, dim, 3,
                                  biases_initializer=None)
            branch_b = pool2_fn(fused_b)

        with tf.variable_scope("merge"):
            pooled = slim.conv2d(branch_a + branch_b, out_dim, 3,
                                 **merge_kwargs)
            skip = slim.conv2d(x, out_dim, 1, **merge_kwargs)
            if self.activation_fn is None:
                merged = pooled + skip
            else:
                merged = self.activation_fn(pooled + skip)
            return slim.conv2d(merged, out_dim, 3,
                               normalizer_fn=None,
                               activation_fn=None)
def __gru(self, h, x, scope=None):
    """Run one GRU-style update of state `h` with input `x`.

    Args:
        h: previous state; its channel count sets the gate and output sizes.
        x: current input, concatenated with `h` along the last axis.
        scope: optional variable scope name (defaults to "GRU").

    Returns:
        The new state (1 - z) * h + z * h_hat, where z is the update gate and
        h_hat the tanh candidate computed from the reset-gated state.
    """
    with tf.variable_scope(scope, default_name="GRU"):
        state_dim = btf.channel(h)

        # One linear layer produces both gates; they are split after the
        # sigmoid.
        gates = slim.fully_connected(tf.concat([h, x], axis=-1),
                                     state_dim * 2,
                                     activation_fn=None,
                                     normalizer_fn=None)
        gates = tf.nn.sigmoid(gates)
        reset_gate, update_gate = tf.split(gates,
                                           num_or_size_splits=2,
                                           axis=-1)

        candidate = slim.fully_connected(
            tf.concat([reset_gate * h, x], axis=-1),
            state_dim,
            activation_fn=tf.nn.tanh,
            normalizer_fn=None)

        return (1 - update_gate) * h + update_gate * candidate
def losses(self):
    """Build the semantic segmentation loss using softmax cross entropy.

    Logits and labels are first brought to the same spatial size: either the
    logits are bilinearly resized to the label size (`self.upsample_logits`)
    or the labels are nearest-neighbour resized to the logits size.

    Returns:
        A dict {"semantic_loss": loss tensor}. The loss is not added to any
        loss collection (loss_collection=None).
    """
    with tf.name_scope("deeplab_loss"):
        if not self.upsample_logits:
            logits = self.logits
            labels = tf.image.resize_nearest_neighbor(
                self.labels, btf.img_size(self.logits), align_corners=True)
        else:
            logits = tf.image.resize_bilinear(
                self.logits, btf.img_size(self.labels), align_corners=True)
            labels = self.labels

        label_channels = btf.channel(labels)
        flat_labels = tf.cast(
            tf.reshape(labels, shape=[-1, label_channels]), tf.float32)
        flat_logits = tf.reshape(logits, [-1, self.num_classes])

        xent = tf.losses.softmax_cross_entropy(flat_labels,
                                               flat_logits,
                                               loss_collection=None)
        return {"semantic_loss": xent}
def pool_cross(self, x, pool1_fn, pool2_fn, pool3_fn, pool4_fn, dim=128,
               scope=None):
    """Apply a plain three-conv stack in place of the cross-pooling block.

    The four pooling callables are accepted for interface compatibility but
    are not used: the body consists only of three 3x3 convolutions. An
    unterminated string literal that followed the return statement (disabled,
    unreachable code) has been removed because it left the file unparsable.

    Args:
        x: input feature map.
        pool1_fn: unused.
        pool2_fn: unused.
        pool3_fn: unused.
        pool4_fn: unused.
        dim: channel count of the two intermediate convolutions.
        scope: optional variable scope name (defaults to "pool_cross").

    Returns:
        A tensor with the same channel count as the input `x`; the last
        convolution has no normalizer and no activation.
    """
    out_dim = channel(x)
    with tf.variable_scope(scope, default_name="pool_cross"):
        x = slim.conv2d(x, dim, 3)
        x = slim.conv2d(x, dim, 3)
        x = slim.conv2d(x, out_dim, 3, normalizer_fn=None, activation_fn=None)
        return x
def inference(self, inputs, pred_maps):
    """Decode OpenPose head outputs into keypoints and their bounding boxes.

    Arguments:
        inputs: same as forward's batched_inputs; inputs[IMAGE] is only used
            for the debug summary.
        pred_maps: outputs of the openpose head, a sequence of exactly three
            items of which the last one is decoded.

    Returns:
        results: dict with
            RD_BOXES: boxes computed from the decoded keypoints
                (documented upstream as [B,N,4])
            RD_LENGTH: number of detections per image
                (documented upstream as [B])
            RD_KEYPOINT: decoded keypoints
                (documented upstream as [B,N,NUM_KEYPOINTS,2])
    """
    _, _, final_maps = pred_maps

    # The first NUM_KEYPOINTS channels are confidence maps; the remaining
    # channels are the part-affinity fields.
    num_keypoints = self.cfg.NUM_KEYPOINTS
    total_channels = btf.channel(final_maps)
    conf_maps, paf_maps = tf.split(
        final_maps,
        [num_keypoints, total_channels - self.cfg.NUM_KEYPOINTS],
        axis=-1)

    keypoints, lengths = tfop.open_pose_decode(
        conf_maps,
        paf_maps,
        self.cfg.POINTS_PAIRS,
        keypoints_th=self.cfg.OPENPOSE_KEYPOINTS_TH,
        interp_samples=self.cfg.OPENPOSE_INTERP_SAMPLES,
        paf_score_th=self.cfg.OPENPOSE_PAF_SCORE_TH,
        conf_th=self.cfg.DET_SCORE_THRESHOLD_TEST,
        max_detection=self.max_detections_per_image)
    boxes = kp.batch_get_bboxes(keypoints, lengths)

    outdata = {
        RD_BOXES: boxes,
        RD_LENGTH: lengths,
        RD_KEYPOINT: keypoints,
    }

    if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        wsummary.keypoints_image_summary(
            images=inputs[IMAGE],
            keypoints=keypoints,
            lengths=outdata[RD_LENGTH],
            keypoints_pair=self.cfg.POINTS_PAIRS,
            name="KeyPoints_result")

    return outdata
def forward(self, x, scope="BoxPredictor"):
    """Predict class scores, box deltas and (optionally) IoU logits.

    Args:
        x: either a single tensor shared by all heads (flattened to rank 2
            when it has more than two dimensions), or a non-tensor iterable
            with one feature per head: (cls, box) or, when
            cfg.MODEL.ROI_HEADS.PRED_IOU is set, (cls, box, iou). Rank-2
            features go through a fully connected layer; other features go
            through a 1x1 conv followed by a mean over axes 1 and 2.
        scope: variable scope name.

    Returns:
        (scores, proposal_deltas), or (scores, proposal_deltas, iou_logits)
        when cfg.MODEL.ROI_HEADS.PRED_IOU is set.
    """
    linear = dict(activation_fn=None, normalizer_fn=None)

    def _head(feature, out_dim, name):
        # Rank-2 input: fully connected; otherwise 1x1 conv + spatial mean.
        if len(feature.get_shape()) == 2:
            return slim.fully_connected(feature, out_dim, scope=name,
                                        **linear)
        fmap = slim.conv2d(feature, out_dim, [1, 1], scope=name, **linear)
        return tf.reduce_mean(fmap, axis=[1, 2], keepdims=False, name=name)

    with tf.variable_scope(scope):
        per_head_inputs = (not isinstance(x, tf.Tensor)
                           and isinstance(x, Iterable))
        if per_head_inputs:
            if self.cfg.MODEL.ROI_HEADS.PRED_IOU:
                assert len(x) == 3, "error x length."
            else:
                assert len(x) == 2, "error x length."

            scores = _head(x[0], self.num_classes + 1, "cls_score")

            foreground_num_classes = self.num_classes
            if self.cls_agnostic_bbox_reg:
                num_bbox_reg_classes = 1
            else:
                num_bbox_reg_classes = foreground_num_classes
            proposal_deltas = _head(x[1],
                                    self.box_dim * num_bbox_reg_classes,
                                    "bbox_pred")

            if self.cfg.MODEL.ROI_HEADS.PRED_IOU:
                iou_feature = x[2]
                if (len(iou_feature.get_shape()) == 2
                        and btf.channel(iou_feature) == 1):
                    # Already a single logit per sample: use it as is.
                    iou_logits = iou_feature
                else:
                    iou_logits = _head(iou_feature, 1, "iou_pred")
        else:
            if len(x.get_shape()) > 2:
                in_shape = wmlt.combined_static_and_dynamic_shape(x)
                x = tf.reshape(x, [in_shape[0], -1])

            scores = slim.fully_connected(x, self.num_classes + 1,
                                          scope="cls_score", **linear)

            foreground_num_classes = self.num_classes
            if self.cls_agnostic_bbox_reg:
                num_bbox_reg_classes = 1
            else:
                num_bbox_reg_classes = foreground_num_classes
            proposal_deltas = slim.fully_connected(
                x, self.box_dim * num_bbox_reg_classes,
                scope="bbox_pred", **linear)

            if self.cfg.MODEL.ROI_HEADS.PRED_IOU:
                iou_logits = slim.fully_connected(x, 1, scope="iou_pred",
                                                  **linear)

        wsummary.variable_summaries_v2(proposal_deltas, "proposal_deltas")

        if self.cfg.MODEL.ROI_HEADS.PRED_IOU:
            return scores, proposal_deltas, iou_logits
        return scores, proposal_deltas