def forward(self, net, batched_inputs, reuse=None):
    """Modulate ``net`` with a group norm driven by proposal-box sizes.

    For every proposal box the height, width and both aspect ratios are
    computed (after ``odb.tfrelative_boxes_to_absolutely_boxes``), passed
    through two fully connected layers, and the result is split into the
    ``gamma``/``beta`` arguments of ``wnnl.group_norm_v2``.

    Args:
        net: feature tensor; its channel count sizes the FC layers.
        batched_inputs: container indexed by ``IMAGE``; the image's dims 1
            and 2 are used as height and width.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        The result of ``wnnl.group_norm_v2(net, gamma, beta)``.
    """
    with tf.variable_scope("AddBBoxesSizeInfoV2", reuse=reuse):
        channels = btf.channel(net)
        proposals = self.parent.t_proposal_boxes
        with tf.name_scope("trans_bboxes"):
            _, img_h, img_w, _ = btf.combined_static_and_dynamic_shape(
                batched_inputs[IMAGE])
            abs_boxes = odb.tfrelative_boxes_to_absolutely_boxes(
                proposals, img_w, img_h)
            y0, x0, y1, x1 = tf.unstack(abs_boxes, axis=-1)
            box_h = y1 - y0
            box_w = x1 - x0
            # Small epsilon keeps the ratios finite for degenerate boxes.
            ratio_hw = box_h / (box_w + 1e-8)
            ratio_wh = box_w / (box_h + 1e-8)
            size_feats = tf.stack([box_h, box_w, ratio_hw, ratio_wh],
                                  axis=-1)
            batch, per_img, feat_dim = \
                btf.combined_static_and_dynamic_shape(size_feats)
            size_feats = tf.reshape(size_feats, [batch * per_img, feat_dim])
            # Box geometry is an input only; no gradient flows back to it.
            size_feats = tf.stop_gradient(size_feats)
        hidden = slim.fully_connected(size_feats,
                                      channels * 2,
                                      activation_fn=self.activation_fn,
                                      normalizer_fn=self.normalizer_fn,
                                      normalizer_params=self.norm_params)
        affine = slim.fully_connected(hidden,
                                      channels * 2,
                                      activation_fn=None,
                                      normalizer_fn=None)
        gamma = affine[..., :channels]
        beta = affine[..., channels:]
        return wnnl.group_norm_v2(net, gamma, beta)
def _get_ground_truth(self, net):
    """Build the ground-truth targets from ``tfop.hr_net_encode``.

    Args:
        net: tensor whose dims 1 and 2 give the size of the target maps.

    Returns:
        ``(gt_conf_maps, gt_indexs, conf_mask)``: the first two outputs of
        ``tfop.hr_net_encode`` and, when ``cfg.USE_LOSS_MASK`` is set, a
        single-channel mask filled with 1.0 inside the ground-truth boxes
        (``None`` otherwise).
    """
    target_size = btf.combined_static_and_dynamic_shape(net)[1:3]
    encoded = tfop.hr_net_encode(
        keypoints=self.gt_keypoints,
        output_size=target_size,
        glength=self.gt_length,
        gaussian_delta=self.cfg.OPENPOSE_GAUSSIAN_DELTA)
    gt_conf_maps, gt_indexs = encoded[0], encoded[1]
    wsummary.feature_map_summary(gt_conf_maps, "gt_conf_maps", max_outputs=5)

    if not self.cfg.USE_LOSS_MASK:
        return gt_conf_maps, gt_indexs, None

    batch, height, width, _ = btf.combined_static_and_dynamic_shape(
        gt_conf_maps)
    canvas = tf.zeros([batch, height, width, 1])
    conf_mask = odtl.batch_fill_bboxes(canvas,
                                       self.gt_boxes,
                                       v=1.0,
                                       length=self.gt_length,
                                       H=height,
                                       W=width,
                                       relative_coord=True)
    tf.summary.image("loss_mask", conf_mask, max_outputs=5)
    return gt_conf_maps, gt_indexs, conf_mask
def decode_boxes(boxes, regs, prio_scaling=(0.1, 0.1, 0.2, 0.2)):
    """Apply centre/size regression offsets to boxes and clip to [0, 1].

    Args:
        boxes: tensor unstacked along axis 1 into
            ``(ymin, xmin, ymax, xmax)``.
        regs: regression values in ``(cy, cx, h, w)`` order. A rank-1
            tensor is applied to every box; any other rank is flattened to
            ``[-1, last_dim]`` and applied row by row.
        prio_scaling: four multipliers for the cy, cx, h and w offsets.
            The default is an immutable tuple (a shared list default could
            be mutated across calls); any 4-element indexable is accepted.

    Returns:
        Decoded ``(ymin, xmin, ymax, xmax)`` boxes reshaped to the shape of
        ``boxes`` and clipped to ``[0.0, 1.0]``.
    """
    assert btf.channel(boxes) == btf.channel(regs), "box channel must be 4."
    out_shape = btf.combined_static_and_dynamic_shape(boxes)
    reg_shape = btf.combined_static_and_dynamic_shape(regs)

    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    cy = (ymin + ymax) / 2.
    cx = (xmin + xmax) / 2.
    h = ymax - ymin
    w = xmax - xmin

    if regs.get_shape().ndims == 1:
        # One shared offset vector, broadcast over all boxes.
        d_cy, d_cx, d_h, d_w = regs[0], regs[1], regs[2], regs[3]
    else:
        regs = tf.reshape(regs, (-1, reg_shape[-1]))
        d_cy, d_cx, d_h, d_w = tf.unstack(regs, axis=1)

    cy = d_cy * h * prio_scaling[0] + cy
    cx = d_cx * w * prio_scaling[1] + cx
    h = h * tf.exp(d_h * prio_scaling[2])
    w = w * tf.exp(d_w * prio_scaling[3])

    half_h = h / 2.
    half_w = w / 2.
    bboxes = tf.stack(
        [cy - half_h, cx - half_w, cy + half_h, cx + half_w], axis=1)
    bboxes = tf.reshape(bboxes, out_shape)
    return tf.clip_by_value(bboxes, 0.0, 1.0)
def _get_ground_truth(self):
    """Build the ground-truth targets from ``tfop.open_pose_encode``.

    The target size is taken from dims 1 and 2 of ``self.pred_maps[0][0]``.

    Returns:
        ``(gt_paf_maps, gt_conf_maps, paf_mask, conf_mask)``. Both masks are
        the same tensor (1.0 inside the ground-truth boxes) when
        ``cfg.USE_LOSS_MASK`` is set, and ``None`` otherwise.
    """
    reference = self.pred_maps[0][0]
    target_size = btf.combined_static_and_dynamic_shape(reference)[1:3]
    encoded = tfop.open_pose_encode(
        keypoints=self.gt_keypoints,
        output_size=target_size,
        glength=self.gt_length,
        keypoints_pair=self.cfg.POINTS_PAIRS,
        l_delta=self.cfg.OPENPOSE_L_DELTA,
        gaussian_delta=self.cfg.OPENPOSE_GAUSSIAN_DELTA)
    gt_conf_maps, gt_paf_maps = encoded[0], encoded[1]
    wsummary.feature_map_summary(gt_conf_maps, "gt_conf_maps", max_outputs=5)
    wsummary.feature_map_summary(gt_paf_maps, "gt_paf_maps", max_outputs=5)

    if not self.cfg.USE_LOSS_MASK:
        return gt_paf_maps, gt_conf_maps, None, None

    batch, height, width, _ = btf.combined_static_and_dynamic_shape(
        gt_paf_maps)
    canvas = tf.zeros([batch, height, width, 1])
    box_mask = odtl.batch_fill_bboxes(canvas,
                                      self.gt_boxes,
                                      v=1.0,
                                      length=self.gt_length,
                                      H=height,
                                      W=width,
                                      relative_coord=True)
    tf.summary.image("bboxes_mask", box_mask, max_outputs=5)
    # The same mask is used for both the PAF and the confidence targets.
    return gt_paf_maps, gt_conf_maps, box_mask, box_mask
def fn1(boxes, l):
    """Remove overlapping boxes (no labels) and pad back to the input size.

    Args:
        boxes: 2-D box tensor; only the first ``l`` rows are considered.
        l: number of valid rows in ``boxes``.

    Returns:
        ``(boxes, keep_pos, n_nr)``: the kept boxes zero-padded to the
        original row count, ``keep_pos`` padded to the same length, and the
        number of kept boxes.
    """
    nr, _ = btf.combined_static_and_dynamic_shape(boxes)
    boxes = boxes[:l, :]
    boxes, keep_pos = remove_bboxes_by_overlap(boxes, None, threshold)

    # Pad keep_pos by ITS OWN length: it can be longer than the kept boxes
    # (one entry per input box), so reusing the boxes' padding would give a
    # result longer than nr. This mirrors the labelled variant fn0.
    keep_len = btf.combined_static_and_dynamic_shape(keep_pos)[0]
    keep_pos = tf.pad(keep_pos, [[0, nr - keep_len]])

    n_nr, _ = btf.combined_static_and_dynamic_shape(boxes)
    boxes = tf.pad(boxes, [[0, nr - n_nr], [0, 0]])
    return boxes, keep_pos, n_nr
def forward(self, net, batched_inputs, reuse=None):
    """Inject proposal-box size information through a positional embedding.

    Box height, width and both aspect ratios are mapped by two fully
    connected layers to a sigmoid gate that scales a learned positional
    embedding; the gated embedding is projected to ``C`` channels and
    handed to ``wnnl.non_local_blockv4``.

    Args:
        net: feature tensor; dims 1 and 2 size the embedding and must be
            usable as a variable shape.
        batched_inputs: container indexed by ``IMAGE``; the image's dims 1
            and 2 are used as height and width.
        reuse: forwarded to ``tf.variable_scope`` (previously accepted but
            ignored; ``None`` keeps the old behaviour).

    Returns:
        ``(cls_net, reg_net)``: the non-local block output and the
        unmodified input ``net``.
    """
    # NOTE(review): scope extents were reconstructed from flattened source;
    # confirm variable names against existing checkpoints.
    with tf.variable_scope("AddBBoxesSizeInfo", reuse=reuse):
        reg_net = net
        shape = wmlt.combined_static_and_dynamic_shape(net)
        C = shape[-1]
        K = 4
        with tf.variable_scope("pos_embedding"):
            pos_embs_shape = [1, shape[1], shape[2], K * C]
            pos_embedding = tf.get_variable(
                "pos_embs",
                shape=pos_embs_shape,
                dtype=tf.float32,
                initializer=tf.random_normal_initializer(stddev=0.02))
        bboxes = self.parent.t_proposal_boxes
        with tf.name_scope("trans_bboxes"):
            _, H, W, _ = btf.combined_static_and_dynamic_shape(
                batched_inputs[IMAGE])
            bboxes = odb.tfrelative_boxes_to_absolutely_boxes(bboxes, W, H)
            bymin, bxmin, bymax, bxmax = tf.unstack(bboxes, axis=-1)
            bh = bymax - bymin
            bw = bxmax - bxmin
            # Small epsilon keeps the ratios finite for degenerate boxes.
            br0 = bh / (bw + 1e-8)
            br1 = bw / (bh + 1e-8)
            bboxes = tf.stack([bh, bw, br0, br1], axis=-1)
            B, BN, BC = btf.combined_static_and_dynamic_shape(bboxes)
            bboxes = tf.reshape(bboxes, [B * BN, BC])
            bboxes = tf.stop_gradient(bboxes)
        bboxes = slim.fully_connected(bboxes,
                                      C,
                                      activation_fn=self.activation_fn,
                                      normalizer_fn=self.normalizer_fn,
                                      normalizer_params=self.norm_params)
        bboxes = slim.fully_connected(bboxes,
                                      K * C,
                                      activation_fn=tf.nn.sigmoid,
                                      normalizer_fn=None)
        # Per-box sigmoid gate broadcast over the spatial embedding.
        pos_embedding = tf.reshape(bboxes,
                                   [B * BN, 1, 1, K * C]) * pos_embedding
        pos_embedding = tf.layers.dense(
            pos_embedding,
            C,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.02))
        cls_net = wnnl.non_local_blockv4(net,
                                         scope="non_local",
                                         normalizer_fn=wnnl.evo_norm_s0,
                                         activation_fn=None,
                                         n_head=4,
                                         weighed_sum=False,
                                         pos_embedding=pos_embedding)
        return cls_net, reg_net
def fn0(boxes, label, l):
    """Remove overlapping boxes per label and pad back to the input size.

    Args:
        boxes: 2-D box tensor; only the first ``l`` rows are considered.
        label: labels matching ``boxes``; only the first ``l`` are used.
        l: number of valid rows.

    Returns:
        ``(boxes, keep_pos, n_nr)``: kept boxes and ``keep_pos``, each
        zero-padded to the original row count, plus the kept-box count.
    """
    total, _ = btf.combined_static_and_dynamic_shape(boxes)
    kept, keep_pos = remove_bboxes_by_overlap(boxes[:l, :], label[:l],
                                              threshold)

    # keep_pos and the kept boxes may differ in length, so each is padded
    # from its own size.
    mask_len = btf.combined_static_and_dynamic_shape(keep_pos)[0]
    keep_pos = tf.pad(keep_pos, [[0, total - mask_len]])

    n_nr, _ = btf.combined_static_and_dynamic_shape(kept)
    kept = tf.pad(kept, [[0, total - n_nr], [0, 0]])
    return kept, keep_pos, n_nr
def get_iou_matrix(boxes0, boxes1):
    """Return the [X, Y] matrix of ``batch_bboxes_jaccard`` over all pairs.

    Args:
        boxes0: 2-D box tensor with X rows (4 values per row).
        boxes1: 2-D box tensor with Y rows (4 values per row).

    Returns:
        Tensor of shape [X, Y]; entry (i, j) pairs ``boxes0[i]`` with
        ``boxes1[j]``.
    """
    rows, _ = btf.combined_static_and_dynamic_shape(boxes0)
    cols, _ = btf.combined_static_and_dynamic_shape(boxes1)
    # Expand both sets to the full X*Y list of pairs.
    left = tf.tile(tf.expand_dims(boxes0, axis=1), [1, cols, 1])
    right = tf.tile(tf.expand_dims(boxes1, axis=0), [rows, 1, 1])
    pair_iou = batch_bboxes_jaccard(tf.reshape(left, [-1, 4]),
                                    tf.reshape(right, [-1, 4]))
    return tf.reshape(pair_iou, [rows, cols])
def get_bboxes_intersection_matrix(boxes0, boxes1):
    """Return the [X, Y] matrix of ``bboxes_intersection`` over all pairs.

    Args:
        boxes0: 2-D box tensor with X rows (4 values per row).
        boxes1: 2-D box tensor with Y rows (4 values per row).

    Returns:
        Tensor of shape [X, Y]; entry (i, j) is
        ``bboxes_intersection(boxes0[i], boxes1[j])``.
    """
    rows, _ = btf.combined_static_and_dynamic_shape(boxes0)
    cols, _ = btf.combined_static_and_dynamic_shape(boxes1)
    # Expand both sets to the full X*Y list of pairs.
    left = tf.tile(tf.expand_dims(boxes0, axis=1), [1, cols, 1])
    right = tf.tile(tf.expand_dims(boxes1, axis=0), [rows, 1, 1])
    pair_scores = bboxes_intersection(tf.reshape(left, [-1, 4]),
                                      tf.reshape(right, [-1, 4]))
    return tf.reshape(pair_scores, [rows, cols])
def keypoints_image_summary(images,
                            keypoints=None,
                            lengths=None,
                            max_instance_to_draw=20,
                            keypoints_pair=None,
                            name="keypoints_image_summary",
                            max_outputs=3):
    """Draw keypoints on a batch of images and emit an image summary.

    Args:
      images: A 4D image tensor of shape [N, H, W, C].
      keypoints: A 4D tensor of shape
        [N, max_detection, num_keypoints, 2] holding (x, y) keypoints.
        Required in practice: its rank is asserted despite the ``None``
        default.
      lengths: Optional 1D tensor forwarded to the drawing routine.
      max_instance_to_draw: Maximum number of instances to draw on an image.
        Default 20.
      keypoints_pair: Forwarded to ``imv.draw_keypoints_image_summary``.
      name: Name of the emitted ``tf.summary.image``.
      max_outputs: Number of leading batch entries kept and summarised.

    Returns:
      None; the drawn images are written through ``tf.summary.image``.
    """
    assert len(keypoints.get_shape()
               ) == 4, f"error keypoints dims {len(keypoints.get_shape())}"
    assert len(images.get_shape()
               ) == 4, f"error images dims {len(images.get_shape())}"
    images = images[:max_outputs]
    keypoints = keypoints[:max_outputs]
    if lengths is not None:
        lengths = lengths[:max_outputs]
        assert len(lengths.get_shape()
                   ) == 1, f"error length dims {len(lengths.get_shape())}"
    with tf.device("/cpu:0"):
        if images.get_shape().as_list()[0] is None:
            # Dynamic batch size: substitute placeholders for empty inputs
            # so the drawing op never receives a zero-sized tensor.
            nr = tf.reduce_prod(tf.shape(keypoints))
            B, H, W, C = btf.combined_static_and_dynamic_shape(images)
            # num_keypoints must come from the keypoints tensor, not images.
            _, _, num_keypoints, _ = btf.combined_static_and_dynamic_shape(
                keypoints)
            src_images = images
            src_keypoints = keypoints
            images = tf.cond(
                tf.greater(B, 0), lambda: src_images,
                lambda: tf.ones([1, H, W, C], dtype=src_images.dtype))
            keypoints = tf.cond(
                tf.greater(nr, 0), lambda: src_keypoints,
                lambda: tf.zeros([B, 1, num_keypoints, 2],
                                 dtype=src_keypoints.dtype))
        images = imv.draw_keypoints_image_summary(
            images,
            keypoints,
            keypoints_pair=keypoints_pair,
            lengths=lengths,
            max_instance_to_draw=max_instance_to_draw)
        tf.summary.image(name, images, max_outputs=max_outputs)
def ae_loss_for_single_img(self, net, indexs, nr):
    """Compute the pull and push losses for one image.

    Args:
        net: tensor that is flattened to 1-D and indexed by ``indexs``.
        indexs: integer index tensor; only the first ``nr`` rows are used.
            Negative entries are treated as missing (zero weight).
        nr: number of valid rows in ``indexs``.

    Returns:
        ``(pull_loss, push_loss)``. The pull loss is the weighted squared
        deviation of each gathered value from its row mean, divided by
        ``nr``. The push loss is half the mean of ``exp(-(m_i - m_j)^2)``
        over all ordered pairs of distinct rows.
    """
    indexs = indexs[:nr]
    fnr = tf.cast(nr, tf.float32)
    with tf.name_scope("pull_loss"):
        net = tf.reshape(net, [-1])
        old_shape = btf.combined_static_and_dynamic_shape(indexs)
        # relu clamps negative (missing) indices to 0 so gather is valid;
        # their contribution is removed by `weights` below.
        ids = tf.gather(net, tf.nn.relu(tf.reshape(indexs, [-1])))
        ids = tf.reshape(ids, old_shape)
        weights = tf.cast(tf.greater_equal(indexs, 0), tf.float32)
        mean = tf.reduce_sum(
            ids * weights, axis=-1, keepdims=True) / tf.maximum(
                tf.reduce_sum(weights, axis=-1, keepdims=True), 1e-8)
        pull_loss = tf.square(ids - mean) * weights
        # Guard nr == 0, which would otherwise give 0 / 0 = NaN.
        pull_loss = tf.reduce_sum(pull_loss) / tf.maximum(fnr, 1.0)
    with tf.name_scope("push_loss"):
        row_ids = tf.range(nr)
        X, Y = tf.meshgrid(row_ids, row_ids)
        X = tf.reshape(X, [-1])
        Y = tf.reshape(Y, [-1])
        # Keep only pairs of different rows.
        mask = tf.not_equal(X, Y)
        ftotal_nr = tf.reduce_sum(tf.cast(mask, tf.float32)) + 1e-8
        X = tf.boolean_mask(X, mask)
        Y = tf.boolean_mask(Y, mask)
        loss = -tf.square(tf.gather(mean, X) - tf.gather(mean, Y))
        loss = tf.exp(loss)
        loss = tf.reduce_sum(loss)
        loss = loss / ftotal_nr
        push_loss = loss * 0.5
    return pull_loss, push_loss
def keypoints_flip_left_right(keypoints, swap_index=None):
    """Mirror keypoint x coordinates and optionally swap paired keypoints.

    Args:
        keypoints: tensor whose last axis is ``(x, y)``; when
            ``swap_index`` is given it must be 3-D with a statically known
            second dimension.
        swap_index: optional iterable of ``(a, b)`` keypoint index pairs to
            exchange after the flip.

    Returns:
        Tensor stacked as ``(x, y)`` on the last axis. ``x`` becomes
        ``1 - x`` only where both ``x >= 0`` and ``y >= 0``.
    """
    x, y = tf.unstack(keypoints, axis=-1)
    is_valid = tf.logical_and(x >= 0, y >= 0)
    # Entries with a negative coordinate are left untouched.
    x = tf.where(is_valid, 1.0 - x, x)

    if swap_index is not None:
        partner = {}
        for first, second in swap_index:
            partner[first] = second
            partner[second] = first
        batch, num_points, _ = btf.combined_static_and_dynamic_shape(
            keypoints)
        order = [partner.get(i, i) for i in range(num_points)]
        order = tf.convert_to_tensor(order, dtype=tf.int32)
        order = tf.tile(tf.reshape(order, [1, num_points]), [batch, 1])
        x = tf.batch_gather(x, order)
        y = tf.batch_gather(y, order)

    return tf.stack([x, y], axis=-1)
def remove_bboxes_by_overlapv2(bboxes, labels=None, threshold=0.5):
    """Drop boxes whose intersection score with another box is too high.

    A box ``i`` is dropped when some other box ``j`` has
    ``score[i, j] > threshold`` and one of the following holds:
    ``score[j, i] <= threshold``; ``score[i, j] > score[j, i]``; or the two
    scores are equal and ``i`` comes after ``j``. With ``labels`` given,
    only pairs with equal labels are compared.

    Args:
        bboxes: 2-D box tensor with R rows.
        labels: optional tensor of R labels.
        threshold: score above which a pair counts as overlapping.

    Returns:
        ``(bboxes, keep_pos)``: the surviving boxes and the boolean mask of
        length R that selected them.
    """
    scores = get_bboxes_intersection_matrix(bboxes, bboxes)
    count, _ = btf.combined_static_and_dynamic_shape(scores)
    # Zero the diagonal so a box never competes with itself.
    scores = scores * (1.0 - tf.eye(count))
    scores_t = tf.transpose(scores)

    # Unique position id per matrix cell, used to break exact ties.
    cell_id = tf.reshape(tf.cast(tf.range(count * count), tf.float32),
                         [count, count])
    cell_id_t = tf.transpose(cell_id)

    over = tf.greater(scores, threshold)
    both_over = tf.logical_and(over, tf.greater(scores_t, threshold))

    one_sided = tf.logical_and(over, tf.less_equal(scores_t, threshold))
    larger = tf.logical_and(both_over, tf.greater(scores, scores_t))
    tied = tf.logical_and(both_over, tf.equal(scores, scores_t))
    tied = tf.logical_and(tf.greater(cell_id, cell_id_t), tied)

    failed = tf.logical_or(tf.logical_or(one_sided, larger), tied)

    if labels is not None:
        same_label = tf.equal(tf.reshape(labels, [count, 1]),
                              tf.reshape(labels, [1, count]))
        failed = tf.logical_and(failed, same_label)

    keep_pos = tf.logical_not(tf.reduce_any(failed, axis=1, keepdims=False))
    return tf.boolean_mask(bboxes, keep_pos), keep_pos
def get_bboxes_dis(boxes0, boxes1):
    """Return the [X, Y] matrix of distances between box centre points.

    Args:
        boxes0: 2-D box tensor with X rows (4 values per row).
        boxes1: 2-D box tensor with Y rows (4 values per row).

    Returns:
        Tensor of shape [X, Y]; entry (i, j) is the Euclidean distance
        between the centres (from ``get_bboxes_center_point``) of
        ``boxes0[i]`` and ``boxes1[j]``.
    """
    rows, _ = btf.combined_static_and_dynamic_shape(boxes0)
    cols, _ = btf.combined_static_and_dynamic_shape(boxes1)
    # Expand both sets to the full X*Y list of pairs.
    left = tf.tile(tf.expand_dims(boxes0, axis=1), [1, cols, 1])
    right = tf.tile(tf.expand_dims(boxes1, axis=0), [rows, 1, 1])
    left_center = get_bboxes_center_point(tf.reshape(left, [-1, 4]))
    right_center = get_bboxes_center_point(tf.reshape(right, [-1, 4]))
    sq_dist = tf.reduce_sum(tf.square(left_center - right_center),
                            axis=-1,
                            keepdims=False)
    return tf.reshape(tf.sqrt(sq_dist), [rows, cols])
def get_threshold(self, iou_matrix):
    '''
    Compute an adaptive IoU threshold for every ground-truth box.

    For each row, the IoUs greater than self.MIN_IOU_THRESHOLD are selected
    and the threshold is their mean plus standard deviation; a row with no
    such IoU gets the threshold 1.0.

    iou_matrix: [B,GT_nr,Anchor_nr]
    X = GT_nr, Y=Anchor_nr
    return:
    [B,GT], with gradients stopped
    '''
    batch, gt_nr, anchor_nr = btf.combined_static_and_dynamic_shape(
        iou_matrix)
    rows = tf.reshape(iou_matrix, [batch * gt_nr, anchor_nr])

    def row_threshold(row):
        valid = tf.greater(row, self.MIN_IOU_THRESHOLD)

        def mean_plus_std():
            selected = tf.boolean_mask(row, valid)
            mean, var = tf.nn.moments(selected, axes=-1)
            return mean + tf.sqrt(var)

        def no_candidate():
            return tf.constant(1.0, dtype=tf.float32)

        return tf.cond(tf.reduce_any(valid), mean_plus_std, no_candidate)

    per_row = tf.map_fn(row_threshold, elems=rows, back_prop=False)
    return tf.stop_gradient(tf.reshape(per_row, [batch, gt_nr]))
def row_image_summaries(imgs,
                        name="image_contrast",
                        max_outputs=3,
                        margin=10,
                        resize=False,
                        is_hsv=False):
    """Concatenate image batches side by side and summarise the result.

    Args:
        imgs: sequence of 4-D image tensors; the first one sets the
            reference size and channel count.
        name: name scope wrapping the ops.
        max_outputs: number of leading batch entries kept from each tensor.
        margin: zero-padding columns inserted before each appended image.
        resize: if True, later images are nearest-neighbour resized to the
            height/width of the first.
        is_hsv: if True, the row is converted with ``hsv_to_rgb`` and
            rescaled by ``* 2.0 - 1.0`` before the summary.

    Returns:
        None; the row is passed to ``image_summaries`` under the fixed
        summary name "image_contrast".
    """
    with tf.name_scope(name):
        ref_shape = btf.combined_static_and_dynamic_shape(imgs[0])
        row = tf.identity(imgs[0][:max_outputs])
        ref_channels = row.get_shape().as_list()[-1]
        channels_match = all(
            imgs[idx].get_shape().as_list()[-1] == ref_channels
            for idx in range(1, len(imgs)))

        def to_three_channels(tensor):
            # Only applied when channel counts differ: grey -> 3 channels.
            if tensor.get_shape().as_list()[-1] == 1:
                return tf.tile(tensor, [1, 1, 1, 3])
            return tensor

        if not channels_match:
            row = to_three_channels(row)

        for idx in range(1, len(imgs)):
            row = tf.pad(row,
                         paddings=[[0, 0], [0, 0], [0, margin], [0, 0]])
            piece = imgs[idx][:max_outputs]
            if resize:
                piece = tf.image.resize_nearest_neighbor(
                    piece, size=ref_shape[1:3])
            if not channels_match:
                piece = to_three_channels(piece)
            row = tf.concat([row, piece], axis=2)

        if is_hsv:
            row = tf.image.hsv_to_rgb(row) * 2.0 - 1.0
        image_summaries(row, "image_contrast")
def tfbatch_absolutely_boxes_to_relative_boxes(boxes, width, height):
    """Batched wrapper around ``tfabsolutely_boxes_to_relative_boxes``.

    Args:
        boxes: 3-D tensor ``[batch_size, N, box_dim]``.
        width: forwarded to the unbatched conversion.
        height: forwarded to the unbatched conversion.

    Returns:
        The converted boxes, reshaped back to ``[batch_size, N, box_dim]``.
    """
    with tf.name_scope("batch_absolutely_boxes_to_relative_boxes"):
        batch_size, per_image, box_dim = \
            btf.combined_static_and_dynamic_shape(boxes)
        flat = tf.reshape(boxes, [-1, box_dim])
        relative = tfabsolutely_boxes_to_relative_boxes(flat, width, height)
        return tf.reshape(relative, [batch_size, per_image, box_dim])
def keypoits_rotate(keypoints, angle, width, height):
    """Rotate keypoints about the image centre.

    Args:
        keypoints: tensor whose last axis is ``(x, y)``.
        angle: rotation angle in degrees (negated and converted to radians
            internally).
        width: image width; the centre is ``(width - 1) / 2``.
        height: image height; the centre is ``(height - 1) / 2``.

    Returns:
        Tensor with the shape of ``keypoints``. Points that had a negative
        input coordinate are not rotated; any resulting point outside
        ``[0, width) x [0, height)`` is set to ``(-100, -100)``.
    """
    radians = -angle * 3.1415926 / 180
    cos_a = tf.cos(radians)
    sin_a = tf.sin(radians)
    rotation = tf.reshape(tf.stack([cos_a, -sin_a, sin_a, cos_a]), [2, 2])

    in_shape = btf.combined_static_and_dynamic_shape(keypoints)
    points = tf.reshape(keypoints, [-1, 2])
    src_x, src_y = tf.unstack(points, axis=-1)

    width = tf.to_float(width)
    height = tf.to_float(height)
    center = tf.convert_to_tensor([[(width - 1) / 2, (height - 1) / 2]])

    # Rotate about the centre: shift, multiply, shift back.
    rotated = tf.matmul(rotation, points - center, transpose_b=True)
    rotated = tf.transpose(rotated, [1, 0]) + center
    x, y = tf.unstack(rotated, axis=-1)

    # Points flagged invalid on input (negative coordinate) stay as given.
    was_valid = tf.logical_and(src_x >= 0, src_y >= 0)
    x = tf.where(was_valid, x, src_x)
    y = tf.where(was_valid, y, src_y)

    inside = tf.logical_and(tf.logical_and(x >= 0, x < width),
                            tf.logical_and(y >= 0, y < height))
    x = tf.where(inside, x, tf.ones_like(x) * -100)
    y = tf.where(inside, y, tf.ones_like(y) * -100)

    return tf.reshape(tf.stack([x, y], axis=-1), in_shape)
def adjust(self, ans, det):
    """Shift keypoint locations a quarter pixel toward the higher neighbour.

    Args:
        ans: 4-D tensor whose last axis starts with ``(x, y, value, ...)``.
        det: 4-D tensor ``[B, H, W, num_keypoints]`` sampled at the integer
            neighbours of each location.

    Returns:
        ``ans`` with its first two channels replaced: each coordinate moves
        +0.25 or -0.25 depending on which neighbour in ``det`` is larger,
        then +0.5. Entries whose ``value`` is not positive keep their
        original coordinates.
    """
    coords = ans[..., :2]
    values = ans[..., 2]
    x, y = tf.unstack(coords, axis=-1)
    raw_x, raw_y = x, y
    col = tf.cast(x, tf.int32)
    row = tf.cast(y, tf.int32)

    batch, height, width, num_keypoints = \
        btf.combined_static_and_dynamic_shape(det)
    flat_det = tf.reshape(tf.transpose(det, [0, 3, 1, 2]),
                          [batch * num_keypoints, height * width])

    # Neighbour indices, clamped to the map borders.
    row_next = tf.minimum(row + 1, height - 1)
    row_prev = tf.maximum(row - 1, 0)
    col_next = tf.minimum(col + 1, width - 1)
    col_prev = tf.maximum(col - 1, 0)

    def sample(cols, rows):
        # Look up flat_det at (rows, cols) for every batch/keypoint pair.
        b, n, k = btf.combined_static_and_dynamic_shape(cols)
        cols = tf.reshape(tf.transpose(cols, [0, 2, 1]), [b * k, n])
        rows = tf.reshape(tf.transpose(rows, [0, 2, 1]), [b * k, n])
        picked = tf.batch_gather(flat_det, cols + rows * width)
        return tf.transpose(tf.reshape(picked, [b, k, n]), [0, 2, 1])

    y = tf.where(sample(col, row_next) > sample(col, row_prev),
                 y + 0.25, y - 0.25)
    x = tf.where(sample(col_next, row) > sample(col_prev, row),
                 x + 0.25, x - 0.25)
    x = x + 0.5
    y = y + 0.5

    has_value = values > 0
    x = tf.where(has_value, x, raw_x)
    y = tf.where(has_value, y, raw_y)
    refined = tf.stack([x, y], axis=-1)

    _, _, _, depth = btf.combined_static_and_dynamic_shape(ans)
    _, remainder = tf.split(ans, [2, depth - 2], axis=-1)
    return tf.concat([refined, remainder], axis=-1)
def draw_graph(img, points, adj_mt, relative_coordinate=True):
    """Draw a graph on ``img`` through the ``__draw_graph`` py_func.

    Args:
        img: image tensor; dims 0 and 1 are used as height and width when
            scaling relative points.
        points: point coordinates; with ``relative_coordinate`` they are
            multiplied by ``(shape[1], shape[0])`` and cast to int32.
        adj_mt: adjacency data forwarded unchanged to ``__draw_graph``.
        relative_coordinate: whether ``points`` need scaling to pixels.

    Returns:
        The drawn image, reshaped to the shape of ``img``.
    """
    if relative_coordinate:
        dyn_shape = tf.shape(img)
        scale = tf.cast(
            tf.convert_to_tensor([[dyn_shape[1], dyn_shape[0]]]),
            tf.float32)
        points = tf.to_int32(points * scale)
    img_shape = btf.combined_static_and_dynamic_shape(img)
    drawn = tf.py_func(__draw_graph,
                       inp=(img, points, adj_mt),
                       Tout=img.dtype,
                       stateful=False)
    # py_func loses static shape information; restore it.
    return tf.reshape(drawn, img_shape)
def is_center_in_boxes(boxes0, boxes1):
    """Test, for all pairs, whether a ``boxes1`` centre lies in a ``boxes0`` box.

    Args:
        boxes0: 2-D box tensor with X rows (4 values per row).
        boxes1: 2-D box tensor with Y rows (4 values per row).

    Returns:
        Boolean tensor of shape [X, Y]; entry (i, j) is True when the centre
        of ``boxes1[j]`` is within ``boxes0[i]`` (bounds inclusive).
    """
    rows, _ = btf.combined_static_and_dynamic_shape(boxes0)
    cols, _ = btf.combined_static_and_dynamic_shape(boxes1)
    # Expand both sets to the full X*Y list of pairs.
    outer = tf.tile(tf.expand_dims(boxes0, axis=1), [1, cols, 1])
    inner = tf.tile(tf.expand_dims(boxes1, axis=0), [rows, 1, 1])
    outer = tf.reshape(outer, [-1, 4])
    centers = get_bboxes_center_point(tf.reshape(inner, [-1, 4]))

    first, second = centers[:, 0], centers[:, 1]
    first_ok = tf.logical_and(tf.greater_equal(first, outer[:, 0]),
                              tf.less_equal(first, outer[:, 2]))
    second_ok = tf.logical_and(tf.greater_equal(second, outer[:, 1]),
                               tf.less_equal(second, outer[:, 3]))
    return tf.reshape(tf.logical_and(first_ok, second_ok), [rows, cols])
def forward(self, boxes, gboxes, glabels, glength, *args, **kwargs):
    '''
    Match proposal boxes to groundtruth boxes with a per-groundtruth
    adaptive IoU threshold (see get_threshold), only considering proposals
    whose center lies inside the groundtruth box.

    :param boxes: [1,X,4] or [batch_size,X,4] proposal boxes
    :param gboxes: [batch_size,Y,4] groundtruth boxes
    :param glabels: [batch_size,Y] groundtruth labels
    :param glength: [batch_size] boxes size
    :return:
    labels: [batch_size,X], the label of boxes, 0 for boxes without a
    good match; positive labels are replaced by self.same_pos_label when
    it is set
    scores: [batch_size,X], the overlap score with boxes' match gt box
    indices: [batch_size,X] the index of matched gt boxes when it's a
    positive anchor box, else it's -1
    '''
    with tf.name_scope("ATTSMatcher4"):
        pair_iou = odb.batch_bboxes_pair_wrapv2(gboxes,
                                                boxes,
                                                fn=odb.get_iou_matrix,
                                                len0=glength,
                                                scope="get_iou_matrix")
        center_inside = odb.batch_bboxes_pair_wrapv2(
            gboxes,
            boxes,
            fn=odb.is_center_in_boxes,
            len0=glength,
            dtype=tf.bool,
            scope="get_is_center_in_gtbboxes")
        wsummary.variable_summaries_v2(pair_iou, "iou_matrix")

        with tf.device("/cpu:0"):
            gt_threshold = self.get_threshold(pair_iou)
        gt_threshold = tf.minimum(gt_threshold, self.thresholds[-1])

        # Proposals whose center is outside a gt box cannot match it.
        masked_iou = tf.where(center_inside, pair_iou,
                              tf.zeros_like(pair_iou))
        scores, index = tf.nn.top_k(
            tf.transpose(masked_iou, perm=[0, 2, 1]), k=1)
        batch_size, _, _ = btf.combined_static_and_dynamic_shape(gboxes)
        index = tf.squeeze(index, axis=-1)
        scores = tf.squeeze(scores, axis=-1)

        threshold = wmlt.batch_gather(gt_threshold, index)
        labels = wmlt.batch_gather(glabels,
                                   index,
                                   name="gather_labels",
                                   parallel_iterations=batch_size,
                                   back_prop=False)

        matched = tf.logical_and(
            tf.greater(scores, self.MIN_IOU_THRESHOLD),
            scores >= threshold)
        labels = tf.where(matched, labels, tf.zeros_like(labels))
        index = tf.where(matched, index, tf.ones_like(index) * -1)

        if self.same_pos_label:
            labels = tf.where(tf.greater(labels, 0),
                              tf.ones_like(labels) * self.same_pos_label,
                              labels)

        return (tf.stop_gradient(labels), tf.stop_gradient(scores),
                tf.stop_gradient(index))
def get_values(_xx, _yy):
    """Gather ``det`` at flat index ``_xx + _yy * W``.

    ``det`` and ``W`` come from the enclosing scope. ``_xx`` and ``_yy``
    are 3-D ``[B, N, KN]`` index tensors.

    Returns:
        Gathered values with shape ``[B, N, KN]``.
    """
    batch, count, kinds = btf.combined_static_and_dynamic_shape(_xx)
    # Move the last axis next to the batch axis to line up with det's rows.
    cols = tf.reshape(tf.transpose(_xx, [0, 2, 1]), [batch * kinds, count])
    rows = tf.reshape(tf.transpose(_yy, [0, 2, 1]), [batch * kinds, count])
    picked = tf.batch_gather(det, cols + rows * W)
    picked = tf.reshape(picked, [batch, kinds, count])
    return tf.transpose(picked, [0, 2, 1])
def get_random_crop_bboxes(img, size):
    """Pick a random crop window inside ``img``.

    Args:
        img: image tensor; a leading batch dimension of a 4-D input is
            ignored.
        size: ``(height, width)`` of the requested crop; it is clamped to
            the image size.

    Returns:
        int32 tensor ``[ymin, xmin, ymax, xmax]``.
    """
    img_shape = btf.combined_static_and_dynamic_shape(img)
    if len(img_shape) == 4:
        img_shape = img_shape[1:]
    crop_size = tf.minimum(size, img_shape[:2])
    # Largest valid top-left offset in each direction (0 if crop >= image).
    max_dy = tf.maximum(0, img_shape[0] - size[0])
    max_dx = tf.maximum(0, img_shape[1] - size[1])
    xmin = tf.random.uniform((), 0, max_dx + 1, dtype=tf.int32)
    ymin = tf.random.uniform((), 0, max_dy + 1, dtype=tf.int32)
    window = [ymin, xmin, ymin + crop_size[0], xmin + crop_size[1]]
    return tf.convert_to_tensor(window, dtype=tf.int32)
def get_bboxes_center_point(data):
    """Return the ``(cy, cx)`` centre of every box.

    Args:
        data: tensor whose last axis holds ``(ymin, xmin, ymax, xmax)``.

    Returns:
        Tensor with the shape of ``data`` except that the last dimension
        is 2.
    """
    out_shape = list(btf.combined_static_and_dynamic_shape(data))
    out_shape[-1] = 2
    ymin, xmin, ymax, xmax = tf.unstack(tf.reshape(data, [-1, 4]), axis=1)
    center_y = (ymin + ymax) / 2.
    center_x = (xmin + xmax) / 2.
    centers = tf.stack([center_y, center_x], axis=1)
    return tf.reshape(centers, out_shape)
def top_k(self, det, tags):
    """Select the highest-scoring locations per keypoint type.

    Args:
        det: 4-D score tensor ``[B, H, W, num_keypoints]``; it is first
            passed through ``wnnl.pixel_nms``.
        tags: 5-D tensor ``[B, H, W, num_keypoints, C]``.

    Returns:
        ``(tag_k, loc_k, val_k)``: the tags gathered at the selected
        positions, their float ``(x, y)`` locations, and their scores, for
        the top ``cfg.HRNET_PE_MAX_NUM_PEOPLE`` positions of each keypoint
        type.
    """
    det = wnnl.pixel_nms(det, kernel=self.cfg.HRNET_PE_NMS_KERNEL)
    max_num_people = self.cfg.HRNET_PE_MAX_NUM_PEOPLE
    B, H, W, num_keypoints = btf.combined_static_and_dynamic_shape(det)
    det = tf.reshape(det, [B, H * W, num_keypoints])
    det = tf.transpose(det, [0, 2, 1])
    val_k, indices = tf.nn.top_k(det, k=max_num_people)
    B, H, W, num_keypoints, C = btf.combined_static_and_dynamic_shape(tags)
    tags = tf.reshape(tags, [B, H * W, num_keypoints, C])
    tags = tf.transpose(tags, [0, 2, 1, 3])
    tag_k = tf.batch_gather(tags, indices)
    # indices address the row-major flattened H*W map, so both the column
    # and the row are derived from the row length W.
    x = indices % W
    y = indices // W
    loc_k = tf.stack([x, y], axis=-1)
    loc_k = tf.cast(loc_k, tf.float32)
    return tag_k, loc_k, val_k
def concat_shuffle_split(x, y):
    """Interleave the channels of ``x`` and ``y`` and split them in two.

    Args:
        x: 4-D tensor with a statically known channel dimension.
        y: tensor stackable with ``x``.

    Returns:
        Two tensors, each one half (along axis 3) of the channel-interleaved
        combination of ``x`` and ``y``.
    """
    with tf.name_scope('concat_shuffle_split'):
        dims = btf.combined_static_and_dynamic_shape(x)
        batch_size, height, width = dims[0], dims[1], dims[2]
        depth = x.shape[3].value
        # [batch_size, height, width, 2, depth]
        paired = tf.stack([x, y], axis=3)
        # Swap the last two axes so channels of x and y alternate.
        paired = tf.transpose(paired, [0, 1, 2, 4, 3])
        mixed = tf.reshape(paired, [batch_size, height, width, 2 * depth])
        first, second = tf.split(mixed, num_or_size_splits=2, axis=3)
        return first, second
def to_patch(self, images, scope=None):
    """Cut images into non-overlapping square patches.

    The input shape is stored on ``self`` (``batch_size``, ``height``,
    ``width``, ``channel``) and the result on ``self.patchs``.

    Args:
        images: 4-D tensor ``[batch, height, width, channel]``.
        scope: optional name scope (default name "to_patch").

    Returns:
        Tensor ``[-1, patch_size, patch_size, channel]`` of patches.
    """
    with tf.name_scope(scope, "to_patch"):
        side = self.patch_size
        dims = btf.combined_static_and_dynamic_shape(images)
        batch_size, height, width, channel = dims
        self.batch_size = batch_size
        self.height = height
        self.width = width
        self.channel = channel
        grid = tf.reshape(
            images,
            [batch_size, height // side, side, width // side, side, channel])
        # Bring the two grid axes together, ahead of the in-patch axes.
        grid = tf.transpose(grid, [0, 1, 3, 2, 4, 5])
        self.patchs = tf.reshape(grid, [-1, side, side, channel])
        return self.patchs
def concat_images(images, margin=10):
    """Resize images to a common size and concatenate them along axis 2.

    Args:
        images: sequence of 4-D image tensors; the first sets the target
            height/width.
        margin: width of the separator placed between neighbours, filled
            with the mean of the first image; ``None`` disables it.

    Returns:
        The concatenated tensor.
    """
    with tf.name_scope("concat_images"):
        fill_value = tf.reduce_mean(images[0])
        batch, height, width, channels = \
            btf.combined_static_and_dynamic_shape(images[0])
        target = [height, width]
        use_margin = margin is not None
        if use_margin:
            spacer = tf.ones([batch, height, margin, channels],
                             dtype=images[0].dtype) * fill_value
        pieces = []
        for position, image in enumerate(images):
            if use_margin and position > 0:
                pieces.append(spacer)
            pieces.append(tf.image.resize_bilinear(image, size=target))
        return tf.concat(pieces, axis=2)
def inference(self, inputs, logits):
    """Turn per-class logits into a semantic mask.

    The logits are resized to the input image size, passed through a
    sigmoid, and compared against a constant background channel equal to
    ``cfg.SCORE_THRESH_TEST``; the arg-max over channels is the mask, so
    index 0 means no class beat the threshold.

    Args:
        inputs: container indexed by ``IMAGE``.
        logits: 4-D logits tensor.

    Returns:
        Dict with ``RD_SPARSE_SEMANTIC`` (arg-max indices) and
        ``RD_SEMANTIC`` (their one-hot encoding with
        ``num_classes + 1`` channels).
    """
    image_size = btf.img_size(inputs[IMAGE])
    logits = tf.image.resize_bilinear(logits, image_size, align_corners=True)

    bg_shape = list(btf.combined_static_and_dynamic_shape(logits))
    bg_shape[-1] = 1
    threshold_plane = tf.ones(
        shape=bg_shape, dtype=tf.float32) * self.cfg.SCORE_THRESH_TEST

    scores = tf.concat([threshold_plane, tf.nn.sigmoid(logits)], axis=-1)
    mask = tf.argmax(scores, 3)
    semantic = tf.one_hot(mask,
                          depth=self.num_classes + 1,
                          on_value=1.0,
                          off_value=0.0)
    return {RD_SPARSE_SEMANTIC: mask, RD_SEMANTIC: semantic}