def generate_anchors_opr(self, fm_3x3, fm_stride,
                         anchor_scales=(8, 16, 32, 64, 128),
                         anchor_ratios=(1, 2, 3), base_size=4):
    np_anchors = generate_anchors(
        base_size=base_size,
        ratios=np.array(anchor_ratios),
        scales=np.array(anchor_scales))
    device = fm_3x3.device
    anchors = mge.tensor(np_anchors).to(device)
    height, width = fm_3x3.shape[2], fm_3x3.shape[3]
    shift_x = F.linspace(0, width - 1, width).to(device) * fm_stride
    shift_y = F.linspace(0, height - 1, height).to(device) * fm_stride

    broad_shift_x = F.broadcast_to(shift_x.reshape(1, -1), (height, width)).flatten()
    broad_shift_y = F.broadcast_to(shift_y.reshape(-1, 1), (height, width)).flatten()
    shifts = F.stack(
        [broad_shift_x, broad_shift_y, broad_shift_x, broad_shift_y], axis=1)

    c = anchors.shape[1]
    all_anchors = F.expand_dims(anchors, axis=0) + F.expand_dims(shifts, axis=1)
    all_anchors = all_anchors.reshape(-1, c).detach()
    return all_anchors
def get_center_offsets(self, featmap, stride):
    # f_shp = featmap.shape
    # fm_height, fm_width = f_shp[-2], f_shp[-1]
    fm_height, fm_width = featmap.shape[2:]
    shift_x = F.linspace(0, fm_width - 1, fm_width) * stride
    shift_y = F.linspace(0, fm_height - 1, fm_height) * stride

    # make the mesh grid of shift_x and shift_y
    mesh_shape = (fm_height, fm_width)
    broad_shift_x = F.broadcast_to(shift_x.reshape(1, -1), mesh_shape)
    broad_shift_y = F.broadcast_to(shift_y.reshape(-1, 1), mesh_shape)
    # broad_shift_x = shift_x.reshape(-1, shift_x.shape[0]).broadcast_to(*mesh_shape)
    # broad_shift_y = shift_y.reshape(shift_y.shape[0], -1).broadcast_to(*mesh_shape)

    flatten_shift_x = broad_shift_x.flatten()
    flatten_shift_y = broad_shift_y.flatten()
    shifts = F.stack(
        [flatten_shift_x, flatten_shift_y, flatten_shift_x, flatten_shift_y],
        axis=1)
    # flatten_shift_x = F.add_axis(broad_shift_x.reshape(-1), 1)
    # flatten_shift_y = F.add_axis(broad_shift_y.reshape(-1), 1)
    # shifts = F.concat(
    #     [flatten_shift_x, flatten_shift_y, flatten_shift_x, flatten_shift_y,],
    #     axis=1)
    return shifts
def meshgrid(x, y):
    assert len(x.shape) == 1
    assert len(y.shape) == 1
    mesh_shape = (y.shape[0], x.shape[0])
    mesh_x = F.broadcast_to(x, mesh_shape)
    mesh_y = F.broadcast_to(y.reshape(-1, 1), mesh_shape)
    return mesh_x, mesh_y
def _get_mat3x3(self, image):
    """get perspective matrix used in the transformation
    note: there are only 8 degrees of freedom in a perspective matrix,
    while the output matrix has 9 variables.

    Args:
        image (Tensor): input images (shape: n * 3 * 112 * 112)

    Returns:
        mat3x3 (Tensor): perspective matrix (shape: n * 3 * 3)
    """
    x = self.stem(image)
    x = F.avg_pool2d(x, 7)
    x = F.flatten(x, 1)
    x = self.fc(x)

    s = self.input_size
    # 0.01 here is a magic number; it aims to maintain an identity transform
    # at the early stage of training.
    residual = x.reshape(-1, 3, 3) * 0.01
    base = mge.tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]).astype("float32")
    base = F.broadcast_to(base, residual.shape)
    left_scale = mge.tensor([[s, 0, 0], [0, s, 0], [0, 0, 1]]).astype("float32")
    left_scale = F.broadcast_to(left_scale, residual.shape)
    right_scale = mge.tensor([[1 / s, 0, 0], [0, 1 / s, 0], [0, 0, 1]]).astype("float32")
    right_scale = F.broadcast_to(right_scale, residual.shape)
    mat3x3 = F.matmul(left_scale, F.matmul(base + residual, right_scale))
    return mat3x3
def mask_anchor_opr(gtboxes, im_info, anchors, labels):
    eps = 1e-6
    gtboxes = gtboxes[:im_info[5].astype(np.int32), :]
    ignore_mask = (gtboxes[:, 4] < 0).astype(np.float32)

    mask_flag = F.zeros(labels.shape[0])
    N, K = anchors.shape[0], gtboxes.shape[0]
    p_pred = F.broadcast_to(F.expand_dims(anchors, 1), (N, K, anchors.shape[1]))
    p_gt = F.broadcast_to(F.expand_dims(gtboxes, 0), (N, K, gtboxes.shape[1]))

    max_off = F.concat([
        F.maximum(p_pred[:, :, :2], p_gt[:, :, :2]),
        F.minimum(p_pred[:, :, 2:4], p_gt[:, :, 2:4])
    ], axis=2)

    I = F.maximum(max_off[:, :, 2] - max_off[:, :, 0] + 1, 0) * F.maximum(
        max_off[:, :, 3] - max_off[:, :, 1] + 1, 0)
    A = F.maximum(p_pred[:, :, 2] - p_pred[:, :, 0] + 1, 0) * F.maximum(
        p_pred[:, :, 3] - p_pred[:, :, 1] + 1, 0)
    # I = F.maximum(I, 0)
    # A = F.maximum(A, 0)
    IoA = I / (A + eps)
    IoA = IoA * F.expand_dims(ignore_mask, 0)
    mask_flag = (IoA > 0.5).sum(axis=1) > 0

    labels = labels - F.equal(labels, 0).astype(np.float32) * mask_flag.astype(np.float32)
    return labels
def meshgrid(x, y):
    """meshgrid wrapper for megengine"""
    assert len(x.shape) == 1
    assert len(y.shape) == 1
    mesh_shape = (y.shape[0], x.shape[0])
    mesh_x = F.broadcast_to(x, mesh_shape)
    mesh_y = F.broadcast_to(y.reshape(-1, 1), mesh_shape)
    return mesh_x, mesh_y
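A minimal usage sketch for the meshgrid wrapper above; the values are illustrative and assume megengine is importable. The row/column layout matches np.meshgrid with the default "xy" indexing.

import numpy as np
from megengine import tensor

# x supplies the columns, y supplies the rows; both outputs have shape (2, 3).
x = tensor(np.arange(3, dtype="float32"))            # [0, 1, 2]
y = tensor(np.array([0.0, 10.0], dtype="float32"))   # [0, 10]
mesh_x, mesh_y = meshgrid(x, y)
# mesh_x -> [[0, 1, 2], [0, 1, 2]]
# mesh_y -> [[0, 0, 0], [10, 10, 10]]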
def test_broadcast_auto_infer(is_varnode):
    if is_varnode:
        network = Network()
    else:
        network = None

    x = np.random.random((1, 2, 3)).astype(np.float32)
    xx = make_tensor(x, network)

    for shape in [
        (1, 2, 3),
        (1, None, 3),
    ]:
        yy = F.broadcast_to(xx, shape)
        np.testing.assert_equal(yy.numpy(), x)

    with pytest.raises(ValueError):
        F.broadcast_to(xx, (1, -1, 3))

    with pytest.raises(ValueError):
        F.broadcast_to(xx, (None, 1, 2, 3))

    F.broadcast_to(xx, (1, None, 2, 3))
    t = tensor(2, dtype=np.int32)
    F.broadcast_to(xx, (t, None, 2, 3))
def box_overlap_ignore_opr(box: Tensor, gt: Tensor, ignore_label=-1) -> Tensor:
    """
    Given two lists of boxes of size N and M, compute the IoU
    (intersection over union) between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        box, gt (Tensor): two sets of boxes, containing N and M boxes respectively.

    Returns:
        Tensor: overlaps_normal and overlaps_ignore, each sized [N, M].
    """
    # box = boxes1
    # gt = boxes2
    # target_shape = (boxes1.shapeof()[0], boxes2.shapeof()[0], 4)
    eps = 1e-5
    N, K = box.shape[0], gt.shape[0]
    b_box = F.broadcast_to(F.expand_dims(box, 1), (N, K, box.shape[1]))
    b_gt = F.broadcast_to(F.expand_dims(gt, 0), (N, K, gt.shape[1]))
    # b_box = F.add_axis(boxes1, 1).broadcast(*target_shape)
    # b_gt = F.add_axis(boxes2[:, :4], 0).broadcast(*target_shape)

    iw = F.minimum(b_box[:, :, 2], b_gt[:, :, 2]) - F.maximum(
        b_box[:, :, 0], b_gt[:, :, 0])
    ih = F.minimum(b_box[:, :, 3], b_gt[:, :, 3]) - F.maximum(
        b_box[:, :, 1], b_gt[:, :, 1])
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box = F.maximum(box[:, 2] - box[:, 0], 0) * F.maximum(
        box[:, 3] - box[:, 1], 0)
    area_gt = F.maximum(gt[:, 2] - gt[:, 0], 0) * F.maximum(
        gt[:, 3] - gt[:, 1], 0)
    # area_target_shape = (box.shapeof()[0], gt.shapeof()[0])
    # b_area_box = F.add_axis(area_box, 1).broadcast(*area_target_shape)
    # b_area_gt = F.add_axis(area_gt, 0).broadcast(*area_target_shape)
    b_area_box = F.broadcast_to(F.expand_dims(area_box, 1), (N, K)) + eps
    b_area_gt = F.broadcast_to(F.expand_dims(area_gt, 0), (N, K))
    union = b_area_box + b_area_gt - inter + eps

    overlaps_normal = F.maximum(inter / union, 0)
    overlaps_ignore = F.maximum(inter / b_area_box, 0)
    overlaps = F.maximum(inter / union, 0)

    # gt_ignore_mask = F.add_axis(F.equal(gt[:, 4], ignore_label), 0).broadcast(*area_target_shape)
    ignore_mask = F.equal(gt[:, 4], ignore_label)
    gt_ignore_mask = F.expand_dims(ignore_mask, 0)
    overlaps_normal *= (1 - gt_ignore_mask)
    overlaps_ignore *= gt_ignore_mask
    return overlaps_normal, overlaps_ignore
def box_overlap_opr(box: Tensor, gt: Tensor) -> Tensor:
    """
    Given two lists of boxes of size N and M, compute the IoU
    (intersection over union) between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        box, gt (Tensor): two sets of boxes, containing N and M boxes respectively.

    Returns:
        Tensor: IoU, sized [N, M].
    """
    # box = boxes1
    # gt = boxes2
    # target_shape = (boxes1.shape[0], boxes2.shape[0], 4)
    N, K = box.shape[0], gt.shape[0]
    b_box = F.broadcast_to(F.expand_dims(box, 1), (N, K, box.shape[1]))
    b_gt = F.broadcast_to(F.expand_dims(gt, 0), (N, K, gt.shape[1]))
    # b_gt = F.expand_dims(gt, 0).broadcast_to(N, K, gt.shape[1])
    # b_box = F.expand_dims(boxes1, 1).broadcast(*target_shape)
    # b_gt = F.expand_dims(boxes2, 0).broadcast(*target_shape)

    iw = F.minimum(b_box[:, :, 2], b_gt[:, :, 2]) - F.maximum(
        b_box[:, :, 0], b_gt[:, :, 0])
    ih = F.minimum(b_box[:, :, 3], b_gt[:, :, 3]) - F.maximum(
        b_box[:, :, 1], b_gt[:, :, 1])
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box = F.maximum(box[:, 2] - box[:, 0], 0) * F.maximum(
        box[:, 3] - box[:, 1], 0)
    area_gt = F.maximum(gt[:, 2] - gt[:, 0], 0) * F.maximum(
        gt[:, 3] - gt[:, 1], 0)
    # area_target_shape = (box.shape[0], gt.shapeof()[0])
    b_area_box = F.broadcast_to(F.expand_dims(area_box, 1), (N, K))
    b_area_gt = F.broadcast_to(F.expand_dims(area_gt, 0), (N, K))
    # b_area_box = F.expand_dims(area_box, 1).broadcast_to(N, K)
    # b_area_gt = F.expand_dims(area_gt, 0).broadcast_to(N, K)
    # b_area_box = F.add_axis(area_box, 1).broadcast(*area_target_shape)
    # b_area_gt = F.add_axis(area_gt, 0).broadcast(*area_target_shape)

    union = b_area_box + b_area_gt - inter
    overlaps = F.maximum(inter / union, 0)
    return overlaps
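A small, self-contained sanity check for box_overlap_opr above; the box values are illustrative and assume the function and megengine are importable. Note that this function computes areas from the raw width and height, with no "+1" pixel convention.

import numpy as np
from megengine import tensor

boxes = tensor(np.array([[0., 0., 10., 10.]], dtype="float32"))   # N = 1
gts = tensor(np.array([[5., 5., 15., 15.]], dtype="float32"))     # M = 1
iou = box_overlap_opr(boxes, gts)   # shape (1, 1)
# intersection = 5 * 5 = 25, union = 100 + 100 - 25 = 175
# iou[0, 0] ≈ 25 / 175 ≈ 0.143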
def anchor_iou_target_opr(self, boxes, im_info, all_anchors, rpn_bbox_offsets):
    n = rpn_bbox_offsets.shape[0]
    res = []
    for i in range(n):
        gtboxes = boxes[i, :im_info[i, 5].astype(np.int32)]
        offsets = rpn_bbox_offsets[i].reshape(-1, 4).detach()
        m = offsets.shape[0]
        an, ac = all_anchors.shape[0], all_anchors.shape[1]
        anchors = F.broadcast_to(F.expand_dims(all_anchors, 1), (an, 1, ac)).reshape(-1, ac)
        dtboxes = bbox_transform_inv_opr(anchors[:, :4], offsets[:, :4])
        overlaps = box_overlap_opr(dtboxes, gtboxes[:, :4])
        ignore_mask = 1 - F.equal(
            gtboxes[:, 4], config.anchor_ignore_label).astype(np.float32)
        ignore_mask = F.expand_dims(ignore_mask, axis=0)
        overlaps = overlaps * ignore_mask

        index = F.argmax(overlaps, axis=1)
        value = F.nn.indexing_one_hot(overlaps, index, 1)
        value = F.expand_dims(F.expand_dims(value, axis=1), axis=0)
        res.append(value)

    result = F.concat(res, 0)
    return result
def forward(self, mid, ref):
    B, C, H, W = mid.shape
    mid = F.normalize(mid, p=2, axis=1)
    ref = F.normalize(ref, p=2, axis=1)

    cost_volume, ref = compute_cost_volume(
        mid, ref, max_displacement=self.d)  # [B, (2d+1)**2, H, W]
    cost_volume = F.dimshuffle(cost_volume, (0, 2, 3, 1))
    cost_volume = cost_volume.reshape((-1, (2 * self.d + 1)**2))

    # argmax
    indices = F.top_k(cost_volume, k=self.K, descending=True)[1]  # [B*H*W, K]
    del cost_volume

    ref_list = []  # [B, C, H, W]
    origin_i_j = F.arange(0, H * W, 1)  # float32
    origin_i = F.floor(origin_i_j / W)  # (H*W, )
    origin_j = F.mod(origin_i_j, W)  # (H*W, )
    del origin_i_j

    # reshape ref
    ref = ref.reshape((B, C, (H + 2 * self.d) * (W + 2 * self.d)))

    for i in range(self.K):
        index = indices[:, i]  # [B*H*W, ]
        index = index.reshape((-1, H * W))
        index_i = F.floor(index / (2 * self.d + 1)) + origin_i  # [B, H*W]
        index_j = F.mod(index, (2 * self.d + 1)) + origin_j  # [B, H*W]
        # compute the flattened index from each pixel's (i, j)
        index = index_i * W + index_j  # [B, H*W]
        index = index.astype('int32')
        # add axis
        index = F.add_axis(index, axis=1)  # [B, 1, H*W]
        # broadcast
        index = F.broadcast_to(index, (B, C, H * W))
        # gather
        output = F.gather(ref, axis=2, index=index)  # [B, C, H*W]
        ref_list.append(output.reshape((B, C, H, W)))

    return self.conv(F.concat(ref_list, axis=1))
def forward(self, in_tensor):
    avg_pool = F.avg_pool2d(
        in_tensor, (in_tensor.shape[2], in_tensor.shape[3]),
        stride=(in_tensor.shape[2], in_tensor.shape[3]))
    x = self.gate_c(avg_pool)
    x = F.expand_dims(x, axis=[2, 3])  # b,48,1,1
    x = F.broadcast_to(x, in_tensor.shape)  # b,48,h,w
    return x
def test_Broadcast():
    x_np = np.random.rand(3, 3, 1).astype("float32")
    x = TensorWrapper(x_np)

    grad = Grad().wrt(x, callback=save_to(x))
    y = F.broadcast_to(x, (3, 3, 10))

    grad(y, F.ones_like(y))
    np.testing.assert_equal(
        np.ones((3, 3, 1), dtype=np.float32) * 10, x.grad.numpy())
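The expected gradient of 10 follows from the backward rule of broadcast_to: broadcasting copies the input along the new size-10 axis, so its gradient sums the incoming gradient over that axis. With an all-ones output gradient of shape (3, 3, 10), each element of the (3, 3, 1) input therefore accumulates ten contributions.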
def get_flow_mge(H_mat_mul, patch_indices, image_size_h=600, image_size_w=800):
    # (N, 6, 3, 3)
    batch_size = H_mat_mul.shape[0]
    divide = H_mat_mul.shape[1]
    H_mat_mul = mge.Tensor(H_mat_mul.reshape(batch_size, divide, 3, 3))

    small_patch_sz = [image_size_h // divide, image_size_w]
    small = 1e-7

    H_mat_pool = F.zeros((batch_size, image_size_h, image_size_w, 3, 3))
    for i in range(divide):
        H_mat = H_mat_mul[:, i, :, :]

        if i == divide - 1:
            H_mat = F.broadcast_to(
                F.expand_dims(F.expand_dims(H_mat, 1), 1),
                (batch_size, image_size_h - i * small_patch_sz[0], image_size_w, 3, 3))
            H_mat_pool[:, i * small_patch_sz[0]:, ...] = H_mat
            continue

        H_mat = F.broadcast_to(
            F.expand_dims(F.expand_dims(H_mat, 1), 1),
            (batch_size, small_patch_sz[0], image_size_w, 3, 3))
        H_mat_pool[:, i * small_patch_sz[0]:(i + 1) * small_patch_sz[0], ...] = H_mat

    pred_I2_index_warp = F.expand_dims(patch_indices.transpose(0, 2, 3, 1), 4)
    pred_I2_index_warp = F.matmul(
        H_mat_pool, pred_I2_index_warp)[:, :, :, :, 0].transpose(0, 3, 1, 2)

    T_t = pred_I2_index_warp[:, 2:3, ...]
    smallers = 1e-6
    T_t = T_t + smallers
    v1 = pred_I2_index_warp[:, 0:1, ...]
    v2 = pred_I2_index_warp[:, 1:2, ...]
    v1 = v1 / T_t
    v2 = v2 / T_t
    warp_index = F.concat((v1, v2), 1)
    vgrid = patch_indices[:, :2, ...]

    flow = warp_index - vgrid
    return flow
def recover_pred_boxes(self, rcnn_rois, prob, nhead):
    n = prob.shape[0]
    prob = prob.reshape(n, nhead, -1)
    prob = prob.reshape(-1, prob.shape[2])

    cls_score, bbox_pred = prob[:, -self.n:], prob[:, :-self.n]
    cls_prob = F.softmax(cls_score, axis=1)
    m, c = rcnn_rois.shape
    rois = F.broadcast_to(F.expand_dims(rcnn_rois, axis=1), (m, nhead, c)).reshape(-1, c)
    bbox_pred = bbox_pred.reshape(n * nhead, -1, 4)
    pred_boxes = restore_bbox(rois[:, 1:5], bbox_pred, config=config)
    cls_prob = F.expand_dims(cls_prob, axis=2)
    pred_boxes = F.concat([pred_boxes, cls_prob], axis=2)

    n, c = bbox_pred.shape[:2]
    bid = F.broadcast_to(F.expand_dims(rois[:, :1], axis=1), (n, c, 1))
    pred_boxes = F.concat([pred_boxes, bid], axis=2)
    return pred_boxes.detach()
def forward(self, data, quad):
    """
    data: (1, 3, 48, 160)
    quad: (1, 4, 2)
    """
    N = quad.shape[0]
    dst = F.repeat(self.bb_out, N, axis=0).reshape(-1, 4, 2)
    I = F.broadcast_to(self.I, quad.shape)
    A = F.broadcast_to(self.A, (N, 8, 8))
    A[:, 0:4, 0:2] = quad
    A[:, 4:8, 5:6] = I[:, :, 0:1]
    A[:, 0:4, 6:8] = -quad * dst[:, :, 0:1]
    A[:, 4:8, 3:5] = quad
    A[:, 0:4, 2:3] = I[:, :, 0:1]
    A[:, 4:8, 6:8] = -quad * dst[:, :, 1:2]
    B = dst.transpose(0, 2, 1).reshape(-1, 8, 1)
    M = F.concat(
        [F.matmul(F.matinv(A), B)[:, :, 0], I[:, 0:1, 0]], axis=1).reshape(-1, 3, 3)
    new_data = F.warp_perspective(data, M, (48, 160))  # (N, 3, 48, 160)
    return {"data": new_data}
def forward(self, data, idx, roi):
    N, H, W, C = data.shape
    xmax = roi[:, 1, 0]
    xmin = roi[:, 0, 0]
    ymax = roi[:, 1, 1]
    ymin = roi[:, 0, 1]
    scale = F.maximum((xmax - xmin) / W, (ymax - ymin) / H)
    I = F.broadcast_to(self.I, (N,))
    M = F.broadcast_to(self.M, (N, 3, 3))
    M[:, 0, 0] = scale
    M[:, 0, 2] = xmin
    M[:, 1, 1] = scale
    M[:, 1, 2] = ymin
    M[:, 2, 2] = I
    resized = (
        F.warp_perspective(
            data, M, (H, W), mat_idx=idx, border_mode="CONSTANT", format="NHWC"
        )
        .transpose(0, 3, 1, 2)
        .astype(np.float32)
    )
    return resized
def refinement_module(self, prob, fc2):
    m = prob.reshape(-1, 5 * self.n)
    offsets, scores = m[:, :-self.n], m[:, -self.n:]
    n = offsets.shape[0]
    offsets = offsets.reshape(-1, self.n, 4)
    cls_scores = F.expand_dims(F.softmax(scores, axis=1), axis=2)
    pred_boxes = F.concat([offsets, cls_scores], axis=2)[:, 1]

    n, c = pred_boxes.shape
    pred_boxes = F.broadcast_to(
        F.expand_dims(pred_boxes, axis=1), (n, 6, c)).reshape(n, -1)

    n, c = fc2.shape
    fc3 = F.broadcast_to(F.expand_dims(fc2, axis=1), (n, 2, c)).reshape(-1, c)
    fc3 = F.concat([fc3, pred_boxes], axis=1)
    fc3 = self.relu(self.fc3(fc3))
    fc3 = fc3.reshape(n, 2, -1).transpose(1, 0, 2)

    a = self.q(fc3[0])
    b = self.r(fc3[1])
    prob = F.stack([a, b], axis=1).reshape(-1, 10 * self.n)
    return prob
def test_broadcast():
    input1_shape = (20, 30)
    output1_shape = (30, 20, 30)
    data1 = np.random.random(input1_shape).astype(np.float32)

    input2_shape = (10, 1)
    output2_shape = (20, 10, 20)
    data2 = np.random.random(input2_shape).astype(np.float32)

    def compare_fn(x, y):
        assert x.shape[0] == y

    cases = [
        {"input": [data1, output1_shape], "output": output1_shape},
        {"input": [data2, output2_shape], "output": output2_shape},
    ]
    opr_test(cases, F.broadcast_to, compare_fn=compare_fn)

    x = F.ones((2, 1, 3))
    with pytest.raises(RuntimeError):
        F.broadcast_to(x, (2, 3, 4))

    with pytest.raises(RuntimeError):
        F.broadcast_to(x, (4, 1, 3))

    with pytest.raises(RuntimeError):
        F.broadcast_to(x, (1, 3))
def generate_anchors_by_features(self, sizes, device):
    all_anchors = []
    assert len(sizes) == self.num_features, (
        "input features expected {}, got {}".format(self.num_features, len(sizes))
    )
    for size, stride in zip(sizes, self.strides):
        grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
        grids = F.stack([grid_x, grid_y], axis=1)
        all_anchors.append(
            F.broadcast_to(
                F.expand_dims(grids, axis=1), (grids.shape[0], self.num_anchors, 2)
            ).reshape(-1, 2)
        )  # FIXME: need F.repeat
    return all_anchors
def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
    rcnn_rois, labels, bbox_targets = self.get_ground_truth(
        rcnn_rois, im_info, gt_boxes)

    fpn_fms = [fpn_fms[x] for x in self.in_features]
    pool_features = layers.roi_pool(
        fpn_fms, rcnn_rois, self.stride, self.pooling_size, self.pooling_method,
    )
    flatten_feature = F.flatten(pool_features, start_axis=1)
    roi_feature = F.relu(self.fc1(flatten_feature))
    roi_feature = F.relu(self.fc2(roi_feature))
    pred_logits = self.pred_cls(roi_feature)
    pred_offsets = self.pred_delta(roi_feature)

    if self.training:
        # loss for rcnn classification
        loss_rcnn_cls = F.loss.cross_entropy(pred_logits, labels, axis=1)
        # loss for rcnn regression
        pred_offsets = pred_offsets.reshape(-1, self.cfg.num_classes, 4)
        num_samples = labels.shape[0]
        fg_mask = labels > 0
        loss_rcnn_bbox = layers.smooth_l1_loss(
            pred_offsets[fg_mask, labels[fg_mask] - 1],
            bbox_targets[fg_mask],
            self.cfg.rcnn_smooth_l1_beta,
        ).sum() / F.maximum(num_samples, 1)

        loss_dict = {
            "loss_rcnn_cls": loss_rcnn_cls,
            "loss_rcnn_bbox": loss_rcnn_bbox,
        }
        return loss_dict
    else:
        # slice 1 for removing background
        pred_scores = F.softmax(pred_logits, axis=1)[:, 1:]
        pred_offsets = pred_offsets.reshape(-1, 4)
        target_shape = (rcnn_rois.shape[0], self.cfg.num_classes, 4)
        # rois (N, 4) -> (N, 1, 4) -> (N, 80, 4) -> (N * 80, 4)
        base_rois = F.broadcast_to(
            F.expand_dims(rcnn_rois[:, 1:5], axis=1), target_shape).reshape(-1, 4)
        pred_bbox = self.box_coder.decode(base_rois, pred_offsets)
        return pred_bbox, pred_scores
def forward(self, input_ids, token_type_ids=None):
    seq_length = input_ids.shape[1]
    if token_type_ids is None:
        token_type_ids = F.zeros_like(input_ids)

    position_ids = F.linspace(0, seq_length - 1, seq_length).astype(np.int32)
    position_ids = F.broadcast_to(F.expand_dims(position_ids, 0), input_ids.shape)

    words_embeddings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)

    embeddings = words_embeddings + position_embeddings + token_type_embeddings
    embeddings = self.LayerNorm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings
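A shape-only sketch of the position-id construction used above, assuming numpy and megengine.functional are imported as np and F; the batch and sequence sizes are illustrative.

import numpy as np
import megengine.functional as F

batch, seq_length = 2, 4
position_ids = F.linspace(0, seq_length - 1, seq_length).astype(np.int32)   # shape (4,)
position_ids = F.broadcast_to(F.expand_dims(position_ids, 0), (batch, seq_length))
# position_ids -> [[0, 1, 2, 3], [0, 1, 2, 3]], one row of positions per batch entry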
def restore_bbox(rois, deltas, unnormalize=True, config=None):
    assert deltas.ndim == 3
    if unnormalize:
        std_opr = mge.tensor(config.bbox_normalize_stds.reshape(1, 1, -1))
        mean_opr = mge.tensor(config.bbox_normalize_means.reshape(1, 1, -1))
        deltas = deltas * std_opr
        deltas = deltas + mean_opr

    # n = deltas.shape[1]
    n, c = deltas.shape[0], deltas.shape[1]
    all_rois = F.broadcast_to(
        F.expand_dims(rois, 1), (n, c, rois.shape[1])).reshape(-1, rois.shape[1])
    deltas = deltas.reshape(-1, deltas.shape[2])
    pred_bbox = bbox_transform_inv_opr(all_rois, deltas)
    pred_bbox = pred_bbox.reshape(-1, c, pred_bbox.shape[1])
    return pred_bbox
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    fpn_fms = fpn_fms[1:]
    fpn_fms.reverse()
    stride = [4, 8, 16, 32]
    poo5, rcnn_rois, labels, bbox_targets = roi_pool(
        fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align',
        labels, bbox_targets)
    poo5 = F.flatten(poo5, start_axis=1)
    fc1 = F.relu(self.fc1(poo5))
    fc2 = F.relu(self.fc2(fc1))

    cls_scores = self.cls(fc2)
    pred_boxes = self.bbox(fc2)
    # a = self.a(fc2)
    # b = self.b(fc2)
    # prob = F.stack([a, b], axis=1).reshape(-1, a.shape[1])
    prob = F.concat([pred_boxes, cls_scores], axis=1)

    if self.training:
        # emd_loss = self.compute_gemini_loss(prob, bbox_targets, labels)
        bbox_targets, labels = bbox_targets.reshape(-1, 4), labels.flatten()
        cls_loss = softmax_loss(cls_scores, labels)

        pred_boxes = pred_boxes.reshape(-1, self.n, 4)
        bbox_loss = smooth_l1_loss_rcnn(pred_boxes, bbox_targets, labels,
                                        config.rcnn_smooth_l1_beta)

        loss_dict = {}
        loss_dict['cls_loss'] = cls_loss
        loss_dict['bbox_loss'] = bbox_loss
        return loss_dict
    else:
        offsets, cls_scores = prob[:, :-self.n], prob[:, -self.n:]
        pred_bbox = offsets.reshape(-1, self.n, 4)
        cls_prob = F.softmax(cls_scores, axis=1)
        n = rcnn_rois.shape[0]
        rois = F.broadcast_to(F.expand_dims(rcnn_rois[:, 1:5], axis=1),
                              (n, 1, 4)).reshape(-1, 4)
        normalized = config.rcnn_bbox_normalize_targets
        pred_boxes = restore_bbox(rois, pred_bbox, normalized, config)
        pred_bbox = F.concat([pred_boxes, F.expand_dims(cls_prob, axis=2)], axis=2)
        return pred_bbox
def smooth_l1_loss_rcnn_opr(pred, gt, label, sigma=1, background=0,
                            ignore_label=-1):
    """
    pred  : (minibatch, class_num, 4)
    gt    : (minibatch, 4)
    label : (minibatch, )
    """
    # repeat each label across the 4 box coordinates: (minibatch, 4)
    broadcast_label = F.broadcast_to(label.reshape(-1, 1),
                                     (label.shape[0], pred.shape[-1]))
    broadcast_mask, broadcast_mask_ig = _get_mask_of_label(
        broadcast_label, background, ignore_label)
    vlabel = broadcast_label * broadcast_mask
    pred_corr = F.nn.indexing_one_hot(pred, vlabel.astype(np.int32), 1)
    value = _smooth_l1_base(pred_corr, gt, sigma)
    loss = (value * broadcast_mask).sum(axis=1)
    return loss
def _recover_dtboxes(self, anchors_list, rpn_cls_list, rpn_bbox_list, rpn_iou_list):
    assert rpn_cls_list[0].shape[0] == 1
    all_anchors = F.concat(anchors_list, axis=0)
    rpn_cls_scores_final = F.concat(rpn_cls_list, axis=1)[0]
    rpn_bbox_offsets_final = F.concat(rpn_bbox_list, axis=1)[0]
    rpn_iou_prob_final = F.concat(rpn_iou_list, axis=1)[0]

    rpn_bbox_offsets = rpn_bbox_offsets_final.reshape(-1, 4)
    rpn_cls_scores = rpn_cls_scores_final.reshape(-1, 1)
    rpn_iou_prob = rpn_iou_prob_final.reshape(-1, 1)

    n, c = all_anchors.shape[0], all_anchors.shape[1]
    anchors = F.broadcast_to(F.expand_dims(all_anchors, 1), (n, 1, c)).reshape(-1, c)
    rpn_bbox = bbox_transform_inv_opr(anchors, rpn_bbox_offsets)
    pred_boxes = F.concat([rpn_bbox, rpn_cls_scores, rpn_iou_prob], axis=1)
    return pred_boxes
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    fpn_fms = fpn_fms[1:]
    fpn_fms.reverse()
    stride = [4, 8, 16, 32]
    poo5, rcnn_rois, labels, bbox_targets = roi_pool(
        fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align',
        labels, bbox_targets)
    poo5 = F.flatten(poo5, start_axis=1)
    fc1 = F.relu(self.fc1(poo5))
    fc2 = F.relu(self.fc2(fc1))

    a = self.a(fc2)
    b = self.b(fc2)
    prob = F.stack([a, b], axis=1).reshape(-1, a.shape[1])

    if self.refinement:
        final_prob = self.refinement_module(prob, fc2)

    if self.training:
        emd_loss = self.compute_gemini_loss(prob, bbox_targets, labels)
        loss_dict = {}
        loss_dict['loss_rcnn_emd'] = emd_loss
        if self.refinement:
            final_emd_loss = self.compute_gemini_loss(
                final_prob, bbox_targets, labels)
            loss_dict['final_rcnn_emd'] = final_emd_loss
        return loss_dict
    else:
        offsets, cls_scores = prob[:, :-self.n], prob[:, -self.n:]
        pred_bbox = offsets.reshape(-1, self.n, 4)
        cls_prob = F.softmax(cls_scores, axis=1)
        n = rcnn_rois.shape[0]
        rois = F.broadcast_to(F.expand_dims(rcnn_rois[:, 1:5], axis=1),
                              (n, 2, 4)).reshape(-1, 4)
        normalized = config.rcnn_bbox_normalize_targets
        pred_boxes = restore_bbox(rois, pred_bbox, normalized, config)
        pred_bbox = F.concat(
            [pred_boxes, F.expand_dims(cls_prob, axis=2)], axis=2)
        return pred_bbox
def forward(self, now_LR, pre_h_SD):
    """
    now_LR: B,3,H,W
    pre_h_SD: B,48,H,W
    """
    batch, C, H, W = pre_h_SD.shape
    kernels = self.conv(now_LR)  # [B, k*k, H, W]
    batchwise_ans = []
    for idx in range(batch):
        kernel = kernels[idx]  # [k*k, H, W]
        kernel = F.dimshuffle(kernel, (1, 2, 0))  # [H, W, k*k]
        kernel = F.reshape(kernel, (H, W, 1, self.K, self.K, 1))
        kernel = F.broadcast_to(kernel, (C, H, W, 1, self.K, self.K, 1))
        batchwise_ans.append(
            F.local_conv2d(
                F.add_axis(pre_h_SD[idx], 0), kernel, [1, 1], [1, 1],
                [1, 1]))  # [1, C, H, W] some bug with padding
    similarity_matrix = F.concat(batchwise_ans, axis=0)  # [B,C,H,W]
    del batchwise_ans

    similarity_matrix = F.sigmoid(similarity_matrix)
    return F.multiply(pre_h_SD, similarity_matrix)
def test_broadcast(is_varnode):
    if is_varnode:
        network = Network()
    else:
        network = None

    input1_shape = (20, 30)
    output1_shape = (30, 20, 30)
    data1 = np.random.random(input1_shape).astype(np.float32)

    input2_shape = (10, 1)
    output2_shape = (20, 10, 20)
    data2 = np.random.random(input2_shape).astype(np.float32)

    input3_shape = (10, 10)
    output3_shape = (10, 10)
    data3 = np.random.random(input3_shape).astype(np.float32)

    def compare_fn(x, y):
        assert x._tuple_shape[0] == y

    cases = [
        {"input": [data1, output1_shape], "output": output1_shape},
        {"input": [data2, output2_shape], "output": output2_shape},
        {"input": [data3, output3_shape], "output": output3_shape},
    ]
    opr_test(cases, F.broadcast_to, compare_fn=compare_fn, network=network)

    x = F.ones((2, 1, 3))
    with pytest.raises(RuntimeError):
        F.broadcast_to(x, (2, 3, 4))

    with pytest.raises(RuntimeError):
        F.broadcast_to(x, (4, 1, 3))

    with pytest.raises(RuntimeError):
        F.broadcast_to(x, (1, 3))
def forward(self, now_LR, pre_h_SD):
    """
    now_LR: B,3,H,W
    pre_h_SD: B,64,H,W
    """
    pad = self.K // 2
    batch, C, H, W = pre_h_SD.shape
    kernels = self.conv(now_LR)  # [B, k*k, H, W]

    # pad pre_h_SD along H and W
    similarity_matrix = F.zeros_like(pre_h_SD)
    pre_h_SD = add_H_W_Padding(pre_h_SD, margin=pad)
    for i in range(self.K):
        for j in range(self.K):
            # element-wise product between the kernel weight and the shifted feature
            kernel = kernels[:, i * self.K + j, :, :]  # [B, H, W]
            kernel = F.add_axis(kernel, axis=1)  # [B, 1, H, W]
            kernel = F.broadcast_to(kernel, [batch, C, H, W])
            corr = kernel * pre_h_SD[:, :, i:(H + i), j:(W + j)]
            similarity_matrix = similarity_matrix + corr  # [B, C, H, W]

    similarity_matrix = F.sigmoid(similarity_matrix)
    return F.multiply(pre_h_SD[:, :, pad:(H + pad), pad:(W + pad)],
                      similarity_matrix)