def test_generator_batch(image, *, netG):
    """Run the recurrent generator over a clip in eval mode.

    The clip is traversed from both ends at once: frame ``t`` and frame
    ``T - 1 - t`` are concatenated along the batch axis, so one call to
    ``netG`` advances a forward-in-time and a backward-in-time hidden state
    together.

    Args:
        image: clip unpacked as ``B, T, _, h, w = image.shape`` (the original
            comment gives ``[1, 100, 3, 180, 320]`` as an example).
        netG: generator exposing ``hidden_channels``, ``flownet``,
            ``do_upsample`` and callable as ``netG(hidden, flow, frame)``.

    Returns:
        The per-frame ``do_upsample`` outputs stacked on axis 1, plus
        ``get_bilinear(image)``.
    """
    batch, num_frames, _, height, width = image.shape
    bilinear_up = get_bilinear(image)
    netG.eval()

    fwd_states, bwd_states = [], []
    # Rows [0:batch] carry the forward pass, rows [batch:2*batch] the backward pass.
    state = F.zeros((2 * batch, netG.hidden_channels, height, width))
    for t in range(num_frames):
        mirror = num_frames - t - 1
        current = F.concat([image[:, t, ...], image[:, mirror, ...]], axis=0)
        if t == 0:
            # No previous frame yet: estimate flow of the frame against itself.
            reference = current
        else:
            reference = F.concat(
                [image[:, t - 1, ...], image[:, mirror + 1, ...]], axis=0)
        flow = netG.flownet(current, reference)
        state = netG(state, flow, current)
        fwd_states.append(state[0:batch, ...])
        bwd_states.append(state[batch:2 * batch, ...])

    # The backward state for frame t was produced at step T - 1 - t.
    frames = [
        netG.do_upsample(fwd_states[t], bwd_states[num_frames - t - 1])
        for t in range(num_frames)
    ]
    stacked = F.stack(frames, axis=1)  # original comment: [B,T,3,H,W]
    return stacked + bilinear_up
def test_preprocess():
    """Check a traced preprocessing module survives pickle, re-trace and dump.

    The outputs of the re-traced module, the ``trace``-compiled function and
    the dumped graph run through ``cgtools.GraphInference`` are each compared
    with the eager output of ``Net``.
    """
    module = Main()
    data = F.ones((1, 14, 8, 8), dtype=np.uint8)
    traced_module = trace_module(module, data)

    # Pickle round trip of the traced module (self-produced bytes only).
    traced_module = pickle.loads(pickle.dumps(traced_module))

    module = Net(traced_module)
    module.eval()
    idx = F.zeros((1, ), dtype=np.int32)
    roi = F.ones((1, 2, 2), dtype=np.float32)
    inputs = (data, idx, roi)
    y = module(*inputs)

    traced_module = trace_module(module, *inputs)
    np.testing.assert_array_equal(traced_module(*inputs), y)

    func = trace(traced_module, capture_as_const=True)
    np.testing.assert_array_equal(func(*inputs), y)

    model = io.BytesIO()
    func.dump(model, arg_names=("data", "idx", "roi"))
    model.seek(0)
    infer_cg = cgtools.GraphInference(model)
    feed = {
        "data": data.numpy(),
        "idx": idx.numpy(),
        "roi": roi.numpy(),
    }
    outputs = infer_cg.run(inp_dict=feed)
    first_output = list(outputs.values())[0]
    np.testing.assert_allclose(first_output, y, atol=1e-6)
def anchor_iou_target_opr(self, boxes, im_info, all_anchors, rpn_bbox_offsets):
    """Build per-anchor IoU targets for two box predictions per anchor.

    For every image, each anchor is duplicated twice and decoded with the
    predicted offsets. The first prediction's target is its best IoU over the
    ground-truth boxes; the second prediction's target is its best IoU after
    the ground truth chosen by the first prediction has been zeroed out.

    Args:
        boxes: per-image ground-truth boxes; column 4 holds the label.
        im_info: per-image info; column 5 is read as the number of valid boxes.
        all_anchors: anchors, first four columns are the box coordinates.
        rpn_bbox_offsets: predicted offsets, reshaped to ``(-1, 4)`` per image.

    Returns:
        Tensor obtained by concatenating, along axis 0, one
        ``(1, num_anchors, 2)`` IoU target per image.
    """
    num_images = rpn_bbox_offsets.shape[0]
    per_image = []
    for img in range(num_images):
        valid_gt = boxes[img, :im_info[img, 5].astype(np.int32)]
        deltas = rpn_bbox_offsets[img].reshape(-1, 4).detach()

        # Repeat every anchor twice so it lines up with the two predictions.
        num_anchor, anchor_dim = all_anchors.shape[0], all_anchors.shape[1]
        doubled = F.broadcast_to(
            F.expand_dims(all_anchors, 1),
            (num_anchor, 2, anchor_dim)).reshape(-1, anchor_dim)

        decoded = bbox_transform_inv_opr(doubled[:, :4], deltas[:, :4])
        iou = box_overlap_opr(decoded, valid_gt[:, :4])

        # Zero the IoU columns belonging to ignore-labelled ground truth.
        keep = 1 - F.equal(
            valid_gt[:, 4], config.anchor_ignore_label).astype(np.float32)
        iou = iou * F.expand_dims(keep, axis=0)

        # (anchors * 2, gt) -> (2, anchors, gt)
        iou = iou.reshape(-1, 2, iou.shape[1]).transpose(1, 0, 2)
        first, second = iou[0], iou[1]

        best = F.argmax(first, axis=1)
        first = F.nn.indexing_one_hot(first, best, 1)
        # The second prediction may not claim the gt taken by the first one.
        second = F.scatter(second, 1, best.reshape(-1, 1),
                           F.zeros([second.shape[0], 1]))
        best = F.argmax(second, axis=1)
        second = F.nn.indexing_one_hot(second, best, 1)

        per_image.append(
            F.expand_dims(F.stack([first, second], axis=1), axis=0))
    return F.concat(per_image, 0)
def __init__(self):
    """Create the constant tensors ``A``, ``I`` and ``bb_out`` of this module."""
    super().__init__()
    unit_shape = (1, )
    self.A = F.zeros(unit_shape)
    self.I = F.ones(unit_shape)
    # Four (x, y) points; presumably the corners of a 160x48 output box,
    # to be confirmed against the code that consumes ``bb_out``.
    corner_points = [[[0, 0], [160, 0], [160, 48], [0, 48]]]
    self.bb_out = mge.tensor(np.array(corner_points, dtype="float32"))
def train_generator_batch(image, label, *, gm, netG, netloss):
    """Run one training step of the recurrent generator and return the loss.

    Frame ``t`` and frame ``T - 1 - t`` are concatenated along the batch axis
    so a single recurrent call updates the forward and backward hidden states
    together. The loss is back-propagated through ``gm``; in distributed runs
    the returned loss is averaged over all workers.

    Args:
        image: clip unpacked as ``B, T, _, h, w = image.shape``.
        label: target passed to ``netloss``.
        gm: gradient manager used as a context manager and for ``backward``.
        netG: generator exposing ``hidden_channels``, ``flownet``,
            ``do_upsample`` and callable as ``netG(hidden, flow, frame)``.
        netloss: callable ``netloss(prediction, label)``.

    Returns:
        The loss tensor (all-reduced mean when distributed).
    """
    batch, num_frames, _, height, width = image.shape
    bilinear_up = get_bilinear(image)
    netG.train()
    with gm:
        fwd_states, bwd_states = [], []
        # Rows [0:batch] carry the forward pass, rows [batch:2*batch] the backward pass.
        state = F.zeros((2 * batch, netG.hidden_channels, height, width))
        for t in range(num_frames):
            mirror = num_frames - t - 1
            current = F.concat(
                [image[:, t, ...], image[:, mirror, ...]], axis=0)
            if t == 0:
                # No previous frame yet: flow of the frame against itself.
                reference = current
            else:
                reference = F.concat(
                    [image[:, t - 1, ...], image[:, mirror + 1, ...]], axis=0)
            flow = netG.flownet(current, reference)
            state = netG(state, flow, current)
            fwd_states.append(state[0:batch, ...])
            bwd_states.append(state[batch:2 * batch, ...])

        # The backward state for frame t was produced at step T - 1 - t.
        frames = [
            netG.do_upsample(fwd_states[t], bwd_states[num_frames - t - 1])
            for t in range(num_frames)
        ]
        prediction = F.stack(frames, axis=1)  # original comment: [B,T,3,H,W]
        loss = netloss(prediction + bilinear_up, label)
        gm.backward(loss)
        if dist.is_distributed():
            loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size()
        return loss
def mask_anchor_opr(gtboxes, im_info, anchors, labels):
    """Demote label-0 anchors that mostly lie inside an ignore box.

    An anchor is flagged when its intersection-over-anchor-area (IoA) with any
    ground-truth box whose label column is negative exceeds 0.5. Flagged
    anchors whose label is exactly 0 get 1 subtracted (i.e. become -1); all
    other labels are returned unchanged.

    Args:
        gtboxes: ground-truth boxes; columns 0-3 are coordinates, column 4 the
            label (negative marks an ignore box).
        im_info: image info vector; element 5 is read as the number of valid
            rows in ``gtboxes``.
        anchors: anchor boxes; columns 0-3 are coordinates.
        labels: per-anchor labels.

    Returns:
        The adjusted label tensor.
    """
    eps = 1e-6
    gtboxes = gtboxes[:im_info[5].astype(np.int32), :]
    ignore_mask = (gtboxes[:, 4] < 0).astype(np.float32)

    # Pair every anchor with every ground-truth box: both become (N, K, C).
    N, K = anchors.shape[0], gtboxes.shape[0]
    p_pred = F.broadcast_to(
        F.expand_dims(anchors, 1), (N, K, anchors.shape[1]))
    p_gt = F.broadcast_to(
        F.expand_dims(gtboxes, 0), (N, K, gtboxes.shape[1]))

    # Intersection rectangle: max of the top-left, min of the bottom-right.
    max_off = F.concat([
        F.maximum(p_pred[:, :, :2], p_gt[:, :, :2]),
        F.minimum(p_pred[:, :, 2:4], p_gt[:, :, 2:4]),
    ], axis=2)

    # The "+ 1" treats coordinates as inclusive pixel indices.
    I = F.maximum(max_off[:, :, 2] - max_off[:, :, 0] + 1, 0) * F.maximum(
        max_off[:, :, 3] - max_off[:, :, 1] + 1, 0)
    A = F.maximum(p_pred[:, :, 2] - p_pred[:, :, 0] + 1, 0) * F.maximum(
        p_pred[:, :, 3] - p_pred[:, :, 1] + 1, 0)

    IoA = I / (A + eps)
    # Only ignore boxes may contribute to the flag.
    IoA = IoA * F.expand_dims(ignore_mask, 0)
    mask_flag = (IoA > 0.5).sum(axis=1) > 0

    labels = labels - F.equal(labels, 0).astype(np.float32) * mask_flag.astype(
        np.float32)
    return labels
def roi_pool(
    rpn_fms,
    rois,
    stride,
    pool_shape,
    pooler_type="roi_align",
):
    """Pool each roi from the feature level chosen by its size.

    Rois are assigned a level with
    ``floor(canonical_level + log2(sqrt(area) / canonical_box_size))``,
    clamped to the levels implied by ``stride``. One dummy all-zero roi is
    appended per level so that no level ends up with an empty assignment; the
    dummy rows are dropped again before returning.

    Args:
        rpn_fms: list of feature maps, one per level.
        rois: rois whose columns 1-4 are box coordinates (column 0 is not read
            here; presumably the batch index).
        stride: per-level strides, same length as ``rpn_fms``.
        pool_shape: output shape handed to the pooling op.
        pooler_type: ``"roi_pool"`` or ``"roi_align"``.

    Returns:
        Pooled features, one row per input roi, in the input roi order.

    Raises:
        ValueError: if ``pooler_type`` is not one of the supported values.
            Previously this surfaced later as an unbound ``pool_fm`` variable.
    """
    if pooler_type not in ("roi_pool", "roi_align"):
        raise ValueError(
            "Unknown pooler_type: {!r}, expected 'roi_pool' or 'roi_align'".format(
                pooler_type))

    rois = rois.detach()
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = int(math.log2(stride[0]))
    max_level = int(math.log2(stride[-1]))

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    assigned_level = F.floor(
        canonical_level +
        F.log(F.sqrt(box_area) / canonical_box_size) / np.log(2)
    ).astype("int32")
    assigned_level = F.minimum(assigned_level, max_level)
    assigned_level = F.maximum(assigned_level, min_level)
    assigned_level = assigned_level - min_level

    # avoid empty assignment: add one dummy roi per level
    assigned_level = F.concat([
        assigned_level,
        F.arange(num_fms, dtype="int32", device=assigned_level.device),
    ], )
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        _, inds = F.cond_take(assigned_level == i, assigned_level)
        level_rois = rois[inds]

        if pooler_type == "roi_pool":
            pool_fm = F.nn.roi_pooling(
                rpn_fms[i], level_rois, pool_shape,
                mode="max", scale=1.0 / stride[i])
        else:  # "roi_align", guaranteed by the check above
            pool_fm = F.nn.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # Undo the per-level grouping, then strip the trailing dummy rois.
    fm_order = F.argsort(F.concat(inds_list, axis=0))
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature[fm_order][:-num_fms]

    return pool_feature
def test_rnn_cell(batch_size, input_size, hidden_size, init_hidden):
    """Check ``RNNCell`` returns a hidden state of shape (batch, hidden).

    When ``init_hidden`` is truthy a zero initial state is supplied, otherwise
    ``None`` is passed as the state.
    """
    cell = RNNCell(input_size, hidden_size)
    x = mge.random.normal(size=(batch_size, input_size))
    state_shape = (batch_size, hidden_size)
    h = F.zeros(shape=state_shape) if init_hidden else None
    h_new = cell(x, h)
    assert_tuple_equal(h_new.shape, state_shape)
def test_lstm_cell(batch_size, input_size, hidden_size, init_hidden):
    """Check ``LSTMCell`` returns hidden and cell states of shape (batch, hidden).

    When ``init_hidden`` is truthy the same zero tensor is supplied as both
    initial states, otherwise ``None`` is passed.
    """
    cell = LSTMCell(input_size, hidden_size)
    x = mge.random.normal(size=(batch_size, input_size))
    state_shape = (batch_size, hidden_size)
    hx = None
    if init_hidden:
        zeros = F.zeros(shape=state_shape)
        hx = (zeros, zeros)
    h_new, c_new = cell(x, hx)
    for state in (h_new, c_new):
        assert_tuple_equal(state.shape, state_shape)
def train_pipeline():
    """Run two SGD steps of ``ResNet18Pipeline`` on constant data.

    The model's own ``backward(label, gm)`` produces the loss, which is
    printed after every optimizer step.
    """
    model = ResNet18Pipeline()
    batch_size = 32
    x = F.ones([batch_size, 3, 224, 224])
    label = F.zeros([batch_size], dtype="int32")
    params_gm = ad.GradManager().attach(model.parameters())
    # positional args as in the original: 1e-3, 0.9, 1e-4
    sgd = optim.SGD(model.parameters(), 1e-3, 0.9, 1e-4)
    num_steps = 2
    for _ in range(num_steps):
        model(x)
        loss = model.backward(label, params_gm)
        sgd.step().clear_grad()
        print(loss)
def create_mask_mge(tensor, paddings):
    """Return a 0/1 mask shaped like ``tensor`` with a border of zeros.

    Args:
        tensor: 4-d input (original comment: ``N, c, H, W``); only its shape
            is used.
        paddings: ``paddings[0]`` is the (before, after) border width along
            axis 2 and ``paddings[1]`` the same along axis 3.

    Returns:
        A tensor of zeros with a block of ones covering the un-padded interior.
    """
    shape = tensor.shape
    batch, channels, size2, size3 = shape[0], shape[1], shape[2], shape[3]
    pad2, pad3 = paddings[0], paddings[1]

    inner2 = size2 - (pad2[0] + pad2[1])
    inner3 = size3 - (pad3[0] + pad3[1])

    interior = F.ones([batch, channels, inner2, inner3])
    mask = F.zeros([batch, channels, size2, size3])
    # Paste the ones block at the offset given by the leading paddings.
    mask[:, :, pad2[0]:pad2[0] + inner2, pad3[0]:pad3[0] + inner3] = interior
    return mask
def __init__(self, ir_graph, quantizer: Union[IRQuantizer, None] = None):
    """Store the IR graph and create a zero placeholder tensor per graph input.

    Args:
        ir_graph: graph exposing ``graph_inputs`` and ``graph_outputs``; every
            input must have a known ``shape`` and ``dtype``.
        quantizer: optional quantizer, stored as-is.
    """
    self.ir_graph = ir_graph
    self.quantizer = quantizer
    self.map_ir_tensor_2_mge_tensor = {}  # type: Dict[str, mge.tensor]
    self.tm = None
    self.arg_names = []  # type: List[str]
    self.inp_data = []  # type: List[mge.tensor]

    for graph_input in ir_graph.graph_inputs:
        assert graph_input.shape is not None and graph_input.dtype is not None
        placeholder = F.zeros(
            shape=graph_input.shape, dtype=graph_input.dtype)
        self.inp_data.append(placeholder)
        self.arg_names.append(graph_input.name)
        self.map_ir_tensor_2_mge_tensor[graph_input.name] = placeholder

    self.graph_outputs = [out.name for out in ir_graph.graph_outputs]
def get_flow_mge(H_mat_mul, patch_indices, image_size_h=600, image_size_w=800):
    """Turn per-strip 3x3 homographies into a dense 2-channel flow field.

    The image is cut into ``divide`` horizontal strips of
    ``image_size_h // divide`` rows (the last strip takes the remaining rows).
    Each strip's pixels are transformed with that strip's matrix, the result
    is de-homogenised, and the original coordinates are subtracted.

    Args:
        H_mat_mul: homographies reshaped to ``(batch, divide, 3, 3)`` (original
            comment: ``(N, 6, 3, 3)``).
        patch_indices: homogeneous pixel coordinates with the coordinate
            channel on axis 1; presumably ``(batch, 3, H, W)``, to be confirmed.
        image_size_h: image height in pixels.
        image_size_w: image width in pixels.

    Returns:
        ``warped_xy - patch_indices[:, :2]``.
    """
    batch_size = H_mat_mul.shape[0]
    divide = H_mat_mul.shape[1]
    H_mat_mul = mge.Tensor(H_mat_mul.reshape(batch_size, divide, 3, 3))

    strip_rows = image_size_h // divide
    H_mat_pool = F.zeros((batch_size, image_size_h, image_size_w, 3, 3))
    for i in range(divide):
        top = i * strip_rows
        # The last strip absorbs the rows left over by the integer division.
        bottom = image_size_h if i == divide - 1 else top + strip_rows
        strip_H = F.broadcast_to(
            F.expand_dims(F.expand_dims(H_mat_mul[:, i, :, :], 1), 1),
            (batch_size, bottom - top, image_size_w, 3, 3))
        H_mat_pool[:, top:bottom, ...] = strip_H

    # (B, C, H, W) -> (B, H, W, C, 1) so each pixel is a column vector.
    column_coords = F.expand_dims(patch_indices.transpose(0, 2, 3, 1), 4)
    warped = F.matmul(H_mat_pool, column_coords)[:, :, :, :, 0].transpose(
        0, 3, 1, 2)

    # Offset the homogeneous coordinate before dividing by it.
    T_t = warped[:, 2:3, ...] + 1e-6
    v1 = warped[:, 0:1, ...] / T_t
    v2 = warped[:, 1:2, ...] / T_t
    warp_index = F.concat((v1, v2), 1)

    vgrid = patch_indices[:, :2, ...]
    return warp_index - vgrid
def train():
    """Run two SGD steps of ``ResNet18MP`` on constant data.

    Only the worker with rank 3 computes the cross-entropy loss; every other
    rank calls ``gm.backward(None)``. The loss (or ``None``) is printed after
    each optimizer step.
    """
    model = ResNet18MP()
    batch_size = 32
    x = F.ones([batch_size, 3, 224, 224])
    label = F.zeros([batch_size], dtype="int32")
    gm = ad.GradManager().attach(model.parameters())
    # positional args as in the original: 1e-3, 0.9, 1e-4
    sgd = optim.SGD(model.parameters(), 1e-3, 0.9, 1e-4)
    for _ in range(2):
        with gm:
            y = model(x)
            is_loss_rank = dist.get_rank() == 3
            loss = F.nn.cross_entropy(y, label) if is_loss_rank else None
            gm.backward(loss)
        sgd.step().clear_grad()
        print(loss)
def sample_mask_from_labels(labels, num_sample, sample_value):
    """Build a float mask selecting ``sample_value`` entries, capped by sampling.

    Args:
        labels (Tensor): 1-d label tensor.
        num_sample (int): maximum number of entries left set in the mask.
        sample_value (int): label value to select.

    Returns:
        sample_mask (Tensor): float32 mask with the same shape as ``labels``.
        If at most ``num_sample`` labels match, every match is kept; otherwise
        randomly chosen matches are cleared.
    """
    assert labels.ndim == 1, "Only tensor of dim 1 is supported."
    # TODO: support bool mask
    selected = (labels == sample_value).astype("float32")
    selected_count = selected.sum().astype("int32")
    if selected_count <= num_sample:
        return selected

    # Random scores are non-zero only at selected positions.
    scores = selected * uniform(size=labels.shape)
    # k is negative on this path; presumably F.topk then picks the |k| largest
    # scores, i.e. the surplus matches. Confirm against the F.topk docs.
    _, drop_idx = F.topk(scores, k=num_sample - selected_count)
    selected[drop_idx] = F.zeros(drop_idx.shape)
    return selected
def forward(self, tenFirst, tenSecond):
    """Estimate flow between two images coarse-to-fine over an image pyramid.

    Both inputs are preprocessed and average-pooled by 2 up to
    ``num_layers - 1`` times (a level is only added while the coarsest level
    is still at least ``threshold`` in height or width). Starting from a zero
    flow at half the coarsest resolution, each level upsamples the flow,
    warps the second image with it and adds the residual predicted by that
    level's ``netBasic`` sub-network.

    Args:
        tenFirst: first image batch.
        tenSecond: second image batch.

    Returns:
        The flow estimated at the finest pyramid level.
    """
    firsts = [self.preprocess(tenFirst)]
    seconds = [self.preprocess(tenSecond)]

    for _ in range(self.num_layers - 1):
        coarsest = firsts[0]
        if (coarsest.shape[2] >= self.threshold
                or coarsest.shape[3] >= self.threshold):
            # Coarser levels go to the front, so index 0 is always coarsest.
            firsts.insert(
                0, F.avg_pool2d(inp=firsts[0], kernel_size=2, stride=2))
            seconds.insert(
                0, F.avg_pool2d(inp=seconds[0], kernel_size=2, stride=2))

    coarsest = firsts[0]
    tenFlow = F.zeros([
        coarsest.shape[0],
        2,
        int(math.floor(coarsest.shape[2] / 2.0)),
        int(math.floor(coarsest.shape[3] / 2.0)),
    ])

    # Original note on pyramid sizes:
    # normal: 5 for training (4*4, 8*8, 16*16, 32*32, 64*64)
    #         5 for test (11*20, 22*40, 45*80, 90*160, 180*320)
    # small:  3 for training (16*16, 32*32, 64*64)
    #         3 for test (45*80, 90*160, 180*320)
    for level, (first, second) in enumerate(zip(firsts, seconds)):
        # Doubling the resolution also doubles the displacement magnitudes.
        upsampled = F.nn.interpolate(inp=tenFlow,
                                     scale_factor=2,
                                     mode='BILINEAR',
                                     align_corners=True) * 2.0
        # Odd sizes lose a row/column when pooled; pad to match this level.
        if upsampled.shape[2] != first.shape[2]:
            upsampled = pad_H(upsampled)
        if upsampled.shape[3] != first.shape[3]:
            upsampled = pad_W(upsampled)

        warped = backwarp(tenInput=second,
                          tenFlow=upsampled,
                          border_mode=self.border_mode)
        residual = self.netBasic[level](
            F.concat([first, warped, upsampled], axis=1))
        tenFlow = residual + upsampled
    return tenFlow
def sigmoid_cross_entropy_retina(pred,
                                 label,
                                 ignore_label=-1,
                                 background=0,
                                 alpha=0.5,
                                 gamma=0):
    """Focal-style sigmoid cross entropy normalised by the positive count.

    Args:
        pred: probabilities unpacked as ``n, m, c = pred.shape``; ``F.log`` is
            applied directly, so values are presumably already in (0, 1).
        label: integer class per position; class ``k > 0`` maps to channel
            ``k - 1`` of ``pred``.
        ignore_label: positions with this label contribute no loss.
        background: accepted but not used by this function.
        alpha: weight of the positive term (``1 - alpha`` for the negative).
        gamma: focusing exponent.

    Returns:
        Scalar loss: the summed loss divided by ``max(#(label > 0), 1)``.
    """
    device = pred.device
    valid = 1 - F.equal(label, ignore_label).astype(np.float32)
    # Ignored positions are mapped to class 0 so the scatter index is valid.
    safe_label = label * valid

    n, m, c = pred.shape
    # One extra leading channel absorbs class 0; it is sliced off below.
    canvas = F.zeros([n, m, c + 1]).to(device)
    scatter_index = F.expand_dims(safe_label, 2).astype(np.int32)
    with_background = F.scatter(canvas, 2, scatter_index, F.ones([n, m, 1]))
    onehot = with_background[:, :, 1:]

    pos_part = F.pow(1 - pred, gamma) * onehot * F.log(pred)
    neg_part = F.pow(pred, gamma) * (1 - onehot) * F.log(1 - pred)
    loss = -(alpha * pos_part + (1 - alpha) * neg_part).sum(axis=2) * valid

    num_positive = (label > 0).sum()
    return loss.sum() / F.maximum(num_positive, 1)
def test_assert_equal():
    """Call ``F.utils._assert_equal`` on tensors differing by 1e-5.

    The test passes when the call does not raise.
    """
    dims = (2, 3, 4, 5)
    ones = F.ones(dims, dtype=np.float32)
    nearly_ones = F.zeros(dims, dtype=np.float32) + 1.00001
    F.utils._assert_equal(ones, nearly_ones)
def test_assert_not_equal():
    """Expect ``F.utils._assert_equal`` to raise for tensors differing by 0.1."""
    dims = (2, 3, 4, 5)
    ones = F.ones(dims, dtype=np.float32)
    shifted = F.zeros(dims, dtype=np.float32) + 1.1
    with pytest.raises(RuntimeError):
        F.utils._assert_equal(ones, shifted)
def forward(self, features, label=None, mask=None):
    """Compute the supervised contrastive loss.

    If ``label`` and ``mask`` are both None, the loss degenerates to the
    SimCLR unsupervised loss.

    Reference:
        "A Simple Framework for Contrastive Learning of Visual Representations"<https://arxiv.org/pdf/2002.05709.pdf>
        "Supervised Contrastive Learning"<https://arxiv.org/abs/2004.11362>

    Args:
        features(tensor): The embedding feature. shape=[bs, n_views, ...]
        label(tensor): The label of images, shape=[bs]
        mask(tensor): contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if
            sample j has the same class as sample i. Can be asymmetric.

    Returns:
        Scalar loss tensor.

    Raises:
        ValueError: if ``features`` has fewer than 3 dimensions, if both
            ``label`` and ``mask`` are given, or if ``self.contrast_mode`` is
            neither "one" nor "all".
        RuntimeError: if the number of labels differs from the batch size.
    """
    if len(features.shape) < 3:
        raise ValueError("Features need have 3 dimensions at least")
    bs, num_view = features.shape[:2]
    # if dimension > 3, change the shape of the features to [bs, num_view, ...]
    if len(features.shape) > 3:
        features = features.reshape(bs, num_view, -1)

    # label and mask cannot be provided at the same time
    if (label is not None) and (mask is not None):
        raise ValueError("label and mask cannot provided at the same time")
    elif (label is None) and (mask is None):
        mask = F.eye(bs, dtype="float32")
    elif label is not None:
        label = label.reshape(-1, 1)
        if label.shape[0] != bs:
            raise RuntimeError(
                "Num of labels does not match num of features")
        # Cast like the other branches so the mask is always float32.
        mask = F.equal(label, label.T).astype("float32")
    else:
        mask = mask.astype("float32")

    contrast_count = features.shape[1]
    # Keep the per-view list under its own name: the original rebound
    # ``features`` to this list and then sliced it like a tensor in mode "one".
    views = F.split(features, contrast_count, axis=1)
    # View-major layout: row = view * bs + sample.
    contrast_feature = F.squeeze(F.concat(views, axis=0), axis=1)
    if self.contrast_mode == "one":
        anchor_feature = F.squeeze(views[0], axis=1)
        anchor_count = 1
    elif self.contrast_mode == "all":
        anchor_feature = contrast_feature
        anchor_count = contrast_count
    else:
        raise ValueError("Unknown mode:{}".format(self.contrast_mode))

    # compute logits
    anchor_dot_contrast = F.div(
        F.matmul(anchor_feature, contrast_feature.T), self.temperate)
    # for numerical stability
    logits_max = F.max(anchor_dot_contrast, axis=-1, keepdims=True)
    logits = anchor_dot_contrast - logits_max

    # Tile the mask to [bs * anchor_count, bs * contrast_count]. Block
    # concatenation keeps tiled[r, c] == mask[r % bs, c % bs], matching the
    # view-major feature layout; stack-then-reshape does not.
    mask = F.concat(
        [F.concat([mask] * contrast_count, axis=1)] * anchor_count, axis=0)

    # mask-out self-contrast cases (zero the diagonal)
    num_anchor_rows = int(bs * anchor_count)
    logits_mask = F.scatter(
        F.ones_like(mask), 1,
        F.arange(0, num_anchor_rows, dtype="int32").reshape(-1, 1),
        F.zeros(num_anchor_rows, dtype="float32").reshape(-1, 1))
    mask = mask * logits_mask

    # compute log_prob
    exp_logits = F.exp(logits) * logits_mask
    log_prob = logits - F.log(F.sum(exp_logits, axis=1,
                                    keepdims=True))  # equation 2

    # mean log-likelihood over the positives of each anchor
    mean_log_prob_pos = F.sum(mask * log_prob, axis=1) / F.sum(mask, axis=1)

    # loss
    loss = -(self.temperate / self.base_temperate) * mean_log_prob_pos
    loss = F.mean(loss.reshape(anchor_count, bs))
    return loss
def run_argmin():
    """Return ``F.argmin`` along axis 0 of a 100x100 tensor filled with +inf."""
    all_inf = F.zeros((100, 100))
    all_inf[:] = float("inf")
    return F.argmin(all_inf, axis=0)
def test_insert_module():
    """Insert freshly attached sub-modules into traced graphs and check results.

    Three scenarios are exercised: attaching two ``Neg`` modules plus a
    parameter to a traced block, attaching ``Neg`` to a nested traced block,
    and attaching further modules to the module that was just inserted.
    """
    class Neg(M.Module):
        # Negation module that routes its input through identities held
        # directly, in a list and in a dict, then adds a zero parameter.
        def __init__(self, name):
            super().__init__(name)
            self.identity = M.Identity()
            self.identity_list = [M.Identity(), M.Identity()]
            self.identity_dict = {"0": M.Identity(), "1": M.Identity()}
            self.param = F.zeros((1, ))

        def forward(self, x):
            x = self.identity(x)
            for m in self.identity_dict:
                x = self.identity_dict[m](x)
            for m in self.identity_list:
                x = m(x)
            return F.neg(x) + self.param

    # Scenario 1: replace the relu output with neg2(relu_out) + param.
    traced_module, x, expect = _init_block()
    graph = traced_module.graph
    relu_out = graph.get_function_by_type(F.relu).as_unique().outputs[0]
    # NOTE: ``self`` is a plain local here, bound to the graph's first input.
    self = graph.inputs[0]
    setattr(traced_module, "neg", Neg(name="neg"))
    setattr(traced_module, "neg2", Neg(name="neg"))
    setattr(traced_module, "param", F.zeros((1, )))
    with graph.insert_exprs():
        neg_out = self.neg(relu_out)
        # Overwrites the previous result: only neg2's output is wired in.
        neg_out = self.neg2(relu_out)
        neg_out = neg_out + self.param
    graph.replace_node({relu_out: neg_out})
    graph.compile()
    np.testing.assert_allclose(expect - 1, 1 - traced_module(x), atol=1e-6)
    assert traced_module.neg.graph is not None
    assert traced_module.neg2.graph is not None
    assert traced_module.neg2.param is not None
    # Exact expression count of the traced Neg graph; sensitive to any change
    # in Neg.forward.
    assert len(traced_module.neg.graph._exprs) == 13
    for n in traced_module.graph.nodes():
        if isinstance(n, TensorNode):
            assert n.value is None

    # Scenario 2: attach Neg to a nested block and negate the graph output.
    traced_module, x, expect = _init_module()
    setattr(traced_module.block0, "neg", Neg(name=None))
    graph = traced_module.graph
    self = graph.inputs[0]
    out_node = graph.outputs[0]
    with graph.insert_exprs():
        neg_out = self.block0.neg(out_node)
    graph.replace_node({out_node: neg_out})
    graph.compile()
    np.testing.assert_allclose(expect, -traced_module(x), atol=1e-6)
    # The inserted plain module is expected to have become a TracedModule.
    assert isinstance(traced_module.block0.neg, TracedModule)
    assert traced_module.block0.neg.graph is not None

    # Scenario 3: attach modules to the just-inserted module and chain
    # neg -> neg -> relu on the current output.
    setattr(traced_module.block0.neg, "neg", Neg(name=None))
    setattr(traced_module.block0.neg.neg, "relu", M.ReLU())
    out_node = graph.outputs[0]
    with graph.insert_exprs():
        neg_out = self.block0.neg.neg(out_node)
        neg_out = self.block0.neg.neg(neg_out)
        relu_out = self.block0.neg.neg.relu(neg_out)
    graph.replace_node({out_node: relu_out})
    graph.compile()
    np.testing.assert_allclose(F.relu(-expect), traced_module(x), atol=1e-6)
    assert isinstance(traced_module.block0.neg.neg, TracedModule)
    assert traced_module.block0.neg.neg.graph is not None
def __init__(self):
    """Create the single-element constant tensors ``I`` (ones) and ``M`` (zeros)."""
    super().__init__()
    unit_shape = (1, )
    self.I = F.ones(unit_shape)
    self.M = F.zeros(unit_shape)
def __init__(self, name):
    """Create identity sub-modules (direct, list, dict) and a zero parameter.

    Args:
        name: module name forwarded to the base-class constructor.
    """
    super().__init__(name)
    self.identity = M.Identity()
    self.identity_list = [M.Identity() for _ in range(2)]
    self.identity_dict = {key: M.Identity() for key in ("0", "1")}
    self.param = F.zeros((1, ))
def roi_pool(rpn_fms,
             rois,
             stride,
             pool_shape,
             roi_type='roi_align',
             labels=None,
             bbox_targets=None):
    """Pool each roi from the feature level chosen by its size.

    Rois are assigned a level with
    ``floor(canonical_level + log2(sqrt(area) / canonical_box_size))``,
    clamped to the levels implied by ``stride``. One dummy all-zero roi is
    appended per level so every level receives at least one roi; the dummy
    rows are filtered out of all outputs.

    Unlike a plain pooler, the outputs are returned grouped by level (not in
    the input order), which is why the matching rois and targets are returned
    alongside the features.

    Args:
        rpn_fms: list of feature maps, one per level.
        rois: rois whose columns 1-4 are box coordinates.
        stride: per-level strides, same length as ``rpn_fms``.
        pool_shape: output shape handed to the pooling op.
        roi_type: ``'roi_pool'`` or ``'roi_align'``.
        labels: optional per-roi labels (2-d).
        bbox_targets: optional per-roi regression targets (2-d).

    Returns:
        ``(pool_feature, rois, labels, bbox_targets)``. The last two are
        detached and reordered when both ``labels`` and ``bbox_targets`` were
        given, and ``None`` otherwise.

    Raises:
        ValueError: if ``roi_type`` is not one of the supported values.
            Previously this surfaced later as an unbound ``pool_fm`` variable.
    """
    if roi_type not in ('roi_pool', 'roi_align'):
        raise ValueError(
            "Unknown roi_type: {!r}, expected 'roi_pool' or 'roi_align'".format(
                roi_type))

    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    box_sizes = F.sqrt((rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
    level_assignments = F.floor(canonical_level +
                                F.log(box_sizes / canonical_box_size) /
                                np.log(2))
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    level_assignments = level_assignments - min_level

    # 1 marks a real roi, 0 marks one of the per-level dummy rois added below.
    available_masks = F.concat(
        [F.ones(level_assignments.shape[0]),
         F.zeros(num_fms)], axis=0)
    level_assignments = F.concat(
        [level_assignments,
         mge.tensor(np.arange(num_fms, dtype=np.int32))],
        axis=0)
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))], axis=0)

    # Targets are handled as a pair. The original padded them only when both
    # were given but gathered them whenever ``labels`` was given, which broke
    # for labels-only calls.
    has_targets = labels is not None and bbox_targets is not None
    if has_targets:
        labels = F.concat(
            [labels, F.ones((num_fms, labels.shape[-1]))], axis=0)
        bbox_targets = F.concat(
            [bbox_targets,
             F.zeros((num_fms, bbox_targets.shape[-1]))], axis=0)

    pool_list, inds_list = [], []
    for i in range(len(rpn_fms)):
        mask = F.equal(level_assignments, i)
        _, inds = F.cond_take(mask > 0, mask)
        rois_fm = rois[inds.astype(np.int32)]
        if roi_type == 'roi_pool':
            pool_fm = F.nn.roi_pooling(rpn_fms[i],
                                       rois_fm,
                                       pool_shape,
                                       mode='max',
                                       scale=1.0 / stride[i])
        else:  # 'roi_align', guaranteed by the check above
            pool_fm = F.nn.roi_align(rpn_fms[i],
                                     rois_fm,
                                     pool_shape,
                                     mode='average',
                                     spatial_scale=1.0 / stride[i],
                                     sample_points=2,
                                     aligned=True)
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # fm_order lists the roi indices in the level-grouped order of the output.
    fm_order = F.concat(inds_list, axis=0)
    pool_feature = F.concat(pool_list, axis=0)

    # Positions of the real (non-dummy) rois within the grouped order.
    ordered_available_masks = available_masks[fm_order]
    _, available_inds = F.cond_take(ordered_available_masks > 0,
                                    ordered_available_masks)
    available_inds = available_inds.astype(np.int32)

    pool_feature = pool_feature[available_inds]
    rois = rois[fm_order, :][available_inds]
    if has_targets:
        labels = labels[fm_order][available_inds]
        bbox_targets = bbox_targets[fm_order][available_inds]
        return pool_feature, rois, labels.detach(), bbox_targets.detach()
    return pool_feature, rois, None, None
def __init__(self,
             num_classes,
             width=1.0,
             strides=(8, 16, 32),
             in_channels=(256, 512, 1024),
             act="silu",
             depthwise=False):
    """Build the per-level stems, conv towers and prediction layers.

    For every entry of ``in_channels`` the head creates, in this order: a 1x1
    stem, a two-conv classification tower, a two-conv regression tower, and
    1x1 class / box / objectness prediction convs.

    Args:
        num_classes (int): number of classes predicted per anchor.
        width (float): channel multiplier applied to ``in_channels`` and to
            the 256-channel hidden width.
        strides (sequence of int): stride of each input level; stored as a
            fresh list in ``self.strides``.
        in_channels (sequence of int): channels of each input level before
            the ``width`` scaling.
        act (str): activation type of conv. Default value: "silu".
        depthwise (bool): whether to apply depthwise conv in the conv
            branches. Default value: False.
    """
    super().__init__()

    self.n_anchors = 1
    self.num_classes = num_classes
    self.decode_in_inference = True  # save for matching

    self.cls_convs = []
    self.reg_convs = []
    self.cls_preds = []
    self.reg_preds = []
    self.obj_preds = []
    self.stems = []
    Conv = DWConv if depthwise else BaseConv
    hidden = int(256 * width)

    def conv_tower():
        # Two stacked 3x3 convs at the hidden width.
        return M.Sequential(*[
            Conv(
                in_channels=hidden,
                out_channels=hidden,
                ksize=3,
                stride=1,
                act=act,
            ) for _ in range(2)
        ])

    def pred_conv(out_channels):
        # 1x1 prediction conv from the hidden width.
        return M.Conv2d(
            in_channels=hidden,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
        )

    # Construction order inside the loop is kept as in the original so that
    # parameter creation order does not change.
    for level_channels in in_channels:
        self.stems.append(
            BaseConv(
                in_channels=int(level_channels * width),
                out_channels=hidden,
                ksize=1,
                stride=1,
                act=act,
            ))
        self.cls_convs.append(conv_tower())
        self.reg_convs.append(conv_tower())
        self.cls_preds.append(pred_conv(self.n_anchors * self.num_classes))
        self.reg_preds.append(pred_conv(4))
        self.obj_preds.append(pred_conv(self.n_anchors * 1))

    self.use_l1 = False
    # Copy so the instance never aliases a caller's list or a shared default.
    self.strides = list(strides)
    # Placeholder grid per level; every slot initially refers to the same tensor.
    self.grids = [F.zeros(1)] * len(in_channels)
    self.expanded_strides = [None] * len(in_channels)
def get_losses(self, anchors, pred_logits, pred_offsets, gt_boxes, im_info):
    """Compute the positive/negative bag losses for a batch.

    For every image, each ground-truth box gets a "bag" of the
    ``cfg.bucket_size`` anchors with the highest IoU. The positive loss
    rewards a bag when at least one of its anchors both classifies and
    localises the object well. The negative loss is a focal-style penalty on
    scores, down-weighted where a decoded prediction already overlaps an
    object.

    Args:
        anchors: anchor boxes shared by all images.
        pred_logits: classification logits, indexed ``[image, anchor, class]``.
        pred_offsets: box regression outputs, indexed ``[image, anchor, ...]``.
        gt_boxes: per-image ground truth; columns 0-3 are the box and column 4
            the 1-based class id.
        im_info: per-image info; column 4 is read as the number of valid
            ground-truth boxes.

    Returns:
        dict with keys ``"total_loss"``, ``"pos_loss"`` and ``"neg_loss"``.
    """
    # pylint: disable=too-many-statements
    def positive_bag_loss(logits, axis=1):
        # Weighted mean over the bag with weights 1 / (1 - p), which
        # emphasises the most confident anchors.
        weight = 1.0 / (1.0 - logits)
        weight /= weight.sum(axis=axis, keepdims=True)
        # Reduce over the same axis the weights were normalised over (this
        # was hard-coded to 1, ignoring the ``axis`` argument).
        bag_prob = (weight * logits).sum(axis=axis)
        return -layers.safelog(bag_prob)

    def negative_bag_loss(logits, gamma):
        return (logits**gamma) * (-layers.safelog(1.0 - logits))

    pred_scores = F.sigmoid(pred_logits)
    box_prob_list = []
    positive_losses = []
    clamp_eps = 1e-7
    bucket_size = self.cfg.bucket_size

    for bid in range(im_info.shape[0]):
        boxes_info = gt_boxes[bid, :im_info[bid, 4].astype("int32")]
        # id 0 is used for background classes, so -1 first
        labels = boxes_info[:, 4].astype("int32") - 1

        pred_box = self.box_coder.decode(anchors,
                                         pred_offsets[bid]).detach()
        overlaps = layers.get_iou(boxes_info[:, :4], pred_box).detach()

        # Rescale IoU linearly from [thresh1, per-gt max IoU] to [0, 1].
        thresh1 = self.cfg.box_iou_threshold
        thresh2 = F.clip(overlaps.max(axis=1, keepdims=True),
                         lower=thresh1 + clamp_eps,
                         upper=1.0)
        gt_pred_prob = F.clip((overlaps - thresh1) / (thresh2 - thresh1),
                              lower=0,
                              upper=1.0)

        image_boxes_prob = F.zeros(pred_logits.shape[1:]).detach()
        # guarantee that nonzero_idx is not empty
        if gt_pred_prob.max() > clamp_eps:
            _, nonzero_idx = F.cond_take(gt_pred_prob != 0, gt_pred_prob)
            # since nonzeros is only 1 dim, use num_anchor to get real indices
            num_anchors = gt_pred_prob.shape[1]
            anchors_idx = nonzero_idx % num_anchors
            gt_idx = nonzero_idx // num_anchors
            image_boxes_prob[anchors_idx,
                             labels[gt_idx]] = gt_pred_prob[gt_idx,
                                                            anchors_idx]

        box_prob_list.append(image_boxes_prob)

        # construct bags for objects
        match_quality_matrix = layers.get_iou(boxes_info[:, :4],
                                              anchors).detach()
        num_gt = match_quality_matrix.shape[0]
        _, matched_idx = F.topk(
            match_quality_matrix,
            k=bucket_size,
            descending=True,
            no_sort=True,
        )
        matched_idx = matched_idx.detach()
        matched_idx_flatten = matched_idx.reshape(-1)

        # Score of each bag anchor for the class of its ground-truth box.
        gather_idx = labels.reshape(-1, 1)
        gather_idx = F.broadcast_to(gather_idx, (num_gt, bucket_size))
        gather_src = pred_scores[bid, matched_idx_flatten]
        gather_src = gather_src.reshape(num_gt, bucket_size, -1)
        matched_score = F.indexing_one_hot(gather_src, gather_idx, axis=2)

        # Localisation quality of each bag anchor, as exp(-regression loss).
        topk_anchors = anchors[matched_idx_flatten]
        boxes_broad_cast = F.broadcast_to(
            F.expand_dims(boxes_info[:, :4], axis=1),
            (num_gt, bucket_size, 4)).reshape(-1, 4)
        matched_offsets = self.box_coder.encode(topk_anchors,
                                                boxes_broad_cast)
        reg_loss = layers.smooth_l1_loss(
            pred_offsets[bid, matched_idx_flatten],
            matched_offsets,
            beta=self.cfg.smooth_l1_beta).sum(
                axis=-1) * self.cfg.reg_loss_weight
        matched_reg_scores = F.exp(-reg_loss)

        positive_losses.append(
            positive_bag_loss(matched_score *
                              matched_reg_scores.reshape(-1, bucket_size),
                              axis=1))

    num_foreground = im_info[:, 4].sum()
    pos_loss = F.concat(positive_losses).sum() / F.maximum(
        1.0, num_foreground)
    box_probs = F.stack(box_prob_list, axis=0)

    neg_loss = negative_bag_loss(
        pred_scores * (1 - box_probs),
        self.cfg.focal_loss_gamma).sum() / F.maximum(
            1.0, num_foreground * bucket_size)

    alpha = self.cfg.focal_loss_alpha
    pos_loss = pos_loss * alpha
    neg_loss = neg_loss * (1 - alpha)
    loss_dict = {
        "total_loss": pos_loss + neg_loss,
        "pos_loss": pos_loss,
        "neg_loss": neg_loss,
    }
    return loss_dict
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Assign a ground-truth box, offsets and centerness to every anchor point.

    Per image and per ground-truth box, the ``cfg.anchor_topk`` closest anchor
    points of each level become candidates. A candidate is foreground when its
    IoU reaches the box's adaptive threshold (mean + std of the candidate
    IoUs) and it lies inside the box. Each anchor is then matched to the
    ground truth with the highest remaining IoU.

    Args:
        anchors_list: per-level anchor points (used as centers of
            ``stride * cfg.anchor_scale`` sized squares for the IoU).
        batched_gt_boxes: per-image ground truth; columns 0-3 are the box and
            column 4 the class label.
        batched_num_gts: number of valid ground-truth boxes per image.

    Returns:
        Tuple ``(labels, offsets, ctrness)``, each stacked over the batch on
        axis 0 and detached. Unmatched anchors get label 0.
    """
    labels_list = []
    offsets_list = []
    ctrness_list = []

    all_level_anchors = F.concat(anchors_list, axis=0)
    for bid in range(batched_gt_boxes.shape[0]):
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        ious = []
        candidate_idxs = []
        # Running offset turning per-level indices into indices over all levels.
        base = 0
        for stride, anchors_i in zip(self.cfg.stride, anchors_list):
            # IoU between gt boxes and a square box centered on each anchor.
            ious.append(
                layers.get_iou(
                    gt_boxes[:, :4],
                    F.concat([
                        anchors_i - stride * self.cfg.anchor_scale / 2,
                        anchors_i + stride * self.cfg.anchor_scale / 2,
                    ],
                             axis=1)))
            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
            distances = F.sqrt(
                F.sum((F.expand_dims(gt_centers, axis=1) - anchors_i)**2,
                      axis=2))
            # Presumably the anchor_topk smallest distances (F.topk default
            # order); confirm against the F.topk docs.
            _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
            candidate_idxs.append(base + topk_idxs)
            base += anchors_i.shape[0]
        ious = F.concat(ious, axis=1)
        candidate_idxs = F.concat(candidate_idxs, axis=1)

        # Adaptive per-gt IoU threshold: mean + std over its candidates.
        candidate_ious = F.gather(ious, 1, candidate_idxs)
        ious_thr = (F.mean(candidate_ious, axis=1, keepdims=True) +
                    F.std(candidate_ious, axis=1, keepdims=True))
        is_foreground = F.scatter(
            F.zeros(ious.shape), 1, candidate_idxs,
            F.ones(candidate_idxs.shape)).astype(bool) & (ious >= ious_thr)

        # An anchor is inside a box when all encoded distances are positive.
        is_in_boxes = F.min(self.point_coder.encode(
            all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1)),
                            axis=2) > 0

        # -1 marks (gt, anchor) pairs that must not be matched.
        ious[~is_foreground] = -1
        ious[~is_in_boxes] = -1

        match_indices = F.argmax(ious, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)

        labels = gt_boxes_matched[:, 4].astype(np.int32)
        # Anchors whose best IoU is still -1 matched nothing: label 0.
        labels[anchor_max_iou == -1] = 0
        offsets = self.point_coder.encode(all_level_anchors,
                                          gt_boxes_matched[:, :4])

        # Centerness from offset columns (0, 2) and (1, 3):
        # sqrt(min/max * min/max), with negative ratios clipped to 0.
        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        ctrness = F.sqrt(
            F.clip(F.min(left_right, axis=1) / F.max(left_right, axis=1),
                   lower=0) *
            F.clip(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1),
                   lower=0))

        labels_list.append(labels)
        offsets_list.append(offsets)
        ctrness_list.append(ctrness)

    return (
        F.stack(labels_list, axis=0).detach(),
        F.stack(offsets_list, axis=0).detach(),
        F.stack(ctrness_list, axis=0).detach(),
    )