def backward(self, grad_output):
    """Back-propagate through ``concat([X, A_1 X, ..., A_k X], dim=1) . W``.

    Parameters
    ----------
    grad_output : NDArray
        Gradient with respect to the forward output.

    Returns
    -------
    tuple of NDArray
        ``(grad_X, grad_W)``.
    """
    X, W = self.saved_tensors

    # Recompute the concatenated features rather than reading a stored copy.
    X_list = [X]
    for A in self.A_list:
        if A is not None:
            X_list.append(nd.sparse.dot(A, X))
        else:
            X_list.append(nd.zeros_like(X))
    X_out = nd.concat(*X_list, dim=1)

    grad_W = nd.dot(X_out.T, grad_output)
    grad_X_out = nd.dot(grad_output, W.T)

    if self.A_list:
        grad_chunks = nd.split(grad_X_out, num_outputs=len(self.A_list) + 1)
    else:
        # nd.split returns a bare NDArray (not a list) when num_outputs == 1.
        grad_chunks = [grad_X_out]

    grad_X = grad_chunks[0]
    for A, chunk in zip(self.A_list, grad_chunks[1:]):
        if A is not None:
            # d(A X)/dX applied to the incoming gradient is A^T . grad; using A
            # itself is only correct when A is symmetric.
            grad_X = grad_X + nd.sparse.dot(A, chunk, transpose_a=True)
        # A is None: that slot was all zeros in the forward pass, so it
        # contributes nothing to grad_X.
    return grad_X, grad_W
def concentration_transfer(class_pre_l, class_true_l, con_pre_l, con_true_l, data_utils):
    """Scale predicted and true concentrations by per-class limits.

    A row of three limits is built from ``data_utils.scale_CO[1]``,
    ``data_utils.scale_CO[0]`` and ``data_utils.scale_Me[0]``.  A limit is
    kept where the class is active (prediction ``> 0.5``, ground truth
    ``== 1``) and replaced by zero elsewhere.  It is then multiplied with a
    concentration column: column 1 for the first output, column 0 for the
    second and third outputs.

    Returns
    -------
    tuple of NDArray
        ``(eth_co_me_con_pre, eth_co_me_con_true)``, one row per sample and
        three columns each.
    """
    limits = nd.array([[
        data_utils.scale_CO[1], data_utils.scale_CO[0], data_utils.scale_Me[0]
    ]])

    def _to_concentration(active, class_l, con_l):
        # Tile the limits to one row per sample, then zero inactive classes.
        tiled = nd.repeat(limits, repeats=class_l.shape[0], axis=0)
        scale = nd.where(active, tiled, nd.zeros_like(class_l))
        columns = (
            scale[:, 0] * con_l[:, 1],
            scale[:, 1] * con_l[:, 0],
            scale[:, 2] * con_l[:, 0],
        )
        stacked = nd.concat(*[nd.expand_dims(col, axis=0) for col in columns], dim=0)
        return stacked.transpose()

    eth_co_me_con_pre = _to_concentration(class_pre_l > 0.5, class_pre_l, con_pre_l)
    eth_co_me_con_true = _to_concentration(class_true_l == 1, class_true_l, con_true_l)
    return eth_co_me_con_pre, eth_co_me_con_true
def _ohem_single(self, score_gt, score_pred, training_masks):
    """Build the online-hard-example-mining mask for one sample.

    Positives are the pixels with ``score_gt > 0.5``.  At most three times
    as many negatives as trainable positives are kept, chosen as the
    negatives with the highest ``score_pred``.  The result is restricted to
    ``training_masks``.

    Parameters
    ----------
    score_gt : NDArray
        Ground-truth score map.
    score_pred : NDArray
        Predicted score map.
        # NOTE(review): expected to have the same shape as score_gt - confirm at call sites.
    training_masks : NDArray
        Mask of pixels that may be trained on.
        # assumes values are 0/1 - TODO confirm

    Returns
    -------
    NDArray
        The selected mask, or ``training_masks`` unchanged when there are no
        trainable positives or no negatives.
    """
    if self.debug:
        print("score_gt_shape:", score_gt.shape, "score_pred_shape:", score_pred.shape, \
              "train_mask_shape:", training_masks.shape)

    pos_gt_thres = F.where(score_gt > 0.5, F.ones_like(score_gt), F.zeros_like(score_gt))
    # Count positives INSIDE the trainable region.  The previous expression,
    # sum(pos) - sum(pos * mask), counted the positives outside it, so with an
    # all-ones mask pos_num was always 0 and mining never ran.
    pos_num = int(F.sum(pos_gt_thres * training_masks).asscalar())
    if pos_num == 0:
        return training_masks

    neg_lt_thres = F.where(score_gt <= 0.5, F.ones_like(score_gt), F.zeros_like(score_gt))
    neg_num = min(pos_num * 3, int(F.sum(neg_lt_thres).asscalar()))
    if neg_num == 0:
        return training_masks

    # Positive pixels are zeroed out before sorting in descending order.
    neg_score = neg_lt_thres * score_pred
    neg_score_sorted = F.sort(neg_score.reshape(-1), is_ascend=0, axis=None)
    # neg_num is a plain int, so this is ordinary integer indexing.
    threshold = neg_score_sorted[neg_num - 1]

    score_gt_thres = F.where(score_pred >= threshold, F.ones_like(score_pred), F.zeros_like(score_pred))
    trained_sample_mask = F.logical_or(score_gt_thres, pos_gt_thres)
    return F.logical_and(trained_sample_mask, training_masks)
def sample(match, cls_pred, iou, ratio=3, min_sample=0, threshold=0.5, do=True):
    """Label anchors as positive (1), sampled negative (-1) or ignored (0).

    Anchors with ``match > -0.5`` are positives.  With ``do=False`` every
    other anchor becomes -1.  Otherwise, for each batch row, the unmatched
    anchors whose maximum IoU is below ``threshold`` are ranked by
    ``logsumexp(cls_pred) - cls_pred[..., 0]`` and the top
    ``min(#candidates, max(ratio * #positives, min_sample))`` are marked -1.
    """
    is_matched = match > -0.5
    if do is False:
        all_ones = nd.ones_like(match)
        return nd.where(is_matched, all_ones, all_ones * -1)

    blank = nd.zeros_like(match)
    num_pos = nd.sum(is_matched, axis=-1)
    wanted_neg = ratio * num_pos

    # Negative candidates: unmatched anchors with low overlap everywhere.
    low_iou = nd.max(iou, axis=-1) < threshold
    neg_mask = nd.where(match < -0.5, low_iou, blank)
    max_neg = neg_mask.sum(axis=-1)
    num_neg = nd.minimum(max_neg, nd.maximum(wanted_neg, min_sample)).astype('int')

    # Numerically stable log-sum-exp minus the class-0 score.
    neg_prob = cls_pred[:, :, 0]
    max_value = nd.max(cls_pred, axis=-1, keepdims=True)
    log_sum_exp = nd.log(nd.sum(nd.exp(cls_pred - max_value), axis=-1))
    score = max_value[:, :, 0] - neg_prob + log_sum_exp
    score = nd.where(neg_mask, score, nd.zeros_like(score))
    order = nd.argsort(score, axis=-1, is_ascend=False)

    result = nd.where(is_matched, nd.ones_like(blank), blank)
    for row, count in enumerate(num_neg):
        result[row, order[row, :count.asscalar()]] = -1
    return result
def label_offset(anchors, bbox, match, sample, means=(0,0,0,0), stds=(0.1,0.1,0.2,0.2), flatten=True):
    """Encode matched boxes as normalised offsets relative to anchors.

    Parameters
    ----------
    anchors : NDArray
        Reshaped to ``(N, 4)`` before use.
    bbox : NDArray, shape (B, M, 4)
    match : NDArray
        Index of the box picked for every anchor (passed to ``nd.pick``).
    sample : NDArray
        Reshaped to ``(B, N, 1)``; entries ``> 0.5`` mark anchors whose
        offsets are kept.
    means, stds : sequence of 4 numbers
        Normalisation applied to the x, y, w and h offsets.
    flatten : bool
        When true both outputs are reshaped to ``(B, -1)``.

    Returns
    -------
    tuple of NDArray
        ``(anchor_mask, anchor_offset)``.
    """
    anchors = anchors.reshape((-1, 4))
    N, _ = anchors.shape
    B, M, _ = bbox.shape
    anchor_x, anchor_y, anchor_w, anchor_h = corner_to_center(anchors, split=True)

    # Give every anchor a view of all M boxes, then pick its matched one.
    tiled = nd.broadcast_to(bbox.reshape((B, 1, M, 4)), (B, N, M, 4))
    picked = [nd.pick(tiled[:, :, :, p], match) for p in range(4)]
    matched = nd.stack(*picked, axis=-1)
    bbox_x, bbox_y, bbox_w, bbox_h = corner_to_center(matched, split=True)

    offset_x = ((bbox_x - anchor_x) / anchor_w - means[0]) / stds[0]
    offset_y = ((bbox_y - anchor_y) / anchor_h - means[1]) / stds[1]
    offset_w = (nd.log(bbox_w/anchor_w) - means[2]) / stds[2]
    offset_h = (nd.log(bbox_h/anchor_h) - means[3]) / stds[3]
    offset = nd.concat(offset_x, offset_y, offset_w, offset_h, dim=-1)

    keep = nd.broadcast_to(sample.reshape((B, N, 1)), (B, N, 4)) > 0.5
    anchor_offset = nd.where(keep, offset, nd.zeros_like(offset))
    anchor_mask = nd.where(keep, nd.ones_like(offset), nd.zeros_like(offset))

    if flatten:
        anchor_offset = anchor_offset.reshape((B, -1))
        anchor_mask = anchor_mask.reshape((B, -1))
    return anchor_mask, anchor_offset
def __init__(self, num_sample, num_local, rank, local_rank, name, embedding_size, prefix, gpu=True):
    """Store the configuration and create the local weight and its momentum.

    The weight is drawn from a normal distribution (loc 0, scale 0.01) with
    shape ``(num_local, embedding_size)``.  It is placed on
    ``mx.gpu(local_rank)`` when ``gpu`` is true and on the default context
    otherwise.  The momentum buffer is a zero array shaped like the weight.
    """
    self.num_sample = num_sample
    self.num_local = num_local
    self.rank = rank
    self.name = name
    self.embedding_size = embedding_size
    self.gpu = gpu
    self.prefix = prefix

    init_kwargs = dict(loc=0, scale=0.01, shape=(self.num_local, self.embedding_size))
    if gpu:
        init_kwargs['ctx'] = mx.gpu(local_rank)
    self.weight = nd.random_normal(**init_kwargs)
    self.weight_mom = nd.zeros_like(self.weight)

    self.weight_index_sampler = WeightIndexSampler(num_sample, num_local, rank, name)
def __init__(self, model, lr=0.1, momentum=0.5):
    """Keep the model and hyper-parameters and allocate zeroed update buffers.

    One buffer is created for each of ``model.W``, ``model.v_bias`` and
    ``model.h_bias``, with the same shape as that parameter.
    """
    self.model, self.momentum, self.lr = model, momentum, lr
    self.dW, self.dv, self.dh = [
        nd.zeros_like(param)
        for param in (model.W, model.v_bias, model.h_bias)
    ]
def add(self, bg_batch, r_max, add_rate=1.0):
    """Blend randomly projected ``draw_LP`` images onto a background batch.

    For every sample a uniform random number is drawn; the sample is left
    as pure background when that number exceeds ``add_rate``.

    Returns
    -------
    tuple of NDArray
        ``(img_batch, label_batch)``.  ``img_batch`` is clipped to [0, 1].
        ``label_batch`` has shape ``(bs, 1, 10)`` and stays -1 for skipped
        samples; otherwise the last entry is the ``LP_type`` and the rest
        is the label from ``random_projection_LP_6D``.
    """
    ctx = bg_batch.context
    batch_shape = bg_batch.shape
    bs = batch_shape[0]
    output_size = (batch_shape[2], batch_shape[3])

    mask_batch = nd.zeros_like(bg_batch)
    image_batch = nd.zeros_like(bg_batch)
    label_batch = nd.ones((bs, 1, 10), ctx=ctx) * (-1)

    for idx in range(bs):
        if np.random.rand() > add_rate:
            continue

        LP, LP_type, _ = self.draw_LP()
        projector = self.project_rect_6d
        input_size = (projector.camera_h, projector.camera_w)
        mask, image, label = self.random_projection_LP_6D(
            LP, input_size, output_size, r_max)

        mask_batch[idx] = mask.as_in_context(ctx)
        image_batch[idx] = image.as_in_context(ctx)
        label_batch[idx, :, :-1] = label
        label_batch[idx, :, -1] = LP_type

    # Background where the mask is 0, rendered image where it is 1.
    blended = bg_batch * (1 - mask_batch) + image_batch * mask_batch
    return nd.clip(blended, 0, 1), label_batch
def get_accuracy(pre_l, true_l):
    """Return the fraction of samples whose thresholded labels all match.

    Predictions are binarised at 0.5.  A sample counts as correct only when
    every column of its binarised prediction equals ``true_l``.

    Parameters
    ----------
    pre_l : NDArray, shape (num_samples, num_labels)
    true_l : NDArray, same shape as ``pre_l``

    Returns
    -------
    NDArray
        Number of fully correct samples divided by ``pre_l.shape[0]``.
    """
    one_zero_pre = nd.where(pre_l > 0.5, nd.ones_like(pre_l), nd.zeros_like(pre_l))
    # Previously hard-coded to 3; derive it from the input so any number of
    # label columns works (identical result for three columns).
    num_labels = pre_l.shape[1]
    compare = nd.equal(one_zero_pre, true_l).sum(axis=1)
    samples_right = nd.where(compare == num_labels,
                             nd.ones_like(compare),
                             nd.zeros_like(compare)).sum()
    all_num = pre_l.shape[0]
    return samples_right / all_num
def backward(self, dZ):
    """Compute gradients ``(dX, dY)`` for the saved operands given ``dZ``.

    The branch taken depends on ``self.op`` (``mul``, ``div``, ``add``,
    ``sub``, ``copy_lhs``, ``copy_rhs``) and on whether ``self.reduce_op``
    is ``'sum'``.  For other reducers the saved ``argX``/``argY`` indices
    are used to scatter the gradient back.
    ``self.saved_tensors`` is cleared before returning.
    """
    ctx = context(dZ)  # NOTE(review): not used below
    X, Y, argX, argY = self.saved_tensors
    gidx, op, reduce_op = self.gidx, self.op, self.reduce_op
    # ---- gradient w.r.t. X (zero when op is 'copy_rhs') ----
    if op != 'copy_rhs':
        g_rev = gidx.reverse()
        if reduce_op == 'sum':
            if op in ['mul', 'div']:
                dX = _gspmm(g_rev, 'mul', 'sum', dZ, _muldiv(op, Y))[0]
            elif op in ['add', 'sub']:
                dX = _gspmm(g_rev, 'copy_lhs', 'sum', dZ, Y)[0]
            elif op == 'copy_lhs':
                dX = _gspmm(g_rev, 'copy_lhs', 'sum', dZ, None)[0]
        else:
            # non-sum reducer: route dZ back through the saved indices
            if op in ['mul', 'div']:
                dX = _scatter_nd(
                    argX,
                    _muldiv(
                        op,
                        _gather_nd(
                            argY,
                            Y.broadcast_to(
                                (Y.shape[0], *dZ.shape[1:])))) * dZ,
                    X.shape[0])
            elif op in ['add', 'sub', 'copy_lhs']:
                dX = _scatter_nd(argX, dZ, X.shape[0])
        dX = _reduce_grad(dX, X.shape)
    else:
        dX = nd.zeros_like(X)
    # ---- gradient w.r.t. Y (zero when op is 'copy_lhs') ----
    if op != 'copy_lhs':
        if reduce_op == 'sum':
            if op == 'mul' and _need_reduce_last_dim(X, Y):
                dY = _gsddmm(gidx, 'dot', X, dZ)
            elif op in ['mul', 'div']:
                dY = _gsddmm(gidx, 'mul', X, dZ)
                if op == 'div':
                    dY = -dY / (Y**2)
            elif op in ['add', 'sub', 'copy_rhs']:
                dY = _gsddmm(gidx, 'copy_rhs', X, _addsub(op, dZ))
        else:
            if op in ['mul', 'div']:
                dY = _scatter_nd(
                    argY,
                    _gather_nd(argX, X.broadcast_to(
                        (X.shape[0], *dZ.shape[1:]))) * dZ,
                    Y.shape[0])
                if op == 'div':
                    dY = -dY / (Y**2)
            elif op in ['add', 'sub', 'copy_rhs']:
                dY = _scatter_nd(argY, _addsub(op, dZ), Y.shape[0])
        dY = _reduce_grad(dY, Y.shape)
    else:
        dY = nd.zeros_like(Y)
    # drop the references held since the forward pass
    self.saved_tensors = None
    return dX, dY
def backward(self, dZ):
    """Compute gradients ``(dX, dY)`` for the saved operands given ``dZ``.

    The branch taken depends on ``self.op`` and on ``self.lhs_target`` /
    ``self.rhs_target``, each of which is ``'u'``, ``'v'`` or ``'e'``.
    ``self.saved_tensors`` is cleared before returning.
    """
    ctx = context(dZ)  # NOTE(review): not used below
    X, Y = self.saved_tensors
    gidx, op = self.gidx, self.op
    lhs_target, rhs_target = self.lhs_target, self.rhs_target
    # ---- gradient w.r.t. X (zero when op is 'copy_rhs') ----
    if op != 'copy_rhs':
        if lhs_target in ['u', 'v']:
            # use the graph as-is for 'v', reversed for 'u'
            _gidx = gidx if self.lhs_target == 'v' else gidx.reverse()
            if op in ['add', 'sub', 'copy_lhs']:
                dX = _gspmm(_gidx, 'copy_rhs', 'sum', None, dZ)[0]
            else:  # mul, div, dot
                if rhs_target == lhs_target:
                    dX = _gspmm(_gidx, 'copy_rhs', 'sum', None, dZ)[0] * _muldiv(op, Y)
                elif self.rhs_target == 'e':
                    dX = _gspmm(_gidx, 'copy_rhs', 'sum', None, dZ * _muldiv(op, Y))[0]
                else:  # rhs_target = !lhs_target
                    dX = _gspmm(_gidx, 'mul', 'sum', _muldiv(op, Y), dZ)[0]
        else:  # lhs_target == 'e'
            if op in ['add', 'sub', 'copy_lhs']:
                dX = dZ
            else:  # mul, div, dot
                dX = _gsddmm(gidx, 'mul', dZ, _muldiv(op, Y), 'e', rhs_target)
        dX = _reduce_grad(dX, X.shape)
    else:
        dX = nd.zeros_like(X)
    # ---- gradient w.r.t. Y (zero when op is 'copy_lhs') ----
    if op != 'copy_lhs':
        if self.rhs_target in ['u', 'v']:
            _gidx = gidx if rhs_target == 'v' else gidx.reverse()
            if op in ['add', 'sub', 'copy_rhs']:
                dY = _gspmm(_gidx, 'copy_rhs', 'sum', None, _addsub(op, dZ))[0]
            else:  # mul, div, dot
                if lhs_target == rhs_target:
                    dY = _gspmm(_gidx, 'copy_rhs', 'sum', None, dZ)[0] * X
                elif self.lhs_target == 'e':
                    dY = _gspmm(_gidx, 'copy_rhs', 'sum', None, dZ * X)[0]
                else:  # rhs_target = !lhs_target
                    dY = _gspmm(_gidx, 'mul', 'sum', X, dZ)[0]
                if op == 'div':
                    dY = -dY / (Y**2)
        else:
            if op in ['add', 'sub', 'copy_rhs']:
                dY = _addsub(op, dZ)
            else:  # mul, div, dot
                dY = _gsddmm(gidx, 'mul', dZ, X, 'e', lhs_target)
                if op == 'div':
                    dY = -dY / (Y**2)
        dY = _reduce_grad(dY, Y.shape)
    else:
        dY = nd.zeros_like(Y)
    # drop the references held since the forward pass
    self.saved_tensors = None
    return dX, dY
def forward(self, cls_pred, box_pred, cls_target, box_target, invalid):
    """Compute loss in entire batch across devices.

    Each argument is a single NDArray or a list with one NDArray per device.
    Per-device shapes: ``cls_pred`` (b, N, num_cls+1), ``box_pred`` (b, N, 4),
    ``cls_target`` (b, N), ``box_target`` (b, N, 4), ``invalid`` (b, N).

    Returns
    -------
    tuple of list of NDArray
        ``(sum_losses, cls_losses, box_losses)``, one entry per device.  The
        same list structure is returned when the batch has no positives.
    """
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target, invalid = [_as_list(x) \
        for x in (cls_pred, box_pred, cls_target, box_target, invalid)]

    # cross device reduction to obtain positive samples in entire batch
    num_pos = [(ct > 0).sum()
               for _cp, _bp, ct, _bt in zip(cls_pred, box_pred, cls_target, box_target)]
    num_pos_all = sum([p.asscalar() for p in num_pos])
    if num_pos_all < 1:
        # No positive samples: return zero losses.  They are built from the
        # predictions and returned as per-device lists, so the return type
        # matches the normal path below (previously three bare NDArrays).
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0) for cp, bp in zip(cls_pred, box_pred)]
        return sum_losses, cls_losses, box_losses

    # compute element-wise cross entropy loss and sort, then perform negative mining
    cls_losses = []
    box_losses = []
    sum_losses = []
    for cp, bp, ct, bt, inval in zip(cls_pred, box_pred, cls_target, box_target, invalid):
        pred = nd.log_softmax(cp, axis=-1)  # (b, N, cls_num+1)
        pos = ct > 0  # (b, N)
        cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)  # (b, N)
        # anchors flagged in `inval` contribute no classification loss
        cls_loss = nd.where(inval, nd.zeros_like(cls_loss), cls_loss)
        # rank of every anchor when negatives are sorted by decreasing loss
        # (positives are multiplied by 0 and therefore sort after them)
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        hard_negative = rank < (pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)  # (b, N)
        # mask out if not positive or negative
        cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

        bp = _reshape_like(nd, bp, bt)
        box_loss = nd.abs(bp - bt)
        # linear above self._rho, quadratic below it
        box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(box_loss))
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

    return sum_losses, cls_losses, box_losses
def box_ciou(b1, b2):
    """Compute the CIoU between two sets of boxes, element by element.

    Parameters
    ----------
    b1: NDArray, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
    b2: NDArray, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh

    Returns
    -------
    ciou: NDArray, shape=(batch, feat_w, feat_h, anchor_num)
        The last axis is consumed by the area and distance computations.
    """
    eps = 1e-6  # lower bound applied to every denominator

    # top-left / bottom-right corners of the first set of boxes
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh / 2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    # top-left / bottom-right corners of the second set of boxes
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh / 2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    # IoU.  nd.maximum / nd.minimum are the element-wise operators; nd.max and
    # nd.min are reductions whose second positional argument is an axis.
    intersect_mins = nd.maximum(b1_mins, b2_mins)
    intersect_maxes = nd.minimum(b1_maxes, b2_maxes)
    intersect_wh = nd.maximum(intersect_maxes - intersect_mins, 0)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    union_area = b1_area + b2_area - intersect_area
    iou = intersect_area / nd.maximum(union_area, eps)

    # squared distance between the box centres
    center_distance = nd.sum(nd.power((b1_xy - b2_xy), 2), axis=-1)

    # smallest box enclosing both boxes and its squared diagonal
    enclose_mins = nd.minimum(b1_mins, b2_mins)
    enclose_maxes = nd.maximum(b1_maxes, b2_maxes)
    enclose_wh = nd.maximum(enclose_maxes - enclose_mins, 0)
    enclose_diagonal = nd.sum(nd.power(enclose_wh, 2), axis=-1)

    ciou = iou - 1.0 * (center_distance) / nd.maximum(enclose_diagonal, eps)

    # aspect-ratio term
    v = (4 / (math.pi**2)) * nd.power(
        (nd.arctan(b1_wh[..., 0] / nd.maximum(b1_wh[..., 1], eps)) -
         nd.arctan(b2_wh[..., 0] / nd.maximum(b2_wh[..., 1], eps))), 2)
    # The denominator needs a LOWER bound; the old `a_max=1e-6` capped it at
    # 1e-6 instead of keeping it away from zero.
    alpha = v / nd.maximum((1.0 - iou + v), eps)
    ciou = ciou - alpha * v
    return ciou
def __init__(self, hyperparams, net, params, loss_func, model_ctx, accountant):
    """Initialise the base optimiser, the noise scale and the moment buffers.

    ``sigma`` is stored in ``self._hyperparams`` as
    ``z * (2 * l2_clipping_bound / lot_size)``.  ``self._m`` and ``self._v``
    hold one zero array per entry of ``self._params``.
    """
    super(DpAdam, self).__init__(hyperparams, net, params, loss_func, model_ctx, accountant)

    # Compute scale of Gaussian noise to add
    per_lot_bound = 2 * hyperparams['l2_clipping_bound'] / hyperparams['lot_size']
    self._hyperparams['sigma'] = hyperparams['z'] * per_lot_bound

    # Initialize 1st and 2nd moment vectors
    self._m = {}
    self._v = {}
    for param_name, param in self._params.items():
        self._m[param_name], self._v[param_name] = nd.zeros_like(param), nd.zeros_like(param)
def render(self, bg_batch):
    """Paste a randomly resized and enhanced ``draw_LP`` image on each background.

    Returns
    -------
    tuple of NDArray
        ``(img_batch, label_batch)``.  ``img_batch`` is clipped to [0, 1].
        ``label_batch`` has shape ``(bs, 7, 3)`` and is initialised to -1.
        Row ``j`` holds ``c[0]`` and two horizontal positions divided by
        ``self.w`` for the j-th label.
    """
    ctx = bg_batch.context
    bs = bg_batch.shape[0]

    mask_batch = nd.zeros_like(bg_batch)
    image_batch = nd.zeros_like(bg_batch)
    label_batch = nd.ones((bs, 7, 3), ctx=ctx) * (-1)

    for idx in range(bs):
        LP, LP_type, labels = self.draw_LP()

        # Random overall scale, plus an extra random stretch on the height.
        resize = np.random.uniform(low=0.9, high=1.0)
        LP_w = int(LP.size[0] * resize)
        LP_h = int(LP.size[1] * resize * np.random.uniform(low=0.9, high=1.1))
        LP = LP.resize((LP_w, LP_h), PIL.Image.BILINEAR)
        LP, r = self.pil_image_enhance(LP, M=10.0, N=10.0, R=5.0, G=8.0)

        # The paste position may hang up to 10% of the image off the canvas.
        paste_x = np.random.randint(int(-0.1 * LP_w), int(self.w - 0.9 * LP_w))
        paste_y = np.random.randint(int(-0.1 * LP_h), int(self.h - 0.9 * LP_h))

        canvas = PIL.Image.new('RGBA', (self.w, self.h))
        canvas.paste(LP, (paste_x, paste_y))

        # The alpha channel is used as the blending mask.
        mask = yolo_gluon.pil_mask_2_rgb_ndarray(canvas.split()[-1])
        image = yolo_gluon.pil_rgb_2_rgb_ndarray(canvas, augs=self.augs)
        mask_batch[idx] = mask.as_in_context(ctx)
        image_batch[idx] = image.as_in_context(ctx)

        r = r * np.pi / 180  # degrees -> radians
        offset = paste_x + abs(LP_h * math.sin(r) / 2)
        cos_r = math.cos(r)
        for j, c in enumerate(labels):
            label_batch[idx, j, 0] = c[0]
            label_batch[idx, j, 1] = (offset + c[1] * LP_w * cos_r) / self.w
            label_batch[idx, j, 2] = (offset + c[2] * LP_w * cos_r) / self.w

    blended = bg_batch * (1 - mask_batch) / 255. + image_batch * mask_batch
    return nd.clip(blended, 0, 1), label_batch
def balance_sampler(samples):
    """ignore extra negative samples to keep batch balance

    Along axis 0, entries equal to 1 are counted as positives and entries
    equal to 0 as negatives.  Each entry is kept with probability
    ``1 - max(0, (num_neg - num_pos) / num_neg)``.  Entries equal to 1 are
    always kept.  Returns a mask shaped like ``samples``.
    """
    num_pos = nd.sum(samples == 1, axis=0)
    num_neg = nd.sum(samples == 0, axis=0)

    surplus_ratio = (num_neg - num_pos) / num_neg
    drop_prob = nd.where(surplus_ratio < 0, nd.zeros_like(surplus_ratio), surplus_ratio)

    draws = nd.random.uniform(0, 1, shape=samples.shape, ctx=samples.context)
    keep = nd.where(draws > drop_prob, nd.ones_like(samples), nd.zeros_like(samples))
    return nd.where(samples == 1, samples, keep)
def get_cls_targets(cls_targets_1, cls_targets_2):
    """Merge two lists of class targets, marking disagreements with -1.

    For every pair, an entry keeps the value from the first target when both
    targets agree on being positive (``> 0``) or non-positive.  Otherwise
    the entry becomes -1.
    """
    def _is_positive(target):
        return nd.where(target > 0, nd.ones_like(target), nd.zeros_like(target))

    merged = []
    for first, second in zip(cls_targets_1, cls_targets_2):
        first_pos = _is_positive(first)
        second_pos = _is_positive(second)
        agree = nd.where(first_pos == second_pos,
                         nd.ones_like(first_pos),
                         nd.zeros_like(first_pos))
        merged.append(nd.where(agree, first, nd.ones_like(first) * -1))
    return merged
def forward(self, rcnn_cls_pred, rcnn_bbox_pred, rcnn_cls_gt, rcnn_bbox_gt):
    """Return ``(cls_log_loss, bbox_smooth_l1_loss)`` for a batch of RoIs.

    Both losses are sums divided by ``self._roi_batch_size``.  The box loss
    is weighted by ``self._bbox_weights`` and only counts the slot of the
    ground-truth class of foreground RoIs (``rcnn_cls_gt > 0``).
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the extent of the `with` block below is an assumption
    # (weights built without recording, losses recorded) - confirm.
    with autograd.pause():
        ctx = rcnn_cls_pred.context
        roi_num = rcnn_cls_pred.shape[0]
        roi_idx = nd.arange(roi_num, ctx=ctx).reshape(-1, 1)
        fg_bbox_mask = (rcnn_cls_gt > 0).reshape(0, 1, 1)
        # one 4-vector of weights per (roi, class); only the gt-class slot is filled
        bbox_weights = nd.zeros_like(rcnn_bbox_gt).reshape(0, -1, 4)
        bbox_weights[roi_idx, rcnn_cls_gt[:], :] = \
            self._bbox_weights.data(ctx).broadcast_to((roi_num, 1, 4)) * fg_bbox_mask
        bbox_weights = bbox_weights.reshape(0, -1)

    # rcnn_cls_pred.shape (roi_num, num_classes)
    # clip before the log so that a probability of 0 cannot produce -inf
    rcnn_cls_log = nd.log(nd.clip(rcnn_cls_pred, 1e-14, 1))
    cls_log_loss = -nd.sum(rcnn_cls_log[
        roi_idx, rcnn_cls_gt]) / self._roi_batch_size.data(ctx)

    # rcnn_bbox_pred.shape (roi_num, num_classes*4)
    rcnn_bbox_smooth_l1 = nd.smooth_l1(rcnn_bbox_pred - rcnn_bbox_gt, scalar=1.0)
    bbox_smooth_l1_loss = nd.sum(
        rcnn_bbox_smooth_l1 * bbox_weights) / self._roi_batch_size.data(ctx)

    return cls_log_loss, bbox_smooth_l1_loss
def hybrid_forward(self, F, samples, matches, refs):
    """HybridBlock, handle multi batch correctly

    Parameters
    ----------
    samples: (B, N), value +1 (positive), -1 (negative), 0 (ignore)
    matches: (B, N), value range [0, M)
    refs: (B, M), value range [0, num_fg_class), excluding background

    Returns
    -------
    targets: (B, N), value range [0, num_fg_class + 1), including background

    """
    # samples (B, N) (+1, -1, 0: ignore), matches (B, N) [0, M), refs (B, M)
    # reshape refs (B, M) -> (B, 1, M) -> (B, N, M)
    # NOTE(review): `matches.shape` is read here, which a Symbol may not
    # provide - confirm this block is only used imperatively.
    refs = F.repeat(refs.reshape((0, 1, -1)), axis=1, repeats=matches.shape[1])
    # ids (B, N, M) -> (B, N), value [0, M + 1), 0 reserved for background class
    target_ids = F.pick(refs, matches, axis=2) + 1
    # samples 0: set ignore samples to ignore_label
    # Use the namespace `F` handed to hybrid_forward rather than `nd`, so the
    # same code is valid whichever namespace F is.
    targets = F.where(samples > 0.5, target_ids, F.ones_like(target_ids) * self._ignore_label)
    # samples -1: set negative samples to 0
    targets = F.where(samples < -0.5, F.zeros_like(targets), targets)
    return targets
def callback_elbo_sample(my_model, data_batch):
    """Get a reduced-variance estimate of the elbo and sample.

    The model is evaluated ``n_samples_stats`` times on ``data_batch`` and
    the ELBO and sample outputs are averaged.

    Returns
    -------
    tuple
        ``(tmp_elbo, tmp_sample)``: the mean ELBO over all elements, and the
        averaged sample additionally averaged over axis 0.
    """
    n_samples_stats = 10
    # This first call only provides arrays to shape the accumulators from.
    _, elbo, sample = my_model(data_batch)
    tmp_sample = nd.zeros_like(sample)
    tmp_elbo = nd.zeros_like(elbo)
    # A surrounding loop used to repeat this block n_samples_stats times and
    # reset the accumulators each time, so only the last repetition counted.
    for _ in range(n_samples_stats):
        _, elbo, sample = my_model(data_batch)
        tmp_sample += sample
        tmp_elbo += elbo
    tmp_sample /= n_samples_stats
    tmp_elbo /= n_samples_stats
    tmp_sample = np.mean(tmp_sample.asnumpy(), 0)
    tmp_elbo = np.mean(tmp_elbo.asnumpy())
    return tmp_elbo, tmp_sample
def _epsilon_lrp_slow(self, R, epsilon):
    '''
    LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140
    This function shows all necessary operations to perform LRP in one place and is therefore not optimized

    R has shape (N, Hout, Wout, NF); the result has the shape of self.X.
    '''
    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx, dtype=self.dtype)

    # Both of these are the same for every output position.
    weights = nd.expand_dims(self.W, axis=0)
    bias = self.B
    for _ in range(4):
        bias = nd.expand_dims(bias, 0)

    for i in range(Hout):
        rows = slice(i * hstride, i * hstride + hf)
        for j in range(Wout):
            cols = slice(j * wstride, j * wstride + wf)

            Z = weights * nd.expand_dims(self.X[:, rows, cols, :], 4)
            Zs = Z.sum(axis=(1, 2, 3), keepdims=True) + bias
            # Move the denominator away from zero by epsilon, keeping its sign.
            Zs += epsilon * ((Zs >= 0) * 2 - 1)

            relevance = nd.expand_dims(R[:, i:i + 1, j:j + 1, :], axis=3)
            Rx[:, rows, cols, :] += ((Z / Zs) * relevance).sum(axis=4)
    return Rx
def accuracy(predictions, targets):
    """Return 100 minus the mean absolute error (in percent) after thresholding.

    Predictions are binarised at 0.5 before being compared with ``targets``.
    """
    hard = nd.where(predictions > 0.5,
                    nd.ones_like(predictions),
                    nd.zeros_like(predictions))
    mean_abs_err = nd.mean(nd.abs(hard - targets)).asscalar()
    return 100 - mean_abs_err * 100
def update(self, lrate):
    """Apply one gradient step to ``self.W`` and ``self.B``.

    The weight gradient is accumulated from ``self.X`` and ``self.DY``.  It
    is computed per filter tap when the filter is square and the stride is
    (1, 1), and per output position otherwise.  Both updates are scaled by
    ``lrate`` and divided by the square root of a size term.
    """
    _, Hx, Wx, Dx = self.X.shape
    _, Hy, Wy, NF = self.DY.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    DW = nd.zeros_like(self.W, ctx=self.ctx, dtype=self.dtype)

    if hf == wf and self.stride == (1, 1):
        # Square filter, unit stride: one reduction per filter tap.
        dy = nd.expand_dims(self.DY, axis=3)
        for i in range(hf):
            for j in range(wf):
                x_tap = nd.expand_dims(
                    self.X[:, i:i + Hy:hstride, j:j + Wy:wstride, :], axis=4)
                DW[i, j, :, :] = nd.sum(x_tap * dy, axis=(0, 1, 2))
    else:
        # General case: accumulate one outer product per output position.
        for i in range(Hy):
            for j in range(Wy):
                patch = nd.expand_dims(
                    self.X[:, i * hstride:i * hstride + hf,
                           j * wstride:j * wstride + wf, :], axis=4)
                dy = nd.expand_dims(self.DY[:, i:i + 1, j:j + 1, :], axis=3)
                DW += (patch * dy).sum(axis=0)

    DB = self.DY.sum(axis=(0, 1, 2))
    self.W -= lrate * DW / (hf * wf * df * Hy * Wy)**.5
    self.B -= lrate * DB / (Hy * Wy)**.5
def test(config_yaml: str, data: np.array, test_fn: Callable = None):
    """Fit a model on ``data`` and pass averaged samples to ``test_fn``.

    Parameters
    ----------
    config_yaml : str
        YAML document; ``config['gradient']['batch_size']`` is read here and
        the parsed config is passed to ``fit.fit``.
    data : np.array
        Training data, cast to float32.
    test_fn : Callable
        Called once per evaluation batch with the averaged sample.  When
        ``data`` has more than one row the batch data is passed as a second
        argument.
    """
    np.random.seed(23423)

    def get_data_iter(batch_size, shuffle):
        dataset = gluon.data.ArrayDataset(data.astype(np.float32), range(len(data)))
        # Honour the caller's choice; this was hard-coded to True, so the
        # evaluation iterator below was shuffled as well.
        return gluon.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    # safe_load builds only plain YAML types; yaml.load without a Loader is
    # unsafe on untrusted input and raises TypeError on PyYAML >= 6.
    config = yaml.safe_load(config_yaml)
    data_iter = get_data_iter(config['gradient']['batch_size'], shuffle=True)
    my_model = fit.fit(config, data_iter)

    data_iter = get_data_iter(config['gradient']['batch_size'], shuffle=False)
    n_samples_stats = 10
    for data_batch in data_iter:
        # This first call only provides an array to shape the accumulator from.
        _, _, sample = my_model(data_batch)
        tmp_sample = nd.zeros_like(sample)
        for _ in range(n_samples_stats):
            _, _, sample = my_model(data_batch)
            tmp_sample += sample
        tmp_sample /= n_samples_stats
        if tmp_sample.ndim == 3:
            tmp_sample = nd.mean(tmp_sample, 0, keepdims=True)
        tmp_sample = tmp_sample.asnumpy()
        if len(data) == 1:
            tmp_sample = tmp_sample.reshape((1, -1))
            test_fn(tmp_sample)
        else:
            test_fn(tmp_sample, data_batch[0].asnumpy())
def hybrid_forward(self, F, score_gt, kernel_gt, score_pred, training_masks, *args, **kwargs):
    """Dice loss on the text map (with OHEM) and on the kernel maps.

    Channel 0 of ``score_pred`` is the text prediction ``C_pred``; channels
    ``1:`` are the kernel predictions.  ``score_gt`` and ``training_masks``
    are combined element-wise with ``C_pred``.

    Returns
    -------
    loss
        ``self.lam * C_dice_loss + (1 - self.lam) * kernel_dice_loss``.
        The two terms are also stored on ``self.C_loss`` and
        ``self.kernel_loss``, and accuracies on ``self.pixel_acc`` and
        ``self.kernel_acc``.
    """
    C_pred = score_pred[:, 0, :, :]

    # cal ohem mask
    # Mining runs on the text channel only, so that each mask has the shape
    # of score_gt.  Passing the full multi-channel score_pred produced masks
    # with an extra channel axis that cannot be combined with score_gt below.
    selected_masks = []
    for i in range(score_gt.shape[0]):
        # cal for text region
        selected_mask = self._ohem_single(score_gt[i:i+1], C_pred[i:i+1], training_masks[i:i+1])
        selected_masks.append(selected_mask)
    selected_masks = F.concat(*selected_masks, dim=0)

    self.pixel_acc = batch_pix_accuracy(C_pred, score_gt)

    # classification loss
    eps = 1e-5
    intersection = F.sum(score_gt * C_pred * selected_masks, axis=(1, 2))
    union = F.sum(selected_masks * score_gt * score_gt, axis=(1, 2)) + F.sum(selected_masks * C_pred * C_pred, axis=(1, 2)) + eps
    C_dice_loss = 1. - (2 * intersection) / (union)

    # loss for kernel
    # Kernels are only scored where the text prediction inside the training
    # mask exceeds 0.5.
    kernel_mask = F.where(training_masks * C_pred > 0.5, F.ones_like(C_pred), F.zeros_like(C_pred))
    kernel_mask = F.expand_dims(kernel_mask, axis=1)
    kernel_mask = F.repeat(kernel_mask, repeats=self.num_kernels-1, axis=1)
    self.kernel_acc = batch_pix_accuracy(score_pred[:, 1, :, :] * score_gt, kernel_gt[:, 0, :, :])
    kernel_intersection = F.sum(kernel_gt * score_pred[:, 1:, :, :] * kernel_mask, axis=(2, 3))
    kernel_union = F.sum(kernel_gt * kernel_gt * kernel_mask, axis=(2, 3)) + F.sum(score_pred[:, 1:, :, :] * score_pred[:, 1:, :, :] * kernel_mask, axis=(2, 3)) + eps
    kernel_dice = 1. - (2 * kernel_intersection) / kernel_union
    kernel_dice_loss = F.mean(kernel_dice, axis=1)

    self.C_loss = C_dice_loss
    self.kernel_loss = kernel_dice_loss
    loss = self.lam * C_dice_loss + (1. - self.lam) * kernel_dice_loss
    return loss
def mean_loss(yhat, y):
    """Return ``loss_fn`` of the element-wise mean of ``yhat`` against ``y``.

    Parameters
    ----------
    yhat : sequence of NDArray
        Predictions to average; each must be addable to an array shaped
        like ``y``.
    y : NDArray
        Target passed to ``loss_fn``.
    """
    # The unused local `ans` was removed.
    mean = nd.zeros_like(y)
    for prediction in yhat:
        mean += prediction
    mean = mean / len(yhat)
    return loss_fn(mean, y)
def get_final_preds(batch_heatmaps, center, scale):
    """Refine heatmap peak coordinates and map them back with ``transform_preds``.

    Every peak strictly inside the heatmap border is shifted by a quarter
    pixel towards the higher neighbouring value along each axis.

    Returns
    -------
    tuple
        ``(preds, maxvals)``; ``maxvals`` comes from ``get_max_pred``.
    """
    coords, maxvals = get_max_pred(batch_heatmaps)
    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            hm = batch_heatmaps[n][p]
            px = int(nd.floor(coords[n][p][0] + 0.5).asscalar())
            py = int(nd.floor(coords[n][p][1] + 0.5).asscalar())
            inside = 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1
            if not inside:
                continue
            dx = hm[py][px+1] - hm[py][px-1]
            dy = hm[py+1][px] - hm[py-1][px]
            coords[n][p] += nd.sign(nd.concat(dx, dy, dim=0)) * .25

    # Transform back
    preds = nd.zeros_like(coords)
    output_size = [heatmap_width, heatmap_height]
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i], output_size)
    return preds, maxvals
def hybrid_forward(self, F, score_gt, kernel_gt, score_pred, training_masks, *args, **kwargs):
    """
    kernels map's order: [1, ..., 0.5]

    Dice loss on the text map (channel 0 of score_pred) restricted to
    training_masks, plus the mean dice loss over the remaining kernel
    channels, mixed with weight self.lam.
    """
    text_pred = score_pred[:, 0, :, :]
    kernel_pred = score_pred[:, 1:, :, :]
    self.pixel_acc = batch_pix_accuracy(text_pred, score_gt)

    eps = 1e-5

    # classification loss
    text_inter = F.sum(score_gt * text_pred * training_masks, axis=(1, 2))
    text_union = F.sum(training_masks * score_gt * score_gt, axis=(1, 2)) \
        + F.sum(training_masks * text_pred * text_pred, axis=(1, 2)) + eps
    C_dice_loss = 1. - (2 * text_inter) / (text_union)

    # loss for kernel: scored only where the masked text prediction is > 0.5
    kernel_mask = F.where(training_masks * text_pred > 0.5,
                          F.ones_like(text_pred), F.zeros_like(text_pred))
    kernel_mask = F.repeat(F.expand_dims(kernel_mask, axis=1),
                           repeats=self.num_kernels-1, axis=1)
    self.kernel_acc = batch_pix_accuracy(score_pred[:, 1, :, :] * score_gt, kernel_gt[:, 0, :, :])

    kernel_inter = F.sum(kernel_gt * kernel_pred * kernel_mask, axis=(2, 3))
    kernel_union = F.sum(kernel_gt * kernel_gt * kernel_mask, axis=(2, 3)) \
        + F.sum(kernel_pred * kernel_pred * kernel_mask, axis=(2, 3)) + eps
    kernel_dice_loss = F.mean(1. - (2 * kernel_inter) / kernel_union, axis=1)

    self.C_loss = C_dice_loss
    self.kernel_loss = kernel_dice_loss
    return self.lam * C_dice_loss + (1. - self.lam) * kernel_dice_loss
def _minimize(self, data, labels):
    """Process one lot in batches, update the parameters, return the mean loss.

    Gradients are accumulated over consecutive slices of at most
    ``self._batch_size`` rows until ``lot_size`` rows have been covered.
    ``self._update_params`` is then called once with the accumulated
    gradients.
    """
    lot_size = self._hyperparams['lot_size']

    # Create storage for batches of summed gradients
    accumulated_grads = {
        param_name: nd.zeros_like(param)
        for param_name, param in self._params.items()
    }

    lot_loss = 0
    for begin in range(0, lot_size, self._batch_size):
        end = min(lot_size, begin + self._batch_size)
        batch_data = nd.slice_axis(data, axis=0, begin=begin, end=end)
        batch_labels = nd.slice_axis(labels, axis=0, begin=begin, end=end)

        # compute sum of clipped gradients for this batch of this lot
        lot_loss += self._accumulate_batch_gradients(
            batch_data, batch_labels, accumulated_grads)

        # then wait for computation to finish so that memory can be cleaned up before next batch
        nd.waitall()

    # use the computed gradients to update the parameters
    self._update_params(accumulated_grads)

    # block here, since the next step will depend on this result
    return lot_loss.asscalar() / lot_size
def _epsilon_lrp(self, R, epsilon):
    '''
    LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140

    R has shape (N, Hout, Wout, NF); the result has the shape of self.X.
    '''
    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx, dtype=self.dtype)
    # Divide the relevance by the output once, with the denominator moved
    # away from zero by epsilon in the direction of its sign.
    R_norm = R / (self.Y + epsilon * ((self.Y >= 0) * 2 - 1.))

    for i in range(Hout):
        rows = slice(i * hstride, i * hstride + hf)
        for j in range(Wout):
            cols = slice(j * wstride, j * wstride + wf)
            if self.lrp_aware:
                Z = self.Z[:, i, j, ...]
            else:
                Z = nd.expand_dims(self.W, axis=0) * nd.expand_dims(
                    self.X[:, rows, cols, :], axis=4)
            relevance = nd.expand_dims(R_norm[:, i:i + 1, j:j + 1, :], axis=3)
            Rx[:, rows, cols, :] += (Z * relevance).sum(axis=4)
    return Rx
def _alphabeta_lrp(self,R,alpha):
    '''
    LRP according to Eq(60) in DOI: 10.1371/journal.pone.0130140

    beta is derived as 1 - alpha. R has shape (N, Hout, Wout, NF); the
    result has the shape of self.X.
    '''
    beta = 1 - alpha

    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx)

    def _as_5d(bias):
        # prepend four singleton axes so the bias broadcasts against Z's sums
        for _ in range(4):
            bias = nd.expand_dims(bias, axis=0)
        return bias

    # positive / negative parts of the bias; identical for every position
    bias_pos = _as_5d(self.B * (self.B > 0))
    bias_neg = _as_5d(self.B * (self.B < 0))

    for i in range(Hout):
        rows = slice(i * hstride, i * hstride + hf)
        for j in range(Wout):
            cols = slice(j * wstride, j * wstride + wf)

            if self.lrp_aware:
                Z = self.Z[:, i, j, ...]
            else:
                Z = nd.expand_dims(self.W, axis=0) * nd.expand_dims(self.X[:, rows, cols, :], axis=4)

            Zplus = Z > 0  # index mask of positive forward predictions
            relevance = nd.expand_dims(R[:, i:i+1, j:j+1, :], axis=3)

            if alpha * beta != 0:
                # the general case: both parameters are not 0
                Zp = Z * Zplus
                Zsp = Zp.sum(axis=(1, 2, 3), keepdims=True) + bias_pos + 1e-16
                Zn = Z - Zp
                Zsn = nd.expand_dims(self.Y[:, i:i+1, j:j+1, :], axis=3) - Zsp - 1e-16
                mixed = alpha * (Zp/Zsp) + beta * (Zn/Zsn)
                Rx[:, rows, cols, :] += (mixed * relevance).sum(axis=4)
            elif alpha:
                # only alpha is not 0 -> alpha = 1, beta = 0
                Zp = Z * Zplus
                Zsp = Zp.sum(axis=(1, 2, 3), keepdims=True) + bias_pos + 1e-16
                Rx[:, rows, cols, :] += (Zp * (relevance / Zsp)).sum(axis=4)
            elif beta:
                # only beta is not 0 -> alpha = 0, beta = 1
                Zn = Z * (Z < 0)
                Zsn = Zn.sum(axis=(1, 2, 3), keepdims=True) + bias_neg + 1e-16
                Rx[:, rows, cols, :] += (Zn * (relevance / Zsn)).sum(axis=4)
            else:
                raise Exception('This case should never occur: alpha={}, beta={}.'.format(alpha, beta))

    return Rx
def _flat_lrp(self,R):
    '''
    distribute relevance for each output evenly to the output neurons' receptive fields.

    R has shape (N, Hout, Wout, NF); the result has the shape of self.X.
    '''
    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx)

    # Uniform shares: every input in the receptive field gets the same
    # fraction. These do not depend on the output position.
    Z = nd.ones((N, hf, wf, df, NF), ctx=self.ctx, dtype=self.dtype)
    Zs = Z.sum(axis=(1, 2, 3), keepdims=True)
    share = Z / Zs

    for i in range(Hout):
        rows = slice(i * hstride, i * hstride + hf)
        for j in range(Wout):
            cols = slice(j * wstride, j * wstride + wf)
            relevance = nd.expand_dims(R[:, i:i+1, j:j+1, :], axis=3)
            Rx[:, rows, cols, :] += (share * relevance).sum(axis=4)
    return Rx
def _ww_lrp(self,R):
    '''
    LRP according to Eq(12) in https://arxiv.org/pdf/1512.02479v1.pdf

    R has shape (N, Hout, Wout, NF); the result has the shape of self.X.
    '''
    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx)

    # Shares proportional to the squared weights; these do not depend on
    # the output position.
    Z = nd.expand_dims(self.W, 0)**2
    Zs = Z.sum(axis=(1, 2, 3), keepdims=True)
    share = Z / Zs

    for i in range(Hout):
        rows = slice(i * hstride, i * hstride + hf)
        for j in range(Wout):
            cols = slice(j * wstride, j * wstride + wf)
            relevance = nd.expand_dims(R[:, i:i+1, j:j+1, :], axis=3)
            Rx[:, rows, cols, :] += (share * relevance).sum(axis=4)
    return Rx
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices.

    Each argument is passed through ``_as_list`` and the four resulting lists
    are processed in lockstep. Classification loss is a cross entropy kept
    only for positive entries (target > 0) and for the hardest negatives; box
    loss is a smooth-L1 style loss with threshold ``self._rho`` kept only for
    positive entries. Both are divided by ``max(1, total positives)``.

    Returns
    -------
    tuple of list
        ``(sum_losses, cls_losses, box_losses)`` with one entry per input
        element; ``sum = cls + self._lambd * box``.
    """
    # require results across different devices at this time
    cls_pred = _as_list(cls_pred)
    box_pred = _as_list(box_pred)
    cls_target = _as_list(cls_target)
    box_target = _as_list(box_target)

    # cross device reduction to obtain positive samples in entire batch
    positives_per_item = [
        (ct > 0).sum()
        for _, _, ct, _ in zip(cls_pred, box_pred, cls_target, box_target)
    ]
    num_pos_all = sum(count.asscalar() for count in positives_per_item)

    if num_pos_all < 1 and self._min_hard_negatives < 1:
        # no positive samples and no hard negatives, return dummy losses
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0) for cp, bp in zip(cls_pred, box_pred)]
        return sum_losses, cls_losses, box_losses

    # compute element-wise cross entropy loss and sort, then perform negative mining
    cls_losses, box_losses, sum_losses = [], [], []
    for cp, bp, ct, bt in zip(cls_pred, box_pred, cls_target, box_target):
        log_prob = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(log_prob, ct, axis=-1, keepdims=False)

        # positives are zeroed by (pos - 1); double argsort turns the values into ranks
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        negatives_to_keep = nd.maximum(
            self._min_hard_negatives,
            pos.sum(axis=1) * self._negative_mining_ratio,
        ).expand_dims(-1)
        hard_negative = rank < negatives_to_keep

        # mask out if not positive or negative
        keep = (pos + hard_negative) > 0
        cls_loss = nd.where(keep, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

        bp = _reshape_like(nd, bp, bt)
        abs_err = nd.abs(bp - bt)
        box_loss = nd.where(
            abs_err > self._rho,
            abs_err - 0.5 * self._rho,
            (0.5 / self._rho) * nd.square(abs_err),
        )
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))

        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

    return sum_losses, cls_losses, box_losses
def _epsilon_lrp_slow(self, R, epsilon):
    '''
    LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140
    This function shows all necessary operations to perform LRP in one place and is therefore not optimized

    Parameters
    ----------
    R : mxnet.ndarray.ndarray.NDArray
        relevance arriving at the layer output; its shape is unpacked as
        (N, Hout, Wout, NF).
    epsilon : float
        stabiliser added to the denominator with the denominator's own sign.

    Returns
    -------
    Rx : mxnet.ndarray.ndarray.NDArray
        relevance accumulated window by window, allocated like self.X.
    '''
    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx, dtype=self.dtype)

    for i in range(Hout):
        row0 = i * hstride
        for j in range(Wout):
            col0 = j * wstride

            # per-window forward contributions of every input element to every filter
            window = self.X[:, row0:row0 + hf, col0:col0 + wf, :]
            Z = nd.expand_dims(self.W, axis=0) * nd.expand_dims(window, axis=4)

            # bias with four leading singleton axes so it adds onto the summed Z
            bias = self.B
            for _ in range(4):
                bias = nd.expand_dims(bias, axis=0)

            Zs = Z.sum(axis=(1, 2, 3), keepdims=True) + bias
            # push the denominator away from zero: +epsilon where Zs >= 0, -epsilon otherwise
            Zs += epsilon * ((Zs >= 0) * 2 - 1)

            Rij = nd.expand_dims(R[:, i:i + 1, j:j + 1, :], axis=3)
            Rx[:, row0:row0 + hf, col0:col0 + wf, :] += ((Z / Zs) * Rij).sum(axis=4)

    return Rx
def _epsilon_lrp(self, R, epsilon):
    '''
    LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140

    The relevance is divided by the epsilon-stabilised stored output self.Y
    once, up front, and then multiplied by the forward contributions of each
    window.

    Parameters
    ----------
    R : mxnet.ndarray.ndarray.NDArray
        relevance arriving at the layer output; its shape is unpacked as
        (N, Hout, Wout, NF).
    epsilon : float
        stabiliser added to self.Y with the sign of self.Y.

    Returns
    -------
    Rx : mxnet.ndarray.ndarray.NDArray
        relevance accumulated window by window, allocated like self.X.
    '''
    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx, dtype=self.dtype)

    # +1 where Y >= 0, -1 elsewhere, so epsilon always moves Y away from zero
    sign = (self.Y >= 0) * 2 - 1.
    R_norm = R / (self.Y + epsilon * sign)

    for i in range(Hout):
        row0 = i * hstride
        for j in range(Wout):
            col0 = j * wstride

            if self.lrp_aware:
                # use the stored per-position contributions (presumably cached by forward)
                Z = self.Z[:, i, j, ...]
            else:
                window = self.X[:, row0:row0 + hf, col0:col0 + wf, :]
                Z = nd.expand_dims(self.W, axis=0) * nd.expand_dims(window, axis=4)

            Rij = nd.expand_dims(R_norm[:, i:i + 1, j:j + 1, :], axis=3)
            Rx[:, row0:row0 + hf, col0:col0 + wf, :] += (Z * Rij).sum(axis=4)

    return Rx
def backward(self, DY):
    '''
    Backward-passes an input error gradient DY towards the input neurons of this layer.

    DY is also stored on the instance as self.DY.

    Parameters
    ----------
    DY : mxnet.ndarray.ndarray.NDArray
        an error gradient whose shape is unpacked as (N,Hy,Wy,NF)
        N = number of samples in the batch
        Hy = height of the output
        Wy = width of the output
        NF = last axis of DY, unpacked under the same name as the last axis of self.W

    Returns
    -------
    DX : mxnet.ndarray.ndarray.NDArray
        the error gradient propagated towards the input, allocated like self.X
    '''
    self.DY = DY
    N, Hy, Wy, NF = DY.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    DX = nd.zeros_like(self.X, ctx=self.ctx, dtype=self.dtype)

    if hf == wf and self.stride == (1, 1):
        # square filter with unit stride: loop over filter taps instead of output positions
        for i in range(hf):
            for j in range(wf):
                DX[:, i:i + Hy:hstride, j:j + Wy:wstride, :] += nd.dot(DY, self.W[i, j, :, :].T)
    else:
        for i in range(Hy):
            row0 = i * hstride
            for j in range(Wy):
                col0 = j * wstride
                weights = nd.expand_dims(self.W, axis=0)
                grad_ij = nd.expand_dims(DY[:, i:i + 1, j:j + 1, :], axis=3)
                # sum over all the filters
                DX[:, row0:row0 + hf, col0:col0 + wf, :] += (weights * grad_ij).sum(axis=4)

    return DX  # * (hf*wf*df)**.5 / (NF*Hy*Wy)**.5
def update(self, lrate):
    '''
    Take one gradient step on self.W and self.B, in place.

    The weight gradient DW is accumulated from self.X and self.DY; the bias
    gradient DB is self.DY summed over axes (0,1,2). Both are scaled by lrate
    and divided by a size-dependent normaliser before being subtracted.

    Parameters
    ----------
    lrate : float
        factor applied to both gradients before they are subtracted.
    '''
    N, Hx, Wx, Dx = self.X.shape
    N, Hy, Wy, NF = self.DY.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    DW = nd.zeros_like(self.W, ctx=self.ctx, dtype=self.dtype)

    if hf == wf and self.stride == (1, 1):
        # square filter with unit stride: fill DW one filter tap at a time
        for i in range(hf):
            for j in range(wf):
                x_tap = nd.expand_dims(self.X[:, i:i + Hy:hstride, j:j + Wy:wstride, :], axis=4)
                dy = nd.expand_dims(self.DY, axis=3)
                DW[i, j, :, :] = nd.sum(x_tap * dy, axis=(0, 1, 2))
    else:
        for i in range(Hy):
            row0 = i * hstride
            for j in range(Wy):
                col0 = j * wstride
                x_win = nd.expand_dims(self.X[:, row0:row0 + hf, col0:col0 + wf, :], axis=4)
                dy_ij = nd.expand_dims(self.DY[:, i:i + 1, j:j + 1, :], axis=3)
                # sum over the batch axis
                DW += (x_win * dy_ij).sum(axis=0)

    DB = self.DY.sum(axis=(0, 1, 2))

    self.W -= lrate * DW / (hf * wf * df * Hy * Wy) ** .5
    self.B -= lrate * DB / (Hy * Wy) ** .5
def heatmap_to_coord(heatmaps, bbox_list):
    """Map heatmap peak positions back into the coordinates of each bounding box.

    Parameters
    ----------
    heatmaps : NDArray
        heatmaps whose axes 2 and 3 are read as height and width.
    bbox_list : iterable
        one box per batch item, indexed as (x0, y0, x1, y1).

    Returns
    -------
    preds : NDArray
        array shaped like the coordinates returned by ``get_max_pred``, with
        columns 0 and 1 rescaled into the corresponding box.
    maxvals
        second value returned by ``get_max_pred``, passed through unchanged.
    """
    hm_height, hm_width = heatmaps.shape[2], heatmaps.shape[3]
    coords, maxvals = get_max_pred(heatmaps)
    preds = nd.zeros_like(coords)

    for idx, box in enumerate(bbox_list):
        left, top, right, bottom = box[0], box[1], box[2], box[3]
        half_w = (right - left) / 2
        half_h = (bottom - top) / 2
        center = np.array([left + half_w, top + half_h])
        scale = np.array([half_w, half_h])

        # peak position as a fraction of the heatmap extent
        frac_x = coords[idx][:, 0] / hm_width
        frac_y = coords[idx][:, 1] / hm_height

        preds[idx][:, 0] = scale[0] * 2 * frac_x + center[0] - scale[0]
        preds[idx][:, 1] = scale[1] * 2 * frac_y + center[1] - scale[1]

    return preds, maxvals
def _alphabeta_lrp_slow(self, R, alpha):
    '''
    LRP according to Eq(60) in DOI: 10.1371/journal.pone.0130140
    This function shows all necessary operations to perform LRP in one place and is therefore not optimized

    Parameters
    ----------
    R : mxnet.ndarray.ndarray.NDArray
        relevance arriving at the layer output; its shape is unpacked as
        (N, Hout, Wout, NF).
    alpha : float
        weight of the positive contributions; beta is derived as 1 - alpha.

    Returns
    -------
    Rx : mxnet.ndarray.ndarray.NDArray
        relevance accumulated window by window, allocated like self.X.
    '''
    beta = 1 - alpha

    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx)

    for i in range(Hout):
        row0 = i * hstride
        for j in range(Wout):
            col0 = j * wstride

            window = self.X[:, row0:row0 + hf, col0:col0 + wf, :]
            Z = nd.expand_dims(self.W, axis=0) * nd.expand_dims(window, axis=4)

            # positive part, weighted by alpha (stays the scalar 0 when alpha is 0)
            Ralpha = 0
            if alpha != 0:
                Zp = Z * (Z > 0)
                Bp = self.B * (self.B > 0)
                for _ in range(4):
                    Bp = nd.expand_dims(Bp, axis=0)
                Zsp = Zp.sum(axis=(1, 2, 3), keepdims=True) + Bp
                Ralpha = alpha * ((Zp / Zsp) * nd.expand_dims(R[:, i:i + 1, j:j + 1, :], axis=3)).sum(axis=4)

            # negative part, weighted by beta (stays the scalar 0 when beta is 0)
            Rbeta = 0
            if beta != 0:
                Zn = Z * (Z < 0)
                Bn = self.B * (self.B < 0)
                for _ in range(4):
                    Bn = nd.expand_dims(Bn, axis=0)
                Zsn = Zn.sum(axis=(1, 2, 3), keepdims=True) + Bn
                Rbeta = beta * ((Zn / Zsn) * nd.expand_dims(R[:, i:i + 1, j:j + 1, :], axis=3)).sum(axis=4)

            Rx[:, row0:row0 + hf, col0:col0 + wf, :] += Ralpha + Rbeta

    return Rx
def forward(self, img, xs, anchors, offsets, gt_boxes, gt_ids, gt_mixratio=None):
    """Generating training targets that do not require network predictions.

    Parameters
    ----------
    img : mxnet.nd.NDArray
        Original image tensor.
    xs : list of mxnet.nd.NDArray
        List of feature maps.
    anchors : mxnet.nd.NDArray
        YOLO3 anchors.
    offsets : mxnet.nd.NDArray
        Pre-generated x and y offsets for YOLO3.
    gt_boxes : mxnet.nd.NDArray
        Ground-truth boxes.
    gt_ids : mxnet.nd.NDArray
        Ground-truth IDs.
    gt_mixratio : mxnet.nd.NDArray, optional
        Mixup ratio from 0 to 1.

    Returns
    -------
    (tuple of) mxnet.nd.NDArray
        objectness: 0 for negative, 1 for positive, -1 for ignore.
        center_targets: regression target for center x and y.
        scale_targets: regression target for scale x and y.
        weights: element-wise gradient weights for center_targets and scale_targets.
        class_targets: a one-hot vector for classification.

    """
    assert isinstance(anchors, (list, tuple))
    all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
    assert isinstance(offsets, (list, tuple))
    all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
    # running totals of (w, h) anchor pairs and (x, y) offset pairs per layer
    num_anchors = np.cumsum([a.size // 2 for a in anchors])
    num_offsets = np.cumsum([o.size // 2 for o in offsets])
    layer_starts = [0] + num_offsets.tolist()
    assert isinstance(xs, (list, tuple))
    assert len(xs) == len(anchors) == len(offsets)

    # orig image size
    orig_height, orig_width = img.shape[2], img.shape[3]

    with autograd.pause():
        # outputs
        tiled_offsets = all_offsets.reshape((-1, 1, 2)).expand_dims(0).repeat(
            repeats=gt_ids.shape[0], axis=0)
        shape_like = all_anchors.reshape((1, -1, 2)) * tiled_offsets
        center_targets = nd.zeros_like(shape_like)
        scale_targets = nd.zeros_like(center_targets)
        weights = nd.zeros_like(center_targets)
        objectness = nd.zeros_like(weights.split(axis=-1, num_outputs=2)[0])
        class_targets = nd.one_hot(objectness.squeeze(axis=-1), depth=self._num_class)
        class_targets[:] = -1  # prefill -1 for ignores

        # for each ground-truth, find the best matching anchor within the particular grid
        # for instance, center of object 1 reside in grid (3, 4) in (16, 16) feature map
        # then only the anchor in (3, 4) is going to be matched
        gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
        shift_gt_boxes = nd.concat(-0.5 * gtw, -0.5 * gth, 0.5 * gtw, 0.5 * gth, dim=-1)
        anchor_boxes = nd.concat(0 * all_anchors, all_anchors, dim=-1)  # zero center anchors
        shift_anchor_boxes = self.bbox2corner(anchor_boxes)
        ious = nd.contrib.box_iou(shift_anchor_boxes, shift_gt_boxes).transpose((1, 0, 2))

        # real value is required to process, convert to Numpy
        matches = ious.argmax(axis=1).asnumpy()  # (B, M)
        valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
        np_gtx, np_gty, np_gtw, np_gth = [x.asnumpy() for x in [gtx, gty, gtw, gth]]
        np_anchors = all_anchors.asnumpy()
        np_gt_ids = gt_ids.asnumpy()
        np_gt_mixratios = gt_mixratio.asnumpy() if gt_mixratio is not None else None

        # TODO(zhreshold): the number of valid gt is not a big number, therefore for loop
        # should not be a problem right now. Switch to better solution is needed.
        batch_size, max_gts = matches.shape[0], matches.shape[1]
        for b in range(batch_size):
            for m in range(max_gts):
                if valid_gts[b, m] < 1:
                    # first invalid box ends this sample's ground-truths
                    break
                match = int(matches[b, m])
                # first layer whose cumulative anchor count exceeds the matched anchor index
                nlayer = np.nonzero(num_anchors > match)[0][0]
                height = xs[nlayer].shape[2]
                width = xs[nlayer].shape[3]
                box_x = np_gtx[b, m, 0]
                box_y = np_gty[b, m, 0]
                box_w = np_gtw[b, m, 0]
                box_h = np_gth[b, m, 0]

                # compute the location of the gt centers
                loc_x = int(box_x / orig_width * width)
                loc_y = int(box_y / orig_height * height)

                # write back to targets
                index = layer_starts[nlayer] + loc_y * width + loc_x
                center_targets[b, index, match, 0] = box_x / orig_width * width - loc_x  # tx
                center_targets[b, index, match, 1] = box_y / orig_height * height - loc_y  # ty
                scale_targets[b, index, match, 0] = np.log(box_w / np_anchors[match, 0])
                scale_targets[b, index, match, 1] = np.log(box_h / np_anchors[match, 1])
                weights[b, index, match, :] = 2.0 - box_w * box_h / orig_width / orig_height
                if np_gt_mixratios is not None:
                    objectness[b, index, match, 0] = np_gt_mixratios[b, m, 0]
                else:
                    objectness[b, index, match, 0] = 1
                class_targets[b, index, match, :] = 0
                class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1

        # since some stages won't see partial anchors, so we have to slice the correct targets
        objectness = self._slice(objectness, num_anchors, num_offsets)
        center_targets = self._slice(center_targets, num_anchors, num_offsets)
        scale_targets = self._slice(scale_targets, num_anchors, num_offsets)
        weights = self._slice(weights, num_anchors, num_offsets)
        class_targets = self._slice(class_targets, num_anchors, num_offsets)

    return objectness, center_targets, scale_targets, weights, class_targets