def layerwise_relevance_zclip(self, out, use_bias=False, **kwargs):
    """Propagate relevance through this block with sign-clipped weights.

    The weight (and optionally the bias) is split into its positive and
    negative parts. Relevance at positions where the logged output
    ``self._out`` is positive is routed through the positive part, relevance
    at positions where it is negative through the negative part.

    Parameters
    ----------
    out :
        Relevance arriving at the output of this block.
    use_bias : bool
        When true, the clipped bias takes part in the two forward passes.
        When false (the default), ``None`` is passed as bias to
        ``self._forward``.
    **kwargs :
        Accepted and ignored.

    Returns
    -------
    Relevance with respect to the logged input ``self._in[0]``.

    Raises
    ------
    RuntimeError
        If no input has been logged yet (``self._in`` is ``None``).
    """
    if self._in is None:
        raise RuntimeError('Block has not yet executed forward_logged!')

    R = out
    a = self._in[0]
    z = self._out

    weight = self.weight.data(ctx=a.context)
    wplus = nd.maximum(0., weight)
    wminus = nd.minimum(0., weight)

    bplus = None
    bminus = None
    # ``use_bias`` is a boolean flag. The previous ``is not None`` test was
    # true for both True and False, so the bias could never be switched off.
    if use_bias:
        bias = self.bias.data(ctx=a.context)
        bplus = nd.maximum(0., bias)
        bminus = nd.minimum(0., bias)

    # Masks selecting outputs with positive / negative pre-activation.
    alpha = z > 0.
    beta = z < 0.

    a.attach_grad()

    with autograd.record():
        zplus = self._forward(data=a, weight=wplus, bias=bplus)
    # ``(zplus == 0.)`` adds 1 exactly where the denominator would be zero.
    cplus, = autograd.grad(
        zplus, a, head_grads=alpha * R / (zplus + (zplus == 0.)))

    with autograd.record():
        zminus = self._forward(data=a, weight=wminus, bias=bminus)
    cminus, = autograd.grad(
        zminus, a, head_grads=beta * R / (zminus + (zminus == 0.)))

    return a * (cplus - cminus)
def _compute_yolo_iou(self, F, boxes1, boxes2):
    """Return the IoU of corresponding boxes in ``boxes1`` and ``boxes2``.

    Both inputs are indexed as 5-D arrays whose last axis is read as
    (center_x, center_y, width, height). The result drops that last axis.

    Parameters
    ----------
    F :
        Unused; the computation always goes through ``nd``.
    boxes1, boxes2 :
        Box arrays in center/size representation.

    Returns
    -------
    Element-wise intersection over union, clipped to [0, 1].
    """

    def _to_corners(boxes):
        # Convert (cx, cy, w, h) to (x_min, y_min, x_max, y_max).
        half_w = boxes[:, :, :, :, 2] / 2.0
        half_h = boxes[:, :, :, :, 3] / 2.0
        x_min = boxes[:, :, :, :, 0] - half_w
        y_min = boxes[:, :, :, :, 1] - half_h
        x_max = boxes[:, :, :, :, 0] + half_w
        y_max = boxes[:, :, :, :, 1] + half_h
        return nd.stack(x_min, y_min, x_max, y_max, axis=-1)

    corners1 = _to_corners(boxes1)
    corners2 = _to_corners(boxes2)

    # Corners of the overlap rectangle; a negative extent means no overlap.
    upperleft = nd.maximum(corners1[:, :, :, :, :2], corners2[:, :, :, :, :2])
    lowerright = nd.minimum(corners1[:, :, :, :, 2:], corners2[:, :, :, :, 2:])
    intersection_dims = nd.maximum(0.0, lowerright - upperleft)
    intersection_area = (intersection_dims[:, :, :, :, 0]
                         * intersection_dims[:, :, :, :, 1])

    # Box area is width * height from the ORIGINAL representation. The earlier
    # code multiplied entries 2 and 3 of the corner form (x_max * y_max),
    # which is not an area.
    area1 = boxes1[:, :, :, :, 2] * boxes1[:, :, :, :, 3]
    area2 = boxes2[:, :, :, :, 2] * boxes2[:, :, :, :, 3]

    # Floor the union to avoid dividing by zero for degenerate boxes.
    union_area = nd.maximum(1e-8, area1 + area2 - intersection_area)
    return nd.clip(intersection_area / union_area, a_min=0.0, a_max=1.0)
def layerwise_relevance_zb(self, out, lo=-1, hi=1, use_bias=False, **kwargs):
    """Propagate relevance through this block using input bounds ``lo``/``hi``.

    Three forward passes are combined: the logged input with the full weight,
    a constant ``lo`` input with the positive weight part, and a constant
    ``hi`` input with the negative weight part. Relevance is the sum of
    input-times-gradient over the three inputs.

    Parameters
    ----------
    out :
        Relevance arriving at the output of this block.
    lo, hi :
        Scalars used to fill the lower / upper bound inputs.
    use_bias : bool
        When true, the bias (and its clipped parts) is passed to the forward
        passes. When false (the default), ``None`` is passed instead.
    **kwargs :
        Accepted and ignored.

    Returns
    -------
    Relevance with respect to the logged input ``self._in[0]``.

    Raises
    ------
    RuntimeError
        If no input has been logged yet (``self._in`` is ``None``).
    """
    if self._in is None:
        raise RuntimeError('Block has not yet executed forward_logged!')

    R = out
    a = self._in[0]

    weight = self.weight.data(ctx=a.context)
    wplus = nd.maximum(0., weight)
    wminus = nd.minimum(0., weight)

    bias = None
    bplus = None
    bminus = None
    # ``use_bias`` is a boolean flag. ``use_bias is not None`` was always
    # true, which made the parameter meaningless.
    if use_bias:
        bias = self.bias.data(ctx=a.context)
        bplus = nd.maximum(0., bias)
        bminus = nd.minimum(0., bias)

    upper = nd.ones_like(a) * hi
    lower = nd.ones_like(a) * lo

    for tensor in (a, upper, lower):
        tensor.attach_grad()

    with autograd.record():
        zlh = (self._forward(a, weight, bias)
               - self._forward(lower, wplus, bplus)
               - self._forward(upper, wminus, bminus))
    # ``(zlh == 0.)`` adds 1 exactly where the denominator would be zero.
    zlh.backward(out_grad=R / (zlh + (zlh == 0.)))

    return a * a.grad + upper * upper.grad + lower * lower.grad
def layerwise_relevance_zclip(self, out, use_bias=False, **kwargs):
    """Relevance propagation with positive/negative clipped weights.

    Outputs whose logged value ``self._out`` is positive pass their relevance
    through the positive part of the weight; outputs whose logged value is
    negative pass it through the negative part.

    Parameters
    ----------
    out :
        Relevance arriving at the output of this block.
    use_bias : bool
        Include the sign-clipped bias in the forward passes. Defaults to
        false, in which case ``None`` is handed to ``self._forward``.
    **kwargs :
        Accepted and ignored.

    Returns
    -------
    Relevance with respect to the logged input ``self._in[0]``.

    Raises
    ------
    RuntimeError
        If ``self._in`` is ``None``.
    """
    if self._in is None:
        raise RuntimeError('Block has not yet executed forward_logged!')

    R = out
    a = self._in[0]
    z = self._out

    weight = self.weight.data(ctx=a.context)
    wplus = nd.maximum(0., weight)
    wminus = nd.minimum(0., weight)

    bplus = None
    bminus = None
    # Honour the flag: the old ``use_bias is not None`` check was true for
    # False as well, so the bias was used unconditionally.
    if use_bias:
        bias = self.bias.data(ctx=a.context)
        bplus = nd.maximum(0., bias)
        bminus = nd.minimum(0., bias)

    alpha = z > 0.
    beta = z < 0.

    a.attach_grad()

    with autograd.record():
        zplus = self._forward(data=a, weight=wplus, bias=bplus)
    # Zero denominators are replaced by 1 through the ``== 0.`` mask.
    cplus, = autograd.grad(
        zplus, a, head_grads=alpha*R/(zplus + (zplus == 0.)))

    with autograd.record():
        zminus = self._forward(data=a, weight=wminus, bias=bminus)
    cminus, = autograd.grad(
        zminus, a, head_grads=beta*R/(zminus + (zminus == 0.)))

    return a*(cplus - cminus)
def layerwise_relevance_zb(self, out, lo=-1, hi=1, use_bias=False, **kwargs):
    """Relevance propagation with bounded inputs (``lo`` <= input <= ``hi``).

    Combines a forward pass of the logged input with the full weight, a pass
    of a constant ``lo`` tensor with the positive weight part, and a pass of
    a constant ``hi`` tensor with the negative weight part.

    Parameters
    ----------
    out :
        Relevance arriving at the output of this block.
    lo, hi :
        Scalars used to fill the lower / upper bound tensors.
    use_bias : bool
        Include the bias and its clipped parts. Defaults to false, in which
        case ``None`` is handed to ``self._forward``.
    **kwargs :
        Accepted and ignored.

    Returns
    -------
    Sum of input-times-gradient over the logged, upper and lower inputs.

    Raises
    ------
    RuntimeError
        If ``self._in`` is ``None``.
    """
    if self._in is None:
        raise RuntimeError('Block has not yet executed forward_logged!')

    R = out
    a = self._in[0]

    weight = self.weight.data(ctx=a.context)
    wplus = nd.maximum(0., weight)
    wminus = nd.minimum(0., weight)

    bias = None
    bplus = None
    bminus = None
    # Honour the flag: ``use_bias is not None`` held for False too.
    if use_bias:
        bias = self.bias.data(ctx=a.context)
        bplus = nd.maximum(0., bias)
        bminus = nd.minimum(0., bias)

    upper = nd.ones_like(a)*hi
    lower = nd.ones_like(a)*lo

    a.attach_grad()
    upper.attach_grad()
    lower.attach_grad()

    with autograd.record():
        zlh = (
            self._forward(a, weight, bias)
            - self._forward(lower, wplus, bplus)
            - self._forward(upper, wminus, bminus)
        )
    # Zero denominators are replaced by 1 through the ``== 0.`` mask.
    zlh.backward(out_grad=R/(zlh + (zlh == 0.)))

    return a*a.grad + upper*upper.grad + lower*lower.grad
def get_iou(predict, target, mode=1):
    """Compute the IoU between every predicted box and a single target box.

    Parameters
    ----------
    predict :
        Array whose last axis is split into (left, top, right, bottom).
    target :
        Indexable with at least five entries; entry 0 is not read here.
        mode 1: entries 1..4 are (left, top, right, bottom).
        mode 2: entries 1..4 are (center_y, center_x, height, width).
    mode : int
        1 or 2, selecting the target layout described above.

    Returns
    -------
    IoU values with the same leading shape as ``predict`` and a trailing
    axis of size 1.

    Raises
    ------
    ValueError
        If ``mode`` is neither 1 nor 2.
    """
    l, t, r, b = predict.split(num_outputs=4, axis=-1)

    if mode == 1:
        l2, t2, r2, b2 = target[1], target[2], target[3], target[4]
        # Corner layout: the area must come from the side lengths. The old
        # code multiplied target[3] * target[4] (right * bottom) here.
        target_area = (r2 - l2) * (b2 - t2)
    elif mode == 2:
        l2 = target[2] - target[4]/2
        t2 = target[1] - target[3]/2
        r2 = target[2] + target[4]/2
        b2 = target[1] + target[3]/2
        # Center/size layout: entries 3 and 4 already are height and width.
        target_area = target[3] * target[4]
    else:
        # Previously this only printed and then failed on an unbound local.
        raise ValueError('mode should be int 1 or 2')

    i_left = nd.maximum(l2, l)
    i_top = nd.maximum(t2, t)
    i_right = nd.minimum(r2, r)
    i_bottom = nd.minimum(b2, b)

    # Negative extents mean the boxes do not overlap.
    iw = nd.maximum(i_right - i_left, 0.)
    ih = nd.maximum(i_bottom - i_top, 0.)
    inters = iw * ih

    predict_area = (r-l)*(b-t)
    ious = inters/(predict_area + target_area - inters)
    return ious
def extract_multi_position_matrix_nd(bbox):
    """Build pairwise log-space geometry features between boxes.

    ``bbox`` is transposed with axes (1, 0, 2) and its last axis is split
    into (xmin, ymin, xmax, ymax). For every pair of boxes along axis 1 four
    features are produced and stacked on a new trailing axis:
    log |dx / width|, log |dy / height|, log width ratio, log height ratio.
    The absolute center offsets are floored at 1e-3 before the logarithm.
    """
    boxes = nd.transpose(bbox, axes=(1, 0, 2))
    xmin, ymin, xmax, ymax = nd.split(data=boxes, num_outputs=4, axis=2)

    # Sizes use the inclusive-pixel convention (+1).
    width = xmax - xmin + 1.
    height = ymax - ymin + 1.
    ctr_x = 0.5 * (xmin + xmax)
    ctr_y = 0.5 * (ymin + ymax)

    def _swap_pair_axes(t):
        # Turn a column per box into a row per box for pairwise broadcasting.
        return nd.transpose(t, axes=(0, 2, 1))

    def _log_center_offset(center, size):
        # log of the size-normalised absolute center distance, floored at 1e-3.
        offset = nd.broadcast_minus(lhs=center, rhs=_swap_pair_axes(center))
        offset = nd.broadcast_div(offset, size)
        return nd.log(nd.maximum(nd.abs(offset), 1e-3))

    def _log_size_ratio(size):
        # log of the pairwise size ratio.
        ratio = nd.broadcast_div(lhs=size, rhs=_swap_pair_axes(size))
        return nd.log(ratio)

    features = [
        _log_center_offset(ctr_x, width),
        _log_center_offset(ctr_y, height),
        _log_size_ratio(width),
        _log_size_ratio(height),
    ]
    expanded = [nd.expand_dims(feat, axis=3) for feat in features]
    return nd.concat(*expanded, dim=3)
def test_maximum():
    """Check ``nd.maximum`` on a ``LARGE_X``-sized array at both ends."""
    threes = mx.nd.ones(LARGE_X) * 3
    fours = mx.nd.ones(LARGE_X) * 4

    # Array against array.
    pairwise = nd.maximum(threes, fours)
    assert pairwise[0] == 4
    assert pairwise[-1] == 4

    # Array against Python scalar.
    against_scalar = nd.maximum(threes, 5)
    assert against_scalar[0] == 5
    assert against_scalar[-1] == 5
def forward(self, x):
    """Return ``1 - exp(-delta * s)`` for each row of ``x``.

    ``s`` is the summed distance by which a row lies outside the interval
    [``_min_list``, ``_max_list``] along axis 1, and ``delta`` is this node's
    ``_tau`` minus the parent's ``_tau`` (0 when there is no parent).
    """
    if self._parent is None:
        parent_tau = 0
    else:
        parent_tau = self._parent._box._tau.data()
    delta = self._tau.data() - parent_tau

    over_max = nd.maximum(x - self._max_list.data(), 0)
    under_min = nd.maximum(self._min_list.data() - x, 0)
    s = nd.sum(over_max + under_min, axis=1, keepdims=True)

    return 1 - nd.exp(-1 * delta * s)
def CapsuleMarginLoss(y_pred, labels, lambda_value):
    """Margin loss summed over axis 1.

    ``labels`` is used directly as the per-class indicator (no one-hot
    conversion happens here). Entries with label 1 are penalised by
    ``max(0.9 - y_pred, 0)^2``; the remaining entries by
    ``lambda_value * max(y_pred - 0.1, 0)^2``.
    """
    present = labels
    absent = 1 - present

    present_penalty = nd.square(nd.maximum(0.9 - y_pred, 0))
    absent_penalty = nd.square(nd.maximum(y_pred - 0.1, 0))

    per_class = present * present_penalty + lambda_value * absent * absent_penalty
    return per_class.sum(axis=1)
def forward(self, labels, y_pred):
    """Weighted margin loss, averaged over all non-batch axes.

    ``labels`` is used directly as the per-class indicator. The per-sample
    loss is scaled through ``_apply_weighting`` with ``self._weight / 2`` and
    ``self.sample_weight``.
    """
    present = labels
    absent = 1 - present

    present_penalty = nd.square(nd.maximum(0.9 - y_pred, 0))
    absent_penalty = nd.square(nd.maximum(y_pred - 0.1, 0))

    per_class = (present * present_penalty
                 + self.lambda_value * absent * absent_penalty)
    per_sample = per_class.sum(axis=1)

    reduced = nd.mean(per_sample, axis=self._batch_axis, exclude=True)
    weighted = _apply_weighting(nd, reduced, self._weight / 2,
                                self.sample_weight)
    return nd.mean(weighted, axis=self._batch_axis, exclude=True)
def forward(self, x):
    """Return ``1 - exp(-delta * s)`` per row, or ones for an unset box.

    When both ``_min_list`` and ``_max_list`` have no shape yet, a column of
    ones (one per row of ``x``) is returned. Otherwise ``s`` is the summed
    distance by which a row lies outside [``_min_list``, ``_max_list``] and
    ``delta`` is this node's ``_tau`` minus the parent's (0 without parent).
    """
    if self._parent is None:
        parent_tau = 0
    else:
        parent_tau = self._parent._box._tau.data()

    box_is_unset = (self._min_list.shape is None
                    and self._max_list.shape is None)
    if box_is_unset:
        first_column = x[:, 0]
        return nd.expand_dims(nd.ones_like(first_column), axis=-1)

    over_max = nd.maximum(x - self._max_list.data(), 0)
    under_min = nd.maximum(self._min_list.data() - x, 0)
    s = nd.sum(over_max + under_min, axis=1, keepdims=True)

    delta = self._tau.data() - parent_tau
    return 1 - nd.exp(-1 * delta * s)
def sample(match, cls_pred, iou, ratio=3, min_sample=0, threshold=0.5, do=True):
    """Label anchors as positive (1), sampled negative (-1) or ignored (0).

    Anchors with ``match > -0.5`` are positives. With ``do`` set to ``False``
    every other anchor becomes a negative. Otherwise negatives are limited to
    unmatched anchors whose best IoU is below ``threshold``, and only the
    highest-scoring ones are kept: per row at most
    ``max(ratio * num_pos, min_sample)``, capped by the number of candidates.
    The score is computed from ``cls_pred`` relative to channel 0.
    """
    if do is False:
        ones = nd.ones_like(match)
        return nd.where(match > -0.5, ones, ones * -1)

    assignment = nd.zeros_like(match)

    positives_per_row = nd.sum(match > -0.5, axis=-1)
    wanted_negatives = ratio * positives_per_row

    # Candidate negatives: unmatched and with low overlap everywhere.
    low_overlap = nd.max(iou, axis=-1) < threshold
    neg_mask = nd.where(match < -0.5, low_overlap, assignment)
    available_negatives = neg_mask.sum(axis=-1)
    num_neg = nd.minimum(
        available_negatives,
        nd.maximum(wanted_negatives, min_sample)).astype('int')

    # Log-sum-exp over classes minus the channel-0 logit, computed stably.
    background_logit = cls_pred[:, :, 0]
    peak = nd.max(cls_pred, axis=-1, keepdims=True)
    log_sum_exp = nd.log(nd.sum(nd.exp(cls_pred - peak), axis=-1))
    score = peak[:, :, 0] - background_logit + log_sum_exp
    score = nd.where(neg_mask, score, nd.zeros_like(score))
    ranking = nd.argsort(score, axis=-1, is_ascend=False)

    assignment = nd.where(match > -0.5, nd.ones_like(assignment), assignment)
    for row, count in enumerate(num_neg):
        keep = count.asscalar()
        assignment[row, ranking[row, :keep]] = -1
    return assignment
def _spectral_norm(self, weight: Tensor, u: Tensor) -> Tensor:
    """Divide ``weight`` by a lower-bounded spectral-norm estimate.

    Adapted from
    https://github.com/apache/incubator-mxnet/blob/master/example/gluon/sn_gan/model.py.

    ``weight`` is flattened to 2-D, ``self._num_power_iter`` rounds of power
    iteration starting from ``u`` estimate the largest singular value, and
    the weight is divided by ``max(1, sigma / self._coeff)``. The updated
    ``u`` is written back to ``self._u`` outside of autograd recording.

    NOTE(review): with ``self._num_power_iter == 0`` the vector ``_v`` stays
    ``None`` and the sigma computation fails; presumably the setting is
    always >= 1 - confirm against the constructor.
    """
    w_mat = nd.reshape(weight, [weight.shape[0], -1])

    _u = u
    _v = None
    for _ in range(self._num_power_iter):
        _v = nd.L2Normalization(nd.dot(_u, w_mat))
        _u = nd.L2Normalization(nd.dot(_v, w_mat.T))

    sigma = nd.sum(nd.dot(_u, w_mat) * _v)

    # This differs from standard spectral normalization: the divisor is
    # bounded below by 1. Because of that bound sigma can never be 0, so the
    # former ``if sigma == 0.0`` fallback was unreachable (and forced a
    # blocking NDArray-to-bool conversion); it has been removed.
    sigma = nd.maximum(nd.ones(1, ctx=self._ctx), sigma / self._coeff)

    with autograd.pause():
        self._u.set_data(_u)

    return weight / sigma
def hybrid_forward(self, F, output, *args, **kwargs):
    '''
    Returns the Softmax Cross Entropy loss of a model with a graph vocab, in the style of a sentinel pointer network

    Note: Unlike VarNamingLoss, this Loss DOES expect the last dimension of output to be probabilities summing to 1

    ``args`` must unpack as ``((label, _), data_encoder)``; only ``label.values``
    and ``label.value_lengths`` are read.
    '''
    (label, _), _data_encoder = args
    joint_label = label.values
    label_lengths = label.value_lengths

    # pick (rather than sparse-label cross entropy) is used because there can
    # be multiple ways to point to the correct subtoken
    picked = nd.pick(output, joint_label, axis=2)

    # Mask positions beyond max(predicted length, label length), where the
    # predicted length ends with the first emitted 0.
    full_length = output.shape[1]

    def _predicted_length(row):
        zero_positions = np.where(row == 0)[0]
        if len(zero_positions):
            return int(min(zero_positions)) + 1
        return full_length

    predicted_tokens = nd.argmax(output, axis=2).asnumpy()
    output_lengths = nd.array(
        [_predicted_length(row) for row in predicted_tokens],
        ctx=output.context)
    mask_lengths = nd.maximum(output_lengths, label_lengths)

    # Masked positions get probability 1.0 so they add -log(1) = 0 to the loss.
    picked = nd.SequenceMask(picked,
                             value=1.0,
                             use_sequence_length=True,
                             sequence_length=mask_lengths,
                             axis=1)
    return nd.mean(-nd.log(picked), axis=0, exclude=True)
def volume_render_radiance_field(radiance_field,
                                 depth_values,
                                 ray_directions,
                                 radiance_field_noise_std=0.0,
                                 white_background=False):
    """Composite per-sample colour and density along each ray.

    The last axis of ``radiance_field`` is read as three colour logits
    followed by one density value. Returns
    ``(rgb_map, disp_map, acc_map, weights, depth_map)``.
    """
    # Distance between consecutive samples; the last sample gets a very
    # large (1e10) interval.
    last_interval = nd.array(
        [1e10], dtype=ray_directions.dtype,
        ctx=ray_directions.context).broadcast_to(depth_values[..., :1].shape)
    sample_gaps = depth_values[..., 1:] - depth_values[..., :-1]
    dists = nd.concat(sample_gaps, last_interval, dim=-1)
    # Scale by the length of the ray direction.
    dists = dists * ray_directions[..., None, :].norm(ord=2, axis=-1)

    rgb = nd.sigmoid(radiance_field[..., :3])

    # Optional Gaussian noise on the raw density.
    if radiance_field_noise_std > 0.0:
        noise = nd.random.normal(0.0,
                                 1.0,
                                 shape=radiance_field[..., 3].shape,
                                 dtype=radiance_field.dtype,
                                 ctx=radiance_field.context)
        noise = noise * radiance_field_noise_std
    else:
        noise = 0.0

    sigma_a = nd.relu(radiance_field[..., 3] + noise)
    alpha = 1.0 - nd.exp(-sigma_a * dists)
    weights = alpha * cumprod_exclusive_gluon(1.0 - alpha + 1e-10)

    rgb_map = (weights[..., None] * rgb).sum(axis=-2)
    depth_map = (weights * depth_values).sum(axis=-1)
    acc_map = weights.sum(axis=-1)

    # Disparity is the inverse of the normalised depth, floored at 1e-10.
    disp_floor = 1e-10 * nd.ones_like(depth_map)
    disp_map = 1.0 / nd.maximum(disp_floor, depth_map / acc_map)

    if white_background:
        rgb_map = rgb_map + (1.0 - acc_map[..., None])

    return rgb_map, disp_map, acc_map, weights, depth_map
def relu(x):
    """
    Activation function: element-wise maximum of ``x`` and 0.

    :param x: input array
    :return: result of ``nd.maximum(x, 0)``
    """
    floor = 0
    return nd.maximum(x, floor)
def relu(X):
    """
    Activation function: element-wise maximum of ``X`` and 0.

    :param X: input array
    :return: result of ``nd.maximum(X, 0)``
    """
    rectified = nd.maximum(X, 0)
    return rectified
def clip(tensor, a_min=None, a_max=None, inplace=False):
    """Clamp ``tensor`` element-wise to the range [``a_min``, ``a_max``].

    Parameters
    ----------
    tensor :
        Array-like accepted by ``np.minimum`` / ``np.maximum``.
    a_min, a_max :
        Lower / upper bound; ``None`` disables that bound.
    inplace : bool
        When true, the result is written into ``tensor`` via slice
        assignment and ``tensor`` itself is returned.

    Returns
    -------
    The clamped array, or ``tensor`` unchanged when both bounds are ``None``.
    """
    # The previous version tested the builtin ``min`` instead of ``a_min`` /
    # ``a_max``: the a_max-only branch was unreachable and calls with only
    # ``a_max`` (or no bound at all) raised a TypeError.
    if a_min is None and a_max is None:
        return tensor

    if a_min is not None and a_max is not None:
        clipped = np.maximum(np.minimum(tensor, a_max), a_min)
    elif a_min is not None:
        clipped = np.maximum(tensor, a_min)
    else:
        clipped = np.minimum(tensor, a_max)

    if inplace:
        tensor[:] = clipped
        return tensor
    return clipped
def clip(tensor, a_min=None, a_max=None, indlace=False):
    """Clamp an NDArray element-wise to the range [``a_min``, ``a_max``].

    Parameters
    ----------
    tensor :
        NDArray to clamp.
    a_min, a_max :
        Lower / upper bound; ``None`` disables that bound.
    indlace : bool
        In-place flag (the spelling is kept for backward compatibility with
        existing keyword callers). When true, the result is written into
        ``tensor`` via slice assignment and ``tensor`` itself is returned.

    Returns
    -------
    The clamped array, or ``tensor`` unchanged when both bounds are ``None``.
    """
    # Fixes relative to the previous version:
    #  * the branches tested the builtins ``min`` / ``max`` (always true)
    #    instead of ``a_min`` / ``a_max``;
    #  * the in-place path called the reductions ``nd.max`` / ``nd.min``
    #    where the element-wise ``nd.maximum`` / ``nd.minimum`` were meant.
    if a_min is None and a_max is None:
        return tensor

    if a_min is not None and a_max is not None:
        clipped = nd.maximum(nd.minimum(tensor, a_max), a_min)
    elif a_min is not None:
        clipped = nd.maximum(tensor, a_min)
    else:
        clipped = nd.minimum(tensor, a_max)

    if indlace:
        tensor[:] = clipped
        return tensor
    return clipped
def layerwise_relevance_zplus(self, out, use_bias=False, **kwargs):
    """Propagate relevance through this block using only positive weights.

    Parameters
    ----------
    out :
        Relevance arriving at the output of this block.
    use_bias : bool
        When true, the positive part of the bias takes part in the forward
        pass. When false (the default), ``None`` is passed as bias.
    **kwargs :
        Accepted and ignored.

    Returns
    -------
    Relevance with respect to the logged input ``self._in[0]``.

    Raises
    ------
    RuntimeError
        If no input has been logged yet (``self._in`` is ``None``).
    """
    if self._in is None:
        raise RuntimeError('Block has not yet executed forward_logged!')

    R = out
    a = self._in[0]

    weight = self.weight.data(ctx=a.context)
    wplus = nd.maximum(0., weight)

    bplus = None
    # ``use_bias`` is a boolean flag. The previous ``is not None`` test was
    # true for both True and False, so the bias could never be switched off.
    if use_bias:
        bias = self.bias.data(ctx=a.context)
        bplus = nd.maximum(0., bias)

    a.attach_grad()
    with autograd.record():
        z = self._forward(data=a, weight=wplus, bias=bplus)
    # ``(z == 0.)`` adds 1 exactly where the denominator would be zero.
    c, = autograd.grad(z, a, head_grads=R / (z + (z == 0.)))

    return a * c
def lookup(self, labels: nd.NDArray, repeat: bool = True):
    """Look up the embedded mean for ``labels`` and store it on ``self``.

    Sets ``self.mean`` to the link-transformed embedding, floored at 5e-3.
    When the embedding parameter has an ``n_repeats`` attribute and
    ``repeat`` is true, ``self.mean_repeated`` is set as well (without the
    floor). Returns ``self``.
    """
    emb_param = self._mean_arg_emb
    emb_shape = emb_param.shape

    mean_arg_emb = nd.Embedding(labels, emb_param.data(), *emb_shape)
    self.mean = nd.maximum(5e-3, self.link_function(mean_arg_emb))

    if not hasattr(emb_param, 'n_repeats'):
        return self
    if repeat:
        repeated = util.repeat_emb(emb_param, mean_arg_emb)
        self.mean_repeated = self.link_function(repeated)
    return self
def evaluate_edit_distance(data_loader: AsyncDataLoader, model):
    '''
    Measures the mean (over instances) of the characterwise edit distance (Levenshtein distance) between predicted and true names

    The accumulated distance is divided by ``len(data_loader)``.
    NOTE(review): presumably ``len(data_loader)`` is the number of instances
    rather than the number of batches - confirm against AsyncDataLoader.
    '''
    logged_example = False
    with data_loader as data_loader:
        cum_edit_distance = 0
        for split_batch, batch_length in tqdm(data_loader, total=data_loader.total_batches):
            # Run the model on every sub-batch before decoding any of them.
            batches_outputs = [(batch, model(batch.data)) for batch in split_batch]
            for batch, output in batches_outputs:
                predictions_labels = model.unbatchify(batch, output)
                for prediction, label in predictions_labels:
                    # Log a sample of predictions once, on the first instance.
                    if not logged_example:
                        logger.info('Some example predictions:\n{}'.format( pprint.pformat(predictions_labels[:10])))
                        logged_example = True
                    # Predictions and labels are joined into single strings
                    # before the character-level comparison.
                    pred_name = ''.join(prediction)
                    real_name = ''.join(label)
                    cum_edit_distance += editdistance.eval( pred_name, real_name)
    return cum_edit_distance / len(data_loader)
    # NOTE(review): everything below is unreachable - it follows the return
    # above and uses `dataset`, `n_batch` and `ctx`, none of which are defined
    # in this function. It looks like the body of a separate weighted-F1
    # evaluation whose `def` line is missing; restore that header or delete
    # this tail.
    pred = []
    true = []
    for i in tqdm(range(0, math.ceil(len(dataset) / n_batch))):
        data = dataset[n_batch * i:n_batch * (i + 1)]
        graph, label = model.batchify(data, ctx)
        output = model(graph)
        predictions = nd.argmax(output, axis=2)
        # Masking output to max(length_of_output, length_of_label)
        output_preds = predictions.asnumpy()
        output_lengths = []
        for row in output_preds:
            end_token_idxs = np.where(row == 0)[0]
            if len(end_token_idxs):
                output_lengths.append(int(min(end_token_idxs)))
            else:
                output_lengths.append(model.max_name_length)
        output_lengths = nd.array(output_lengths, ctx=ctx)
        mask_lengths = nd.maximum(output_lengths, label.value_lengths)
        # Masked positions are set to -1 and filtered out below.
        output = nd.SequenceMask(predictions, value=-1, use_sequence_length=True, sequence_length=mask_lengths, axis=1).asnumpy().astype('int32')
        labels = nd.SequenceMask(label.values, value=-1, use_sequence_length=True, sequence_length=mask_lengths.astype('int32'), axis=1).asnumpy()
        pred += [i for i in output.flatten().tolist() if i >= 0]
        true += [i for i in labels.flatten().tolist() if i >= 0]
    return metrics.f1_score(true, pred, average='weighted')
def layerwise_relevance_zplus(self, out, use_bias=False, **kwargs):
    """Relevance propagation restricted to the positive part of the weight.

    Parameters
    ----------
    out :
        Relevance arriving at the output of this block.
    use_bias : bool
        Include the positive part of the bias. Defaults to false, in which
        case ``None`` is handed to ``self._forward``.
    **kwargs :
        Accepted and ignored.

    Returns
    -------
    Relevance with respect to the logged input ``self._in[0]``.

    Raises
    ------
    RuntimeError
        If ``self._in`` is ``None``.
    """
    if self._in is None:
        raise RuntimeError('Block has not yet executed forward_logged!')

    R = out
    a = self._in[0]

    weight = self.weight.data(ctx=a.context)
    wplus = nd.maximum(0., weight)

    bplus = None
    # Honour the flag: the old ``use_bias is not None`` check was true for
    # False as well, so the bias was used unconditionally.
    if use_bias:
        bias = self.bias.data(ctx=a.context)
        bplus = nd.maximum(0., bias)

    a.attach_grad()
    with autograd.record():
        z = self._forward(data=a, weight=wplus, bias=bplus)
    # Zero denominators are replaced by 1 through the ``== 0.`` mask.
    c, = autograd.grad(z, a, head_grads=R/(z + (z == 0.)))

    return a*c
def relu(X):
    """
    Rectified linear activation.

    Parameters
    ----------
    X : mxnet.ndarray
        Input array.

    Returns
    -------
    Element-wise maximum of ``X`` and an all-zero array of the same shape.
    """
    zeros = nd.zeros_like(X)
    return nd.maximum(X, zeros)
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute classification and box losses over the whole batch.

    Every argument is normalised to a list with ``_as_list`` (one entry per
    device). Returns ``(sum_losses, cls_losses, box_losses)``, each a list
    with one entry per device.
    """
    # Results from all devices are needed at this point.
    cls_pred, box_pred, cls_target, box_target = [
        _as_list(item)
        for item in (cls_pred, box_pred, cls_target, box_target)
    ]

    # Cross-device count of positive samples in the entire batch.
    positive_masks = [ct > 0 for ct in cls_target]
    positive_counts = [mask.sum() for mask in positive_masks]
    num_pos_all = sum([count.asscalar() for count in positive_counts])

    if num_pos_all < 1 and self._min_hard_negatives < 1:
        # No positives and no hard negatives requested: return zero losses
        # that still depend on the predictions.
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [
            nd.sum(cp * 0) + nd.sum(bp * 0)
            for cp, bp in zip(cls_pred, box_pred)
        ]
        return sum_losses, cls_losses, box_losses

    normalizer = max(1., num_pos_all)
    cls_losses, box_losses, sum_losses = [], [], []

    for cp, bp, ct, bt in zip(cls_pred, box_pred, cls_target, box_target):
        # Element-wise cross entropy.
        log_prob = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(log_prob, ct, axis=-1, keepdims=False)

        # Hard-negative mining: rank negatives by loss (positives are zeroed
        # by the ``pos - 1`` factor) and keep the top ones per row.
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        negatives_to_keep = nd.maximum(
            self._min_hard_negatives,
            pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
        hard_negative = rank < negatives_to_keep

        # Keep the loss only for positives and mined negatives.
        cls_loss = nd.where((pos + hard_negative) > 0, cls_loss,
                            nd.zeros_like(cls_loss))
        cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / normalizer)

        # Smooth-L1 box loss with threshold ``self._rho``.
        bp = _reshape_like(nd, bp, bt)
        box_loss = nd.abs(bp - bt)
        box_loss = nd.where(box_loss > self._rho,
                            box_loss - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(box_loss))
        # The box loss only applies to positive samples.
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / normalizer)

        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

    return sum_losses, cls_losses, box_losses
def forward(self, x):
    """Evaluate the structure for ``x`` starting at its first key.

    Without router layers the result of ``self._contextify(x)(root)`` is
    multiplied by a ones tensor shaped like ``x``. With router layers, one
    row of the stacked router matrix is selected per sample (by the argmax
    of ``max(0, 1 / (router + 0.5))``) and multiplied with the stacked
    embeddings.
    """
    root, _ = next(iter(self._structure.items()))

    if len(self._routerlayer) == 0:
        head = nd.ones_like(nd.slice_axis(x, axis=1, begin=0, end=None))
        return self._contextify(x)(root) * head

    router_d, router_mat_d, _weight_d, embedd_d = self._contextify(x)(root)

    def _stack_sorted(mapping, axis):
        # Stack the dict values in sorted-key order along ``axis``.
        return nd.stack(*[mapping[key] for key in sorted(mapping)], axis=axis)

    embedd = _stack_sorted(embedd_d, 0)
    router = _stack_sorted(router_d, -1)
    router_mat = _stack_sorted(router_mat_d, 1)

    where = nd.argmax(nd.maximum(0, 1 / (router + 0.5)), axis=1)

    selected_rows = [router_mat[i][k] for i, k in enumerate(where)]
    head = nd.concat(*selected_rows, dim=0)

    return nd.dot(head, embedd)
def get_iou(predict, target, mode=1):
    '''
    Compute the IoU between every predicted box and a single target box.

    Parameter:
    ----------
    predict: mxnet.ndarray
      last axis is split into (left, top, right, bottom)
    target: mxnet.ndarray
      indexable with at least five entries; entry 0 is not read here
    mode: [1,2]
      1: target entries 1..4 are (left, top, right, bottom)
      2: target entries 1..4 are (center_y, center_x, height, width)

    Returns
    ----------
    ious: mxnet.ndarray
      ious between predict and target, with a trailing axis of size 1

    Raises
    ----------
    ValueError
      if mode is neither 1 nor 2
    '''
    l, t, r, b = predict.split(num_outputs=4, axis=-1)

    if mode == 1:
        l2 = target[1]
        t2 = target[2]
        r2 = target[3]
        b2 = target[4]
        # Corner layout: derive the area from the side lengths. Previously
        # target[3] * target[4] (right * bottom) was used for this mode too.
        target_area = (r2 - l2) * (b2 - t2)
    elif mode == 2:
        l2 = target[2] - target[4] / 2
        t2 = target[1] - target[3] / 2
        r2 = target[2] + target[4] / 2
        b2 = target[1] + target[3] / 2
        # Center/size layout: entries 3 and 4 already are height and width.
        target_area = target[3] * target[4]
    else:
        # Previously this only printed and then failed on an unbound local.
        raise ValueError('mode should be int 1 or 2')

    i_left = nd.maximum(l2, l)
    i_top = nd.maximum(t2, t)
    i_right = nd.minimum(r2, r)
    i_bottom = nd.minimum(b2, b)

    # Negative extents mean the boxes do not overlap.
    iw = nd.maximum(i_right - i_left, 0.)
    ih = nd.maximum(i_bottom - i_top, 0.)
    inters = iw * ih

    predict_area = (r - l) * (b - t)
    ious = inters / (predict_area + target_area - inters)
    return ious
def fltrust(epoch, gradients, net, lr, f, byz):
    """Aggregate client updates weighted by similarity to the last update.

    Each entry of ``gradients`` is flattened into one column vector. ``byz``
    is then applied to the list (the attack hook). The last vector is the
    trusted baseline; every other vector gets a trust score equal to its
    cosine similarity with the baseline, clamped to [0, 1] and normalised to
    sum to one. Vectors are rescaled to the baseline's norm, combined with
    their scores, and the result is subtracted (times ``lr``) from every
    parameter of ``net`` whose ``grad_req`` is not ``'null'``.
    """
    flattened = [
        nd.concat(*[layer.reshape((-1, 1)) for layer in client], dim=0)
        for client in gradients
    ]

    # Let the malicious clients (first f clients) perform the byzantine attack.
    param_list = byz(epoch, flattened, net, lr, f)

    # The last entry is the server update and is excluded from the clients.
    n = len(param_list) - 1
    baseline = nd.array(param_list[-1]).squeeze()

    print(nd.norm(baseline))

    def _cosine_to_baseline(update):
        vector = nd.array(update).squeeze()
        return (nd.dot(baseline, vector)
                / (nd.norm(baseline) + 1e-9)
                / (nd.norm(vector) + 1e-9))

    cos_sim = nd.stack(*[_cosine_to_baseline(u) for u in param_list])[:-1]
    # Clamp to [0, 1]: negative similarity earns no trust.
    cos_sim = nd.minimum(nd.maximum(cos_sim, 0), 1)
    normalized_weights = cos_sim / (nd.sum(cos_sim) + 1e-9)

    # Normalise each magnitude to the baseline's and weight by trust score.
    new_param_list = [
        param_list[i] * normalized_weights[i]
        / (nd.norm(param_list[i]) + 1e-9) * nd.norm(baseline)
        for i in range(n)
    ]

    # Update the global model.
    global_update = nd.sum(nd.concat(*new_param_list, dim=1), axis=-1)

    offset = 0
    for param in net.collect_params().values():
        if param.grad_req == 'null':
            continue
        stop = offset + param.data().size
        step = global_update[offset:stop].reshape(param.data().shape)
        param.set_data(param.data() - lr * step)
        offset += param.data().size
def hybrid_forward(self,
                   F,
                   images,
                   num_classes,
                   labels,
                   X_l2norm,
                   lambda_value=0.5,
                   sample_weight=None):
    """Weighted margin loss on ``X_l2norm``.

    Parameters
    ----------
    F :
        Operator namespace supplied by Gluon (``nd`` or ``sym``).
    images :
        Unused.
    num_classes :
        Depth of the one-hot encoding; also stored on ``self.num_classes``.
    labels :
        Class indices, one-hot encoded here.
    X_l2norm :
        Per-class scores the margins are applied to.
    lambda_value :
        Weight of the absent-class term.
    sample_weight :
        Optional weights forwarded to ``_apply_weighting``.

    Returns
    -------
    Loss averaged over all non-batch axes.
    """
    self.num_classes = num_classes

    # All operators go through ``F``. The earlier mix of ``nd.*`` and ``F.*``
    # gives identical results imperatively but fails when ``F`` is the
    # symbol module.
    labels_onehot = F.one_hot(labels, num_classes)
    first_term_base = F.square(F.maximum(0.9 - X_l2norm, 0))
    second_term_base = F.square(F.maximum(X_l2norm - 0.1, 0))

    margin_loss = (labels_onehot * first_term_base
                   + lambda_value * (1 - labels_onehot) * second_term_base)
    margin_loss = margin_loss.sum(axis=1)

    loss = F.mean(margin_loss, axis=self._batch_axis, exclude=True)
    loss = _apply_weighting(F, loss, self._weight / 2, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def box_iou(b1, b2):
    '''Return the pairwise IoU between two sets of xywh boxes.

    Parameters
    ----------
    b1: tensor, xywh on the last axis
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor with one entry per (b1 box, b2 box) pair

    NOTE(review): after ``expand_dims`` the coordinates are sliced on axis 2,
    which matches a 2-D ``b1`` of shape (i, 4). A higher-rank ``b1`` would be
    sliced on the wrong axis - confirm what callers pass.
    '''

    def _split_box(boxes):
        # Return (min corner, max corner, width/height) of center-format boxes.
        center = boxes[:, :, :2]
        size = boxes[:, :, 2:4]
        half = size/2.
        return center - half, center + half, size

    # Insert singleton axes so the two sets broadcast against each other.
    first = nd.expand_dims(b1, -2)
    first_min, first_max, first_wh = _split_box(first)

    second = nd.expand_dims(b2, 0)
    second_min, second_max, second_wh = _split_box(second)

    overlap_min = nd.maximum(first_min, second_min)
    overlap_max = nd.minimum(first_max, second_max)
    # Negative extents mean no overlap.
    overlap_wh = nd.maximum(overlap_max - overlap_min, 0.)
    overlap_area = overlap_wh[:, :, 0] * overlap_wh[:, :, 1]

    first_area = first_wh[:, :, 0] * first_wh[:, :, 1]
    second_area = second_wh[:, :, 0] * second_wh[:, :, 1]

    return overlap_area / (first_area + second_area - overlap_area)
def bbox_iou(lhs, rhs, x1y1x2y2=True):
    """Element-wise IoU of two box arrays.

    The last axis of ``lhs`` and ``rhs`` is split into four components:
    (xmin, ymin, xmax, ymax) when ``x1y1x2y2`` is true, otherwise
    (center_x, center_y, width, height). The result keeps a trailing axis of
    size 1.
    """

    def _corners(boxes):
        c0, c1, c2, c3 = nd.split(boxes, axis=-1, num_outputs=4)
        if x1y1x2y2:
            return c0, c1, c2, c3
        # Center/size input: c0, c1 are the center, c2, c3 width and height.
        xmin, xmax = c0 - c2 / 2., c0 + c2 / 2.
        ymin, ymax = c1 - c3 / 2., c1 + c3 / 2.
        return xmin, ymin, xmax, ymax

    b1_xmin, b1_ymin, b1_xmax, b1_ymax = _corners(lhs)
    b2_xmin, b2_ymin, b2_xmax, b2_ymax = _corners(rhs)

    # Intersection area; extents are clipped to [0, MAX].
    MAX = 1e5
    overlap_w = nd.minimum(b1_xmax, b2_xmax) - nd.maximum(b1_xmin, b2_xmin)
    overlap_h = nd.minimum(b1_ymax, b2_ymax) - nd.maximum(b1_ymin, b2_ymin)
    inter = nd.clip(overlap_w, 0, MAX) * nd.clip(overlap_h, 0, MAX)

    # Union area; the 1e-16 term keeps the denominator away from zero.
    area1 = (b1_xmax - b1_xmin) * (b1_ymax - b1_ymin)
    area2 = (b2_xmax - b2_xmin) * (b2_ymax - b2_ymin)
    union = (area1 + 1e-16) + area2 - inter

    return inter / union
def _block(x):
    """Decide how the batch ``x`` is handled relative to ``node``'s box.

    If the box has no bounds yet, ``_sample(node)(x)`` is called. Otherwise
    the total amount by which ``x`` reaches outside the box is computed.
    With zero extent the batch goes below. With a positive extent an
    exponential time is drawn; the batch goes above when the parent's tau
    plus that time is smaller than the node's tau, and below otherwise.
    """
    box = node._box

    if box._min_list.shape is None and box._max_list.shape is None:
        _sample(node)(x)
        return

    # Per-dimension overshoot of the batch below / above the box.
    lower_excess = nd.maximum(box._min_list.data() - nd.min(x, axis=0), 0)
    upper_excess = nd.maximum(nd.max(x, axis=0) - box._max_list.data(), 0)
    extent = nd.sum(lower_excess + upper_excess)

    if box._parent is None:
        parent_tau = 0
    else:
        parent_tau = box._parent._box._tau.data()

    if (extent == 0):
        _go_below(x)
        return

    e = nd.random.exponential(1 / extent)
    if (parent_tau + e < box._tau.data()):
        _go_above(x, parent_tau + e)
    else:
        _go_below(x)
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Batch-wide classification and box loss across devices.

    Every argument is normalised to a list with ``_as_list`` (one entry per
    device). Returns ``(sum_losses, cls_losses, box_losses)``, each a list
    with one entry per device.
    """
    # Results from all devices are required here.
    cls_pred, box_pred, cls_target, box_target = [
        _as_list(item)
        for item in (cls_pred, box_pred, cls_target, box_target)
    ]

    # Cross-device reduction: positives in the entire batch.
    num_pos = [
        (ct > 0).sum()
        for _cp, _bp, ct, _bt in zip(cls_pred, box_pred, cls_target, box_target)
    ]
    num_pos_all = sum([count.asscalar() for count in num_pos])

    if num_pos_all < 1 and self._min_hard_negatives < 1:
        # Nothing to learn from: emit zero losses tied to the predictions.
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0)
                      for cp, bp in zip(cls_pred, box_pred)]
        return sum_losses, cls_losses, box_losses

    denom = max(1., num_pos_all)
    cls_losses = []
    box_losses = []
    sum_losses = []

    for cp, bp, ct, bt in zip(cls_pred, box_pred, cls_target, box_target):
        # Element-wise cross entropy followed by hard-negative mining.
        pred = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)

        # Double argsort yields the rank of each negative's loss per row;
        # positives are zeroed out by the ``pos - 1`` factor.
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        keep_count = nd.maximum(
            self._min_hard_negatives,
            pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
        hard_negative = rank < keep_count

        # Mask out everything that is neither positive nor a mined negative.
        selected = (pos + hard_negative) > 0
        cls_loss = nd.where(selected, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / denom)

        # Smooth-L1 box loss with threshold ``self._rho``.
        bp = _reshape_like(nd, bp, bt)
        abs_err = nd.abs(bp - bt)
        box_loss = nd.where(abs_err > self._rho,
                            abs_err - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(abs_err))
        # Box loss only applies to positive samples.
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / denom)

        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

    return sum_losses, cls_losses, box_losses
def forward(self, x):
    """Apply a rectifier: element-wise maximum of 0 and ``x``."""
    rectified = nd.maximum(0., x)
    return rectified