def layerwise_relevance_zclip(self, out, use_bias=False, **kwargs):
    if self._in is None:
        raise RuntimeError('Block has not yet executed forward_logged!')
    R = out
    a = self._in[0]
    z = self._out
    weight = self.weight.data(ctx=a.context)
    # propagate relevance separately through the positive and negative parts of the weights
    wplus = nd.maximum(0., weight)
    wminus = nd.minimum(0., weight)

    bplus = None
    bminus = None
    if use_bias:
        bias = self.bias.data(ctx=a.context)
        bplus = nd.maximum(0., bias)
        bminus = nd.minimum(0., bias)

    alpha = z > 0.
    beta = z < 0.

    a.attach_grad()
    with autograd.record():
        zplus = self._forward(data=a, weight=wplus, bias=bplus)
    cplus, = autograd.grad(zplus, a, head_grads=alpha * R / (zplus + (zplus == 0.)))

    with autograd.record():
        zminus = self._forward(data=a, weight=wminus, bias=bminus)
    cminus, = autograd.grad(zminus, a, head_grads=beta * R / (zminus + (zminus == 0.)))

    return a * (cplus - cminus)
def layerwise_relevance_zb(self, out, lo=-1, hi=1, use_bias=False, **kwargs):
    if self._in is None:
        raise RuntimeError('Block has not yet executed forward_logged!')
    R = out
    a = self._in[0]
    weight = self.weight.data(ctx=a.context)
    wplus = nd.maximum(0., weight)
    wminus = nd.minimum(0., weight)

    bias = None
    bplus = None
    bminus = None
    if use_bias:
        bias = self.bias.data(ctx=a.context)
        bplus = nd.maximum(0., bias)
        bminus = nd.minimum(0., bias)

    # relevance propagated through the input together with its element-wise lower/upper bounds (lo, hi)
    upper = nd.ones_like(a) * hi
    lower = nd.ones_like(a) * lo
    a.attach_grad()
    upper.attach_grad()
    lower.attach_grad()

    with autograd.record():
        zlh = (self._forward(a, weight, bias)
               - self._forward(lower, wplus, bplus)
               - self._forward(upper, wminus, bminus))
    zlh.backward(out_grad=R / (zlh + (zlh == 0.)))

    return a * a.grad + upper * upper.grad + lower * lower.grad
import math

from mxnet import nd

eps = 1e-8  # small stabilising constant; exact value assumed, not given in the original snippet


def bgr2hsi(x):
    """
    x: (n, c(b, g, r), w, h)
    return: (n, c(h, s, i), w, h)
    """
    sum_RGB = nd.sum(x.astype('float32'), axis=1)
    # channel order follows the (b, g, r) layout documented above
    B = x[:, 0, :, :].astype('float32')
    G = x[:, 1, :, :].astype('float32')
    R = x[:, 2, :, :].astype('float32')

    r = (R + eps) / (sum_RGB + 3 * eps)
    g = (G + eps) / (sum_RGB + 3 * eps)
    b = (B + eps) / (sum_RGB + 3 * eps)

    # theta = arccos(0.5 * (2r - g - b) / sqrt((r - g)^2 + (r - b)(g - b)))
    cossita = (2 * r - g - b) / (2 * ((r - g) ** 2 + (r - b) * (g - b)) ** 0.5 + eps)
    cossita_clip = nd.clip(cossita, -1.0, 1.0)
    sita = nd.arccos(cossita_clip)

    h = (nd.where(g >= b, sita, 2 * math.pi - sita)).expand_dims(axis=1)
    s = (1 - 3 * nd.minimum(nd.minimum(r, g), b)).expand_dims(axis=1)
    s = nd.clip(s, 0., 1.)
    i = ((R + G + B) / 3).expand_dims(axis=1)
    return nd.concat(h, s, i, dim=1)
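# Usage sketch for bgr2hsi() above (not from the original source): a random
# NCHW batch of BGR values in [0, 255]; shapes are only illustrative.
from mxnet import nd

img = nd.random.uniform(0, 255, shape=(2, 3, 32, 32))
hsi = bgr2hsi(img)
print(hsi.shape)  # (2, 3, 32, 32), channels are (h, s, i)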
def get_iou(predict, target, mode=1):
    '''
    @input:
        predict: m*n*4 ndarray, boxes as (l, t, r, b)
        target: length-5 ndarray
        mode: 1: target is (c, l, t, r, b)
              2: target is (c, y, x, h, w)
    @return: (m*n*1) ndarray of IoUs
    '''
    l, t, r, b = predict.split(num_outputs=4, axis=-1)
    if mode == 1:
        l2 = target[1]
        t2 = target[2]
        r2 = target[3]
        b2 = target[4]
    elif mode == 2:
        l2 = target[2] - target[4] / 2
        t2 = target[1] - target[3] / 2
        r2 = target[2] + target[4] / 2
        b2 = target[1] + target[3] / 2
    else:
        raise ValueError('mode should be int 1 or 2')

    i_left = nd.maximum(l2, l)
    i_top = nd.maximum(t2, t)
    i_right = nd.minimum(r2, r)
    i_bottom = nd.minimum(b2, b)
    iw = nd.maximum(i_right - i_left, 0.)
    ih = nd.maximum(i_bottom - i_top, 0.)
    inters = iw * ih

    predict_area = (r - l) * (b - t)
    target_area = (r2 - l2) * (b2 - t2)  # valid for both target formats
    ious = inters / (predict_area + target_area - inters)
    return ious  # e.g. 1344x3x1
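# Usage sketch for get_iou() with mode=2 (not from the original source): the
# target is (class, y, x, h, w), so (1, 5, 5, 10, 10) is the box (0, 0, 10, 10).
from mxnet import nd

predict = nd.array([[0., 0., 10., 10.],
                    [5., 5., 15., 15.],
                    [20., 20., 30., 30.]])
target = nd.array([1., 5., 5., 10., 10.])
print(get_iou(predict, target, mode=2))  # roughly [[1.0], [0.143], [0.0]]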
def get_dis(data, mean, dis_method='iou'):
    if dis_method == 'iou':
        # data = bs*(w, h) ndarray
        # mean = 1*(w, h)
        # |--------|-----|
        # | inters |     |
        # |--------|     | h
        # |              |
        # |--------------|
        #        w
        data_w, data_h = data.split(num_outputs=2, axis=-1)
        mean_w, mean_h = mean
        inters_w = nd.minimum(data_w, mean_w)
        inters_h = nd.minimum(data_h, mean_h)
        inters = inters_w * inters_h
        data_area = data_w * data_h
        mean_area = mean_w * mean_h
        ious = inters / (data_area + mean_area - inters)
        distance = 1 / ious
    elif dis_method == 'L2':
        vec = data - mean
        distance = nd.norm(vec, ord=2, axis=-1).reshape((-1, 1))
    else:
        raise ValueError("dis_method should be 'iou' or 'L2'")
    return distance
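# Usage sketch for get_dis() above (not from the original source): two (w, h)
# candidates against a (w, h) cluster mean given as plain floats.
from mxnet import nd

data = nd.array([[10., 10.], [20., 5.]])
mean = (10., 10.)
print(get_dis(data, mean, dis_method='iou'))  # roughly [[1.], [3.]] (1 / IoU)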
def test_minimum():
    x = mx.nd.ones(LARGE_X) * 3
    y = mx.nd.ones(LARGE_X) * 2
    z = nd.minimum(x, y)
    assert z[0] == 2
    assert z[-1] == 2
    z = nd.minimum(x, 5)
    assert z[0] == 3
    assert z[-1] == 3
def _compute_yolo_iou(self, F, boxes1, boxes2):
    ''' IoU of corresponding anchors; boxes are given as (cx, cy, w, h) '''
    # to corner representation
    x11 = boxes1[:, :, :, :, 0] - boxes1[:, :, :, :, 2] / 2.0
    y11 = boxes1[:, :, :, :, 1] - boxes1[:, :, :, :, 3] / 2.0
    x12 = boxes1[:, :, :, :, 0] + boxes1[:, :, :, :, 2] / 2.0
    y12 = boxes1[:, :, :, :, 1] + boxes1[:, :, :, :, 3] / 2.0
    boxes1_new = nd.stack(x11, y11, x12, y12, axis=-1)

    x21 = boxes2[:, :, :, :, 0] - boxes2[:, :, :, :, 2] / 2.0
    y21 = boxes2[:, :, :, :, 1] - boxes2[:, :, :, :, 3] / 2.0
    x22 = boxes2[:, :, :, :, 0] + boxes2[:, :, :, :, 2] / 2.0
    y22 = boxes2[:, :, :, :, 1] + boxes2[:, :, :, :, 3] / 2.0
    boxes2_new = nd.stack(x21, y21, x22, y22, axis=-1)

    # calculating 2 border points
    upperleft = nd.maximum(boxes1_new[:, :, :, :, :2], boxes2_new[:, :, :, :, :2])
    lowerright = nd.minimum(boxes1_new[:, :, :, :, 2:], boxes2_new[:, :, :, :, 2:])

    intersection_dims = nd.maximum(0.0, lowerright - upperleft)
    intersection_area = intersection_dims[:, :, :, :, 0] * intersection_dims[:, :, :, :, 1]

    # box areas from the original (w, h) components
    area1 = boxes1[:, :, :, :, 2] * boxes1[:, :, :, :, 3]
    area2 = boxes2[:, :, :, :, 2] * boxes2[:, :, :, :, 3]

    union_area = nd.maximum(1e-8, area1 + area2 - intersection_area)
    return nd.clip(intersection_area / union_area, a_min=0.0, a_max=1.0)
def old_update(self, b_s, b_a, b_r, b_logpac):
    b_s = nd.array(b_s, ctx=self.args.ctx).reshape((-1, self.observation_dim))
    b_a = nd.array(b_a, ctx=self.args.ctx).reshape((-1, self.action_dim))
    b_r = nd.array(b_r, ctx=self.args.ctx).reshape((-1, 1))
    b_oldpi_log_prob = nd.array(b_logpac, ctx=self.args.ctx).reshape((-1, self.action_dim))

    with autograd.record():
        # Value loss
        v_pred, mu, sigma = self.net(b_s)
        advantage = b_r - v_pred
        vf_loss = nd.mean(nd.square(advantage))

        # Detach from the computation graph
        advantage = advantage.detach()

        # Action loss
        pi_log_prob = self.net.log_prob(b_a, mu, sigma)
        ratio = nd.exp(pi_log_prob - b_oldpi_log_prob)
        surr1 = ratio * advantage
        surr2 = nd.clip(ratio, 1.0 - self.args.clip_param,
                        1.0 + self.args.clip_param) * advantage
        actor_loss = -nd.mean(nd.minimum(surr1, surr2))
        entropy = self.net.entropy(sigma)

        # Total (maximize entropy to encourage exploration)
        loss = vf_loss * self.args.value_coefficient + actor_loss \
            - entropy * self.args.entropy_coefficient

    loss.backward()
    self.trainer.step(b_s.shape[0])
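# Stand-alone sketch of the PPO clipped surrogate used above (hypothetical
# numbers; clip_param assumed to be 0.2): the probability ratio is clipped
# before taking the element-wise minimum with the unclipped surrogate.
from mxnet import nd

ratio = nd.array([0.5, 1.0, 1.6])
advantage = nd.array([1.0, 1.0, 1.0])
clip_param = 0.2
surr1 = ratio * advantage
surr2 = nd.clip(ratio, 1.0 - clip_param, 1.0 + clip_param) * advantage
print(-nd.mean(nd.minimum(surr1, surr2)))  # -0.9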
def sample(match, cls_pred, iou, ratio=3, min_sample=0, threshold=0.5, do=True):
    if not do:
        ones = nd.ones_like(match)
        sample = nd.where(match > -0.5, ones, ones * -1)
        return sample

    sample = nd.zeros_like(match)
    num_pos = nd.sum(match > -0.5, axis=-1)
    require_neg = ratio * num_pos
    neg_mask = nd.where(match < -0.5, nd.max(iou, axis=-1) < threshold, sample)
    max_neg = neg_mask.sum(axis=-1)
    num_neg = nd.minimum(max_neg, nd.maximum(require_neg, min_sample)).astype('int')

    neg_prob = cls_pred[:, :, 0]
    max_value = nd.max(cls_pred, axis=-1, keepdims=True)
    score = max_value[:, :, 0] - neg_prob + nd.log(nd.sum(nd.exp(cls_pred - max_value), axis=-1))
    score = nd.where(neg_mask, score, nd.zeros_like(score))

    argmax = nd.argsort(score, axis=-1, is_ascend=False)
    sample = nd.where(match > -0.5, nd.ones_like(sample), sample)
    for i, num in enumerate(num_neg):
        sample[i, argmax[i, :num.asscalar()]] = -1
    return sample
def clip_grad(grads: Union[Generator[NDArray, NDArray, NDArray], List[NDArray], Tuple[NDArray]],
              clip_method: GradientClippingMethod, clip_val: float,
              inplace=True) -> List[NDArray]:
    """
    Clip gradient values inplace
    :param grads: gradients to be clipped
    :param clip_method: clipping method
    :param clip_val: clipping value. Interpreted differently depending on clipping method.
    :param inplace: modify grads if True, otherwise create NDArrays
    :return: clipped gradients
    """
    output = list(grads) if inplace else list(nd.empty(g.shape) for g in grads)
    if clip_method == GradientClippingMethod.ClipByGlobalNorm:
        norm_unclipped_grads = global_norm(grads)
        scale = clip_val / (norm_unclipped_grads.asscalar() + 1e-8)  # todo: use branching operators?
        if scale < 1.0:
            for g, o in zip(grads, output):
                nd.broadcast_mul(g, nd.array([scale]), out=o)
    elif clip_method == GradientClippingMethod.ClipByValue:
        for g, o in zip(grads, output):
            g.clip(-clip_val, clip_val, out=o)
    elif clip_method == GradientClippingMethod.ClipByNorm:
        for g, o in zip(grads, output):
            nd.broadcast_mul(g, nd.minimum(1.0, clip_val / (g.norm() + 1e-8)), out=o)
    else:
        raise KeyError('Unsupported gradient clipping method')
    return output
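# Self-contained sketch of the ClipByNorm branch above (not the enum-based API
# itself): each gradient is rescaled so its L2 norm does not exceed clip_val.
from mxnet import nd

grads = [nd.array([3., 4.]), nd.array([0.3, 0.4])]
clip_val = 1.0
clipped = [g * nd.minimum(1.0, clip_val / (g.norm() + 1e-8)) for g in grads]
print([c.norm().asscalar() for c in clipped])  # roughly [1.0, 0.5]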
def clip(tensor, a_min=None, a_max=None, inplace=False):
    if a_min is not None and a_max is not None:
        if inplace:
            tensor[:] = np.maximum(np.minimum(tensor, a_max), a_min)
        else:
            tensor = np.maximum(np.minimum(tensor, a_max), a_min)
    elif a_min is not None:
        if inplace:
            tensor[:] = np.maximum(tensor, a_min)
        else:
            tensor = np.maximum(tensor, a_min)
    elif a_max is not None:
        if inplace:
            tensor[:] = np.minimum(tensor, a_max)
        else:
            tensor = np.minimum(tensor, a_max)
    return tensor
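# Quick check of the NumPy clip() above, including the inplace path
# (illustrative values only).
import numpy as np

x = np.array([-2.0, 0.5, 3.0])
print(clip(x, a_min=0.0, a_max=1.0))  # [0.  0.5 1. ]
clip(x, a_min=0.0, inplace=True)      # x is modified in place
print(x)                              # [0.  0.5 3. ]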
def clip(tensor, a_min=None, a_max=None, inplace=False):
    if a_min is not None and a_max is not None:
        if inplace:
            tensor[:] = nd.maximum(nd.minimum(tensor, a_max), a_min)
        else:
            tensor = nd.maximum(nd.minimum(tensor, a_max), a_min)
    elif a_min is not None:
        if inplace:
            tensor[:] = nd.maximum(tensor, a_min)
        else:
            tensor = nd.maximum(tensor, a_min)
    elif a_max is not None:
        if inplace:
            tensor[:] = nd.minimum(tensor, a_max)
        else:
            tensor = nd.minimum(tensor, a_max)
    return tensor
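# Same check for the NDArray variant above (illustrative values only).
from mxnet import nd

x = nd.array([-2.0, 0.5, 3.0])
print(clip(x, a_min=0.0, a_max=1.0))  # [0.  0.5 1. ]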
def get_iou(predict, target, mode=1):
    '''
    Parameters
    ----------
    predict: mxnet.ndarray
        boxes as (l, t, r, b) along the last axis, shape {???}*4
    target: mxnet.ndarray
        target.shape = (5,)
    mode: [1, 2]
        1: target format is cltrb
        2: target format is cyxhw

    Returns
    -------
    ious: mxnet.ndarray
        ious between predict and target, dimension is {???}x1
    '''
    l, t, r, b = predict.split(num_outputs=4, axis=-1)
    if mode == 1:
        l2 = target[1]
        t2 = target[2]
        r2 = target[3]
        b2 = target[4]
    elif mode == 2:
        l2 = target[2] - target[4] / 2
        t2 = target[1] - target[3] / 2
        r2 = target[2] + target[4] / 2
        b2 = target[1] + target[3] / 2
    else:
        raise ValueError('mode should be int 1 or 2')

    i_left = nd.maximum(l2, l)
    i_top = nd.maximum(t2, t)
    i_right = nd.minimum(r2, r)
    i_bottom = nd.minimum(b2, b)
    iw = nd.maximum(i_right - i_left, 0.)
    ih = nd.maximum(i_bottom - i_top, 0.)
    inters = iw * ih

    predict_area = (r - l) * (b - t)
    target_area = (r2 - l2) * (b2 - t2)  # valid for both target formats
    ious = inters / (predict_area + target_area - inters)
    return ious  # e.g. 1344x3x1
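# Usage sketch for get_iou() with mode=1 (not from the original source): the
# target is (class, l, t, r, b).
from mxnet import nd

predict = nd.array([[0., 0., 10., 10.],
                    [5., 5., 15., 15.],
                    [20., 20., 30., 30.]])
target = nd.array([1., 0., 0., 10., 10.])
print(get_iou(predict, target, mode=1))  # roughly [[1.0], [0.143], [0.0]]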
def fltrust(epoch, gradients, net, lr, f, byz):
    param_list = [
        nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0) for x in gradients
    ]
    # let the malicious clients (first f clients) perform the byzantine attack
    param_list = byz(epoch, param_list, net, lr, f)
    n = len(param_list) - 1  # -1 so as to not include the gradient of the server model

    # use the last gradient (server update) as the trusted source
    baseline = nd.array(param_list[-1]).squeeze()
    cos_sim = []
    new_param_list = []

    # compute cos similarity between each client update and the server update
    for each_param_list in param_list:
        each_param_array = nd.array(each_param_list).squeeze()
        cos_sim.append(
            nd.dot(baseline, each_param_array) / (nd.norm(baseline) + 1e-9) /
            (nd.norm(each_param_array) + 1e-9))

    cos_sim = nd.stack(*cos_sim)[:-1]
    cos_sim = nd.maximum(cos_sim, 0)  # relu
    cos_sim = nd.minimum(cos_sim, 1)
    normalized_weights = cos_sim / (nd.sum(cos_sim) + 1e-9)  # weighted trust score

    # normalize the magnitudes and weight by the trust score
    for i in range(n):
        new_param_list.append(param_list[i] * normalized_weights[i] /
                              (nd.norm(param_list[i]) + 1e-9) * nd.norm(baseline))

    # update the global model
    global_update = nd.sum(nd.concat(*new_param_list, dim=1), axis=-1)
    idx = 0
    for j, param in enumerate(net.collect_params().values()):
        if param.grad_req == 'null':
            continue
        param.set_data(param.data() - lr * global_update[idx:(
            idx + param.data().size)].reshape(param.data().shape))
        idx += param.data().size
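# Minimal sketch of the trust-score step used in fltrust() above (hypothetical
# vectors): cosine similarity with the server update, clipped to [0, 1] before
# it is used as a weight.
from mxnet import nd

baseline = nd.array([1., 0.])
client = nd.array([0.5, 0.5])
cos = nd.dot(baseline, client) / (nd.norm(baseline) + 1e-9) / (nd.norm(client) + 1e-9)
trust = nd.minimum(nd.maximum(cos, 0), 1)
print(trust.asscalar())  # roughly 0.707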
def bbox_iou(lhs, rhs, x1y1x2y2=True):
    if x1y1x2y2:
        b1_xmin, b1_ymin, b1_xmax, b1_ymax = nd.split(lhs, axis=-1, num_outputs=4)
        b2_xmin, b2_ymin, b2_xmax, b2_ymax = nd.split(rhs, axis=-1, num_outputs=4)
    else:
        b1_x, b1_y, b1_w, b1_h = nd.split(lhs, axis=-1, num_outputs=4)
        b2_x, b2_y, b2_w, b2_h = nd.split(rhs, axis=-1, num_outputs=4)
        b1_xmin, b1_xmax = b1_x - b1_w / 2., b1_x + b1_w / 2.
        b1_ymin, b1_ymax = b1_y - b1_h / 2., b1_y + b1_h / 2.
        b2_xmin, b2_xmax = b2_x - b2_w / 2., b2_x + b2_w / 2.
        b2_ymin, b2_ymax = b2_y - b2_h / 2., b2_y + b2_h / 2.

    # Intersection area
    MAX = 1e5
    inter_w = nd.clip(nd.minimum(b1_xmax, b2_xmax) - nd.maximum(b1_xmin, b2_xmin), 0, MAX)
    inter_h = nd.clip(nd.minimum(b1_ymax, b2_ymax) - nd.maximum(b1_ymin, b2_ymin), 0, MAX)
    # inter_w = F.where(inter_w < 0., F.zeros_like(inter_w), inter_w)
    # inter_h = F.where(inter_h < 0., F.zeros_like(inter_h), inter_h)
    inter = inter_w * inter_h

    # Union area
    w1, h1 = b1_xmax - b1_xmin, b1_ymax - b1_ymin
    w2, h2 = b2_xmax - b2_xmin, b2_ymax - b2_ymin
    # w1 = F.where(w1 < 0., F.zeros_like(w1), w1)
    # h1 = F.where(h1 < 0., F.zeros_like(h1), h1)
    # w2 = F.where(w2 < 0., F.zeros_like(w2), w2)
    # h2 = F.where(h2 < 0., F.zeros_like(h2), h2)
    union = (w1 * h1 + 1e-16) + w2 * h2 - inter

    iou = inter / union
    return iou
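# Usage sketch for bbox_iou() above (not from the original source): one
# (x, y, w, h) box against another with x1y1x2y2=False.
from mxnet import nd

a = nd.array([[5., 5., 10., 10.]])
b = nd.array([[10., 10., 10., 10.]])
print(bbox_iou(a, b, x1y1x2y2=False))  # roughly [[0.143]]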
def update(self, obs, returns, masks, actions, values, logpacs, lrnow, cliprange_now):
    advantages = returns - values
    advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
    advantages = nd.array(advantages, ctx=self.args.ctx)  # .reshape((-1, 1))

    obs = np.transpose(obs, (0, 3, 1, 2))
    obs = nd.array(obs, ctx=self.args.ctx)
    actions = nd.array(actions, ctx=self.args.ctx).reshape((-1, 1))
    values = nd.array(values, ctx=self.args.ctx).reshape((-1, 1))
    returns = nd.array(returns, ctx=self.args.ctx).reshape((-1, 1))
    oldpi_log_prob = nd.array(logpacs, ctx=self.args.ctx).reshape((-1, 1))

    # self.trainer.set_learning_rate(lrnow)

    # Auto grad
    with autograd.record():
        # Value loss
        vpred, logits = self.net(obs)
        vpred_clipped = values + nd.clip(vpred - values, -cliprange_now, cliprange_now)
        vf_loss1 = nd.square(vpred - returns)
        vf_loss2 = nd.square(vpred_clipped - returns)
        vf_loss = nd.mean(nd.maximum(vf_loss1, vf_loss2))

        # Action loss
        # pi_log_prob = self.net.log_prob(logits, actions)
        pi_log_prob = nd.pick(logits, actions, 1)
        ratio = nd.exp(pi_log_prob - oldpi_log_prob)
        surr1 = ratio * advantages
        surr2 = nd.clip(ratio, 1.0 - cliprange_now, 1.0 + cliprange_now) * advantages
        actor_loss = -nd.mean(nd.minimum(surr1, surr2))

        # Entropy term
        # entropy = self.net.entropy(logits)

        # Total loss
        # loss = vf_loss * self.args.value_coefficient + actor_loss
        #        - entropy * self.args.entropy_coefficient
        loss = vf_loss + actor_loss

    # Compute gradients and updates
    loss.backward()
    self.trainer.step(obs.shape[0])

    return actor_loss.asscalar(), vf_loss.asscalar()  # , entropy.asscalar()
def update(self):
    self.total_train_steps += 1
    state_batch, action_batch, reward_batch, next_state_batch, done_batch = \
        self.memory_buffer.sample(self.batch_size)

    # -------------- optimize the critic network --------------------
    with autograd.record():
        # choose next action according to target policy network
        next_action_batch = self.target_actor_network(next_state_batch)
        noise = nd.normal(loc=0, scale=self.policy_noise,
                          shape=next_action_batch.shape, ctx=self.ctx)
        # with noise clip
        noise = nd.clip(noise, a_min=-self.noise_clip, a_max=self.noise_clip)
        next_action_batch = next_action_batch + noise
        clipped_action = self.action_clip(next_action_batch)

        # get target q value
        target_q_value1 = self.target_critic_network1(next_state_batch, clipped_action)
        target_q_value2 = self.target_critic_network2(next_state_batch, clipped_action)
        target_q_value = nd.minimum(target_q_value1, target_q_value2).squeeze()
        target_q_value = reward_batch + (1.0 - done_batch) * (self.gamma * target_q_value)

        # get current q value
        current_q_value1 = self.main_critic_network1(state_batch, action_batch)
        current_q_value2 = self.main_critic_network2(state_batch, action_batch)

        loss = gloss.L2Loss()
        value_loss1 = loss(current_q_value1, target_q_value.detach())
        value_loss2 = loss(current_q_value2, target_q_value.detach())

    self.main_critic_network1.collect_params().zero_grad()
    value_loss1.backward()
    self.critic1_optimizer.step(self.batch_size)

    self.main_critic_network2.collect_params().zero_grad()
    value_loss2.backward()
    self.critic2_optimizer.step(self.batch_size)

    # --------------- optimize the actor network -------------------------
    if self.total_train_steps % self.policy_update == 0:
        with autograd.record():
            pred_action_batch = self.main_actor_network(state_batch)
            actor_loss = -nd.mean(self.main_critic_network1(state_batch, pred_action_batch))

        self.main_actor_network.collect_params().zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step(1)

        self.soft_update(self.target_actor_network, self.main_actor_network)
        self.soft_update(self.target_critic_network1, self.main_critic_network1)
        self.soft_update(self.target_critic_network2, self.main_critic_network2)
def forward(self, x=0):
    if mx.autograd.is_training():
        u = nd.random.uniform(0, 1)
        s = nd.log(u) - nd.log(1 - u) + self._qz_loga.data()
        if self._temperature == 0:
            s = nd.sign(s)
        else:
            s = nd.sigmoid(s / self._temperature)
    else:
        s = nd.sigmoid(self._qz_loga.data())
    s = s * (self._limit_hi - self._limit_lo) + self._limit_lo
    return nd.minimum(1, nd.maximum(s, 0))
def _go_below(x):
    lower = nd.min(x, axis=0)
    lower = nd.minimum(lower, node._box._min_list.data())
    upper = nd.max(x, axis=0)
    upper = nd.maximum(upper, node._box._max_list.data())
    node._box._init_param("min_list", lower)
    node._box._init_param("max_list", upper)
    if self._structure[node] is not None:
        l_node = next(key for key, value in self._structure[node].items() if value == -1)
        r_node = next(key for key, value in self._structure[node].items() if value == 1)
        decision = node._decision.forward(x, crisp=True)
        _shard(decision, x, _extend(l_node), _extend(r_node))
def update(self, obs, returns, masks, actions, values, logpacs):
    advantages = returns - values
    # advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
    advantages = nd.array(advantages, ctx=self.args.ctx).reshape((-1, 1))
    obs = nd.array(obs, ctx=self.args.ctx).reshape((-1, self.observation_dim))
    actions = nd.array(actions, ctx=self.args.ctx).reshape((-1, self.action_dim))
    values = nd.array(values, ctx=self.args.ctx).reshape((-1, 1))
    returns = nd.array(returns, ctx=self.args.ctx).reshape((-1, 1))
    oldpi_log_prob = nd.array(logpacs, ctx=self.args.ctx).reshape((-1, self.action_dim))

    # Learning rate scheduling
    # self.trainer.set_learning_rate(lr)

    # Auto grad
    with autograd.record():
        # Value loss
        vpred, mu, sigma = self.net(obs)
        vpred_clipped = values + nd.clip(vpred - values, -self.args.clip_param, self.args.clip_param)
        vf_loss1 = nd.square(vpred - returns)
        vf_loss2 = nd.square(vpred_clipped - returns)
        vf_loss = nd.mean(nd.maximum(vf_loss1, vf_loss2))

        # Action loss
        pi_log_prob = self.net.log_prob(actions, mu, sigma)
        ratio = nd.exp(pi_log_prob - oldpi_log_prob)
        surr1 = ratio * advantages
        surr2 = nd.clip(ratio, 1.0 - self.args.clip_param,
                        1.0 + self.args.clip_param) * advantages
        actor_loss = -nd.mean(nd.minimum(surr1, surr2))

        # Entropy term
        entropy = self.net.entropy(sigma)

        # Total loss
        loss = vf_loss * self.args.value_coefficient + actor_loss \
            - entropy * self.args.entropy_coefficient

    # Compute gradients and updates
    loss.backward()
    self.trainer.step(obs.shape[0])
def box_iou(b1, b2):
    '''Return iou tensor

    Parameters
    ----------
    b1: tensor, shape=(i1, ..., iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1, ..., iN, j)
    '''
    # Expand dim to apply broadcasting.
    b1 = nd.expand_dims(b1, -2)
    b1_xy = b1[:, :, :2]
    b1_wh = b1[:, :, 2:4]
    b1_wh_half = b1_wh / 2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    # Expand dim to apply broadcasting.
    b2 = nd.expand_dims(b2, 0)
    b2_xy = b2[:, :, :2]
    b2_wh = b2[:, :, 2:4]
    b2_wh_half = b2_wh / 2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    intersect_mins = nd.maximum(b1_mins, b2_mins)
    intersect_maxes = nd.minimum(b1_maxes, b2_maxes)
    intersect_wh = nd.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[:, :, 0] * intersect_wh[:, :, 1]
    b1_area = b1_wh[:, :, 0] * b1_wh[:, :, 1]
    b2_area = b2_wh[:, :, 0] * b2_wh[:, :, 1]
    iou = intersect_area / (b1_area + b2_area - intersect_area)

    return iou
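# Usage sketch for box_iou() above (not from the original source): b1 holds one
# xywh box, b2 holds two, and the result has shape (1, 2).
from mxnet import nd

b1 = nd.array([[5., 5., 10., 10.]])
b2 = nd.array([[5., 5., 10., 10.],
               [10., 10., 10., 10.]])
print(box_iou(b1, b2))  # roughly [[1.0, 0.143]]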
def calIOU(anchor, gt):
    assert len(anchor.shape) in (1, 2, 3)
    assert len(gt.shape) in (1, 2, 3)
    anchor = anchor.reshape((-1, 4))
    if len(gt.shape) < 3:
        gt = gt.reshape((1, 1, 4)) if len(gt.shape) == 1 else nd.expand_dims(gt, axis=0)
    anchor = nd.expand_dims(anchor, axis=1)
    gt = nd.expand_dims(gt, axis=1)

    max_tl = nd.maximum(nd.take(anchor, nd.array([0, 1]), axis=-1),
                        nd.take(gt, nd.array([0, 1]), axis=-1))
    min_br = nd.minimum(nd.take(anchor, nd.array([2, 3]), axis=-1),
                        nd.take(gt, nd.array([2, 3]), axis=-1))

    area = nd.prod(min_br - max_tl, axis=-1)
    i = nd.where((max_tl >= min_br).sum(axis=-1), nd.zeros_like(area), area)

    anchor_area = nd.prod(anchor[:, :, 2:] - anchor[:, :, :2], axis=-1)
    gt_area = nd.prod(gt[:, :, :, 2:] - gt[:, :, :, :2], axis=-1)
    total_area = anchor_area + gt_area - i
    iou = i / total_area
    return iou
def intersect(box_a, box_b):
    """ We resize both tensors to [A,B,2] without new malloc:
    [A,2] -> [A,1,2] -> [A,B,2]
    [B,2] -> [1,B,2] -> [A,B,2]
    Then we compute the area of intersect between box_a and box_b.
    Args:
      box_a: (tensor) bounding boxes, Shape: [A,4].
      box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
      (tensor) intersection area, Shape: [A,B].
    """
    A = box_a.shape[0]
    B = box_b.shape[0]
    max_xy = nd.minimum(
        box_a[:, 2:].expand_dims(axis=1).repeat(axis=1, repeats=B),
        box_b[:, 2:].expand_dims(axis=0).repeat(axis=0, repeats=A))
    min_xy = nd.maximum(
        box_a[:, :2].expand_dims(axis=1).repeat(axis=1, repeats=B),
        box_b[:, :2].expand_dims(axis=0).repeat(axis=0, repeats=A))
    inter = nd.maximum(max_xy - min_xy, 0)  # clamp negative widths/heights to zero
    return inter[:, :, 0] * inter[:, :, 1]
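# Usage sketch for intersect() above (not from the original source): corner
# boxes, result has shape [A, B] of intersection areas.
from mxnet import nd

box_a = nd.array([[0., 0., 10., 10.]])
box_b = nd.array([[5., 5., 15., 15.],
                  [20., 20., 30., 30.]])
print(intersect(box_a, box_b))  # [[25.  0.]]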
def update(self, obs, returns, actions, advantages, cliprange_now, entropy_coeff):
    advantages = nd.array(advantages)
    actions = nd.array(actions)
    returns = nd.array(returns)

    with autograd.record():
        _, old_logits = self.oldpi.forward(obs)
        old_logp = self.oldpi.logp(old_logits, actions)

        new_vpred, new_logits = self.pi.forward(obs)
        new_vpred = new_vpred.reshape(new_vpred.shape[:-1])
        new_logp = self.pi.logp(new_logits, actions)

        # Action loss
        ratio = nd.exp(new_logp - old_logp)
        surr1 = ratio * advantages
        surr2 = nd.clip(ratio, 1.0 - cliprange_now, 1.0 + cliprange_now) * advantages
        actor_loss = -nd.mean(nd.minimum(surr1, surr2))

        # Value loss
        vf_loss1 = nd.square(new_vpred - returns)
        vf_loss = nd.mean(vf_loss1)

        # Entropy term
        entropy = self.pi.entropy(new_logits)
        mean_entropy = nd.mean(entropy)
        ent_loss = (-entropy_coeff) * mean_entropy

        loss = vf_loss + actor_loss + ent_loss

    # Compute gradients and updates
    loss.backward()
    self.trainer.step(1)

    # Losses are already normalized
    return actor_loss.asscalar(), vf_loss.asscalar(), ent_loss.asscalar()
def _go_above(x, tau):
    lower = nd.min(x, axis=0)
    lower = nd.minimum(lower, node._box._min_list.data())
    upper = nd.max(x, axis=0)
    upper = nd.maximum(upper, node._box._max_list.data())

    el = nd.maximum(node._box._min_list.data() - nd.min(x, axis=0), 0)
    eu = nd.maximum(nd.max(x, axis=0) - node._box._max_list.data(), 0)
    extent = nd.sum(el + eu)
    dim = nd.random.multinomial((el + eu) / extent)
    btm = el[dim]
    top = eu[dim]
    split = nd.random.multinomial(nd.concat(btm, top, dim=0) / (btm + top))
    if split == 0:
        split = nd.random.uniform(lower[dim], node._box._min_list.data()[dim])
    elif split == 1:
        split = nd.random.uniform(node._box._max_list.data()[dim], upper[dim])

    with self.name_scope():
        p_node = self._new_node(parent=node._box._parent,
                                min_list=lower,
                                max_list=upper,
                                tau=tau,
                                decision=lambda: Decision(split=split, dim=dim, gate=self._new_gate),
                                embedding=node._embedding.data())
        s_node = self._new_node(parent=p_node, embedding=node._embedding.data())

    node._box._parent = p_node

    if split < node._box._min_list.data()[dim]:
        # current node is right
        l_node = s_node
        r_node = node
    elif split > node._box._max_list.data()[dim]:
        # current node is left
        l_node = node
        r_node = s_node

    self._structure[p_node] = {l_node: -1, r_node: 1}

    # p_node's parent also needs to reference p_node and the other child
    if p_node._box._parent is not None:
        if self._structure[p_node._box._parent][node] == -1:
            self._structure[p_node._box._parent][p_node] = -1
        elif self._structure[p_node._box._parent][node] == 1:
            self._structure[p_node._box._parent][p_node] = 1
        self._structure[p_node._box._parent].pop(node)
    elif p_node._box._parent is None:
        self._structure.move_to_end(p_node, last=False)

    self._weightlayer.add(*[p_node._box, s_node._box])
    self._routerlayer.add(*[p_node._decision])
    self._embeddlayer.add(*[p_node, s_node])

    decision = p_node._decision.forward(x, crisp=True)
    _shard(decision, x, _extend(l_node), _extend(r_node))
def update(self):
    state = nd.array([t.state for t in self.buffer], ctx=self.ctx)
    action = nd.array([t.action for t in self.buffer], ctx=self.ctx)
    reward = [t.reward for t in self.buffer]
    # next_state = nd.array([t.next_state for t in self.buffer], ctx=self.ctx)
    old_action_log_prob = nd.array([t.a_log_prob for t in self.buffer], ctx=self.ctx)

    # discounted return (reward-to-go) for every step in the buffer
    R = 0
    Gt = []
    for r in reward[::-1]:
        R = r + self.gamma * R
        Gt.insert(0, R)
    Gt = nd.array(Gt, ctx=self.ctx)

    # sample 'ppo_update_times' times
    # sample 'batch_size' samples every time
    for i in range(self.ppo_update_times):
        assert len(self.buffer) >= self.batch_size
        sample_index = random.sample(range(len(self.buffer)), self.batch_size)
        for index in sample_index:
            # optimize the actor network
            with autograd.record():
                Gt_index = Gt[index]
                V = self.critic_network(state[index].reshape(1, -1)).detach()
                advantage = (Gt_index - V)
                all_action_prob = self.actor_network(state[index].reshape(1, -1))
                action_prob = nd.pick(all_action_prob, action[index])
                ratio = action_prob / old_action_log_prob[index]
                surr1 = ratio * advantage
                surr2 = nd.clip(ratio, 1 - self.clip_param, 1 + self.clip_param) * advantage
                action_loss = -nd.mean(nd.minimum(surr1, surr2))  # attention

            self.actor_network.collect_params().zero_grad()
            action_loss.backward()
            actor_network_params = [
                p.data() for p in self.actor_network.collect_params().values()
            ]
            gb.grad_clipping(actor_network_params, theta=self.clip_param, ctx=self.ctx)
            self.actor_optimizer.step(1)

            # optimize the critic network
            with autograd.record():
                Gt_index = Gt[index]
                V = self.critic_network(state[index].reshape(1, -1))
                loss = gloss.L2Loss()
                value_loss = nd.mean(loss(Gt_index, V))

            self.critic_network.collect_params().zero_grad()
            value_loss.backward()
            critic_network_params = [
                p.data() for p in self.critic_network.collect_params().values()
            ]
            gb.grad_clipping(critic_network_params, theta=self.clip_param, ctx=self.ctx)
            self.critic_optimizer.step(1)
            self.training_step += 1

    # clear buffer
    del self.buffer[:]
def generate_targets(self, img, boxes):
    """
    img : [H, W, 3]
    boxes : [N, 5]
    """
    rh, rw, _ = img.shape
    rx = nd.arange(0, rw).reshape((1, -1))
    ry = nd.arange(0, rh).reshape((-1, 1))
    sx = nd.tile(rx, reps=(rh, 1))
    sy = nd.tile(ry, reps=(1, rw))

    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    boxes = boxes[nd.argsort(areas)]
    boxes = nd.concat(nd.zeros((1, 5)), boxes, dim=0)  # for gt assign confusion
    x0, y0, x1, y1, cls = nd.split(boxes, num_outputs=5, axis=-1, squeeze_axis=True)
    n = boxes.shape[0]

    # [H, W, N]
    of_l = sx.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(x0, axis=0), axis=0)
    of_t = sy.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(y0, axis=0), axis=0)
    of_r = -(sx.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(x1, axis=0), axis=0))
    of_b = -(sy.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(y1, axis=0), axis=0))

    # [H, W, N]
    eps = 1e-5
    ctr = (nd.minimum(of_l, of_r) / nd.maximum(of_l, of_r)) * \
          (nd.minimum(of_t, of_b) / nd.maximum(of_t, of_b) + eps)
    ctr = nd.sqrt(nd.abs(ctr))
    ctr[:, :, 0] = 0

    # [H, W, N, 4]
    offsets = nd.concat(of_l.reshape(-2, 1), of_t.reshape(-2, 1),
                        of_r.reshape(-2, 1), of_b.reshape(-2, 1), dim=-1)

    # fh = int(np.ceil(((rh + 1) / 2) // 2 / 2))
    # fw = int(np.ceil(((rw + 1) / 2) // 2 / 2))
    fh = int(np.ceil(np.ceil(np.ceil(rh / 2) / 2) / 2))
    fw = int(np.ceil(np.ceil(np.ceil(rw / 2) / 2) / 2))

    fm_list = []
    for i in range(self._stages):
        fm_list.append((fh, fw))
        fh = int(np.ceil(fh / 2))
        fw = int(np.ceil(fw / 2))
    fm_list = fm_list[::-1]

    cls_targets = []
    ctr_targets = []
    box_targets = []
    cor_targets = []
    stride = self._stride
    for i in range(self._stages):
        fh, fw = fm_list[i]
        cls_target = nd.zeros((fh, fw))
        box_target = nd.zeros((fh, fw, 4))
        ctr_target = nd.zeros((fh, fw))

        cx = nd.arange(0, fw).reshape((1, -1))
        cy = nd.arange(0, fh).reshape((-1, 1))
        sx = nd.tile(cx, reps=(fh, 1))
        sy = nd.tile(cy, reps=(1, fw))
        syx = nd.stack(sy.reshape(-1), sx.reshape(-1)).transpose().astype('int32')
        # bugs in this type
        # bx = sxy[:, 0] * stride + nd.floor(sxy[:, 0] / 2).astype(np.int32)
        # by = sxy[:, 1] * stride + nd.floor(sxy[:, 1] / 2).astype(np.int32)
        by = syx[:, 0] * stride
        bx = syx[:, 1] * stride
        cor_targets.append(nd.stack(bx, by, axis=1))

        # [FH*FW, N, 4]
        of_byx = offsets[by, bx]
        # of_byx = nd.gather_nd(offsets, indices=byx.transpose())
        min_vr, max_vr = self._valid_range[i]
        # [FH*FW, N]
        is_in_box = nd.prod(of_byx > 0, axis=-1)
        is_valid_area = (of_byx.max(axis=-1) >= min_vr) * (of_byx.max(axis=-1) <= max_vr)
        # [FH*FW, N]
        valid_pos = nd.elemwise_mul(is_in_box, is_valid_area)
        of_valid = nd.zeros((fh, fw, n))
        of_valid[syx[:, 0], syx[:, 1], :] = valid_pos  # 1, 0
        of_valid[:, :, 0] = 0
        # [FH, FW]
        gt_inds = nd.argmax(of_valid, axis=-1)

        # box targets
        box_target[syx[:, 0], syx[:, 1]] = boxes[gt_inds[syx[:, 0], syx[:, 1]], :4]
        box_target = box_target.reshape(-1, 4)

        # cls targets
        cls_target[syx[:, 0], syx[:, 1]] = cls[gt_inds[syx[:, 0], syx[:, 1]]]
        cls_target = cls_target.reshape(-1)

        # ctr targets
        ctr_target[syx[:, 0], syx[:, 1]] = ctr[by, bx, gt_inds[syx[:, 0], syx[:, 1]]]
        ctr_target = ctr_target.reshape(-1)

        box_targets.append(box_target)
        cls_targets.append(cls_target)
        ctr_targets.append(ctr_target)
        stride = int(stride / 2)

    box_targets = nd.concat(*box_targets, dim=0)
    cls_targets = nd.concat(*cls_targets, dim=0)
    ctr_targets = nd.concat(*ctr_targets, dim=0)
    cor_targets = nd.concat(*cor_targets, dim=0)
    cor_targets = cor_targets.astype('float32')
    return cls_targets, ctr_targets, box_targets, cor_targets
def BReLU(x):
    return nd.minimum(1., nd.maximum(0., x))
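# Quick check of BReLU() above: values are clamped to the [0, 1] range.
from mxnet import nd

print(BReLU(nd.array([-0.5, 0.3, 1.7])))  # [0.  0.3 1. ]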
tree._grow(nd.array([[1, 1]]))

# %%
from Node import Node

node = Node()
node.collect_params()

# %%
a = nd.array([[1, 2], [3, 4], [-10, -10]])
a
upper = nd.max(a, axis=0)
lower = nd.min(a, axis=0)
e = nd.random.exponential(1 / nd.sum(upper - lower))
(upper, lower, e)

nd.random.multinomial(nd.array([0.5, 0.5]), 10)

# %%
if nd.sum(nd.array([0, 0])) == 0:
    print("yay")

nd.minimum(nd.array([4, 5]), nd.array([0, 6]))
noise_batch = noise_std * nd.random.normal(shape=(batch_size, num_measurements))
x_batch = x_batch.reshape((batch_size, 784))
y_batch = nd.dot(x_batch, A) + noise_batch

########################
### Lasso
########################
x_hat_batch_Lasso = nd.zeros([batch_size, 784])
lasso_est = Lasso(alpha=lmbd)
for i in range(batch_size):
    y_val = y_batch[i]
    lasso_est.fit(A.T.asnumpy(), y_val.reshape(num_measurements).asnumpy())
    x_hat_lasso = nd.array(lasso_est.coef_)
    x_hat_lasso = nd.reshape(x_hat_lasso, [-1])
    x_hat_lasso = nd.maximum(nd.minimum(x_hat_lasso, 1), 0)
    x_hat_batch_Lasso[i] = x_hat_lasso

########################
### OMP Algorithm
########################
omp_est = OrthogonalMatchingPursuit(n_nonzero_coefs=num_measurements // 2)
x_hat_batch_OMP = nd.zeros([batch_size, 784])
for i in range(batch_size):
    y_val = y_batch[i]
    omp_est.fit(A.T.asnumpy(), y_val.reshape(num_measurements).asnumpy())
    x_hat_OMP = nd.array(omp_est.coef_)
    x_hat_OMP = nd.reshape(x_hat_OMP, [-1])
    x_hat_OMP = nd.maximum(nd.minimum(x_hat_OMP, 1), 0)
    x_hat_batch_OMP[i] = x_hat_OMP