def yolo2_forward(x, num_class, anchor_scales):
    """Split raw YOLOv2 convolution output into its prediction parts.

    The input is moved to channels-last and its channel axis is split into
    ``(anchors, num_class + 5)``, so the working tensor is 5-D. Along the last
    axis the layout is: ``num_class`` class scores, 1 objectness score,
    2 centre values and 2 size values.

    Parameters
    ----------
    x : NDArray
        Convolution output (assumed channels-first, 4-D — TODO confirm).
    num_class : int
        Number of object classes.
    anchor_scales :
        Anchor description forwarded unchanged to ``transform_size``.

    Returns
    -------
    tuple
        ``(output, cls_pred, score, xywh)`` where ``output`` concatenates
        ``[class id, score, left, top, right, bottom]`` on axis 4 and ``xywh``
        concatenates the sigmoid-ed centre with the raw size prediction.
    """
    stride = num_class + 5
    feat = x.transpose((0, 2, 3, 1)).reshape((0, 0, 0, -1, stride))

    def _channels(begin, end):
        # Slice a channel range out of the last axis.
        return feat.slice_axis(begin=begin, end=end, axis=-1)

    cls_pred = _channels(0, num_class)
    score = nd.sigmoid(_channels(num_class, num_class + 1))
    # Sigmoid keeps the centre prediction inside (0, 1).
    xy = nd.sigmoid(_channels(num_class + 1, num_class + 3))
    wh = _channels(num_class + 3, num_class + 5)

    # Per the original comments these helpers convert to image-relative
    # centre and size values.
    center_x, center_y = transform_center(xy)
    box_w, box_h = transform_size(wh, anchor_scales)

    # Class id is the arg-max over the class scores.
    cid = nd.argmax(cls_pred, axis=-1, keepdims=True)

    # Corner-format boxes clipped to [0, 1]. (The original author left an open
    # question here about why `left` and `top` contain many zeros.)
    half_w = box_w / 2
    half_h = box_h / 2
    left = nd.clip(center_x - half_w, 0, 1)
    top = nd.clip(center_y - half_h, 0, 1)
    right = nd.clip(center_x + half_w, 0, 1)
    bottom = nd.clip(center_y + half_h, 0, 1)

    output = nd.concat(cid, score, left, top, right, bottom, dim=4)
    return output, cls_pred, score, nd.concat(xy, wh, dim=4)
def bgr2hsi(x):
    """Convert a batch of 3-channel images to hue / saturation / intensity.

    Parameters
    ----------
    x : NDArray
        Batch laid out as (n, 3, w, h).

    Returns
    -------
    NDArray
        (n, 3, w, h) with channels ordered (h, s, i). Hue is in radians in
        [0, 2*pi], saturation is clipped to [0, 1], intensity is the channel
        mean.

    Notes
    -----
    NOTE(review): the function name and the original docstring describe the
    input channels as (b, g, r), yet the code treats channel 0 as R and
    channel 2 as B. Confirm the channel order expected by callers.
    Relies on a module-level ``eps``.
    """
    x_float = x.astype('float32')
    channel_sum = nd.sum(x_float, axis=1)
    red = x_float[:, 0, :, :]
    green = x_float[:, 1, :, :]
    blue = x_float[:, 2, :, :]

    # Chromaticity coordinates; eps keeps all-black pixels finite.
    denom = channel_sum + 3 * eps
    r = (red + eps) / denom
    g = (green + eps) / denom
    b = (blue + eps) / denom

    # Hue angle from the standard HSI arccos formula.
    root = ((r - g)**2 + (r - b) * (g - b))**(1.0 / 2)
    cos_theta = (2 * r - g - b) / (2 * root + eps)
    theta = nd.arccos(nd.clip(cos_theta, -1.0, 1.0))
    hue = nd.where(g >= b, theta, 2 * math.pi - theta).expand_dims(axis=1)

    saturation = (1 - 3 * nd.minimum(nd.minimum(r, g), b)).expand_dims(axis=1)
    saturation = nd.clip(saturation, 0., 1.)

    intensity = ((red + green + blue) / 3).expand_dims(axis=1)
    return nd.concat(hue, saturation, intensity, dim=1)
def check_tbox(image, label):
    """Plot the target box that ``yolo2_target`` generates for one sample.

    Builds targets for a single label on a 16x16 grid with 3 anchors, decodes
    them to corner boxes, keeps the first box returned by ``box_nms`` and
    draws it in green on the de-normalised image.

    Parameters
    ----------
    image : NDArray
        Normalised image of shape (3, 256, 256).
    label : NDArray
        Label of shape (1, 5).

    Returns
    -------
    numpy.ndarray
        The four corner values of the drawn box, scaled by the image size.
    """
    plt.clf()
    ctx = image.context
    rgb_mean = RGB_MEAN.as_in_context(ctx)
    rgb_std = RGB_STD.as_in_context(ctx)
    assert label.shape == (1, 5), \
        "shape of label expected [1, 5], but given {}".format(label.shape)
    assert image.shape == (3, 256, 256), \
        "shape of image expected [3, 256, 256], given {}".format(image.shape)

    # Dummy score tensor: only its shape is meaningful to the target builder.
    dummy_scores = nd.zeros((1, 16, 16, 3, 1))
    batched_label = label.expand_dims(axis=0)
    tid, tscore, tbox, _ = yolo2_target(dummy_scores, batched_label,
                                        anchor_scales)

    # Decode the targets the same way predictions are decoded.
    t_xy = tbox.slice_axis(begin=0, end=2, axis=-1)
    t_wh = tbox.slice_axis(begin=2, end=4, axis=-1)
    cx, cy = transform_center(nd.sigmoid(t_xy))
    bw, bh = transform_size(t_wh, anchor_scales)
    left = nd.clip(cx - bw / 2, 0, 1)
    top = nd.clip(cy - bh / 2, 0, 1)
    right = nd.clip(cx + bw / 2, 0, 1)
    bottom = nd.clip(cy + bh / 2, 0, 1)

    decoded = nd.concat(tid, tscore, left, top, right, bottom, dim=-1)
    kept = nd.contrib.box_nms(decoded.reshape((0, -1, 6))).asnumpy()

    # Scale the first box by the image size (shape is still (3, 256, 256)).
    scale = np.array([image.shape[1], image.shape[2]] * 2)
    box = kept[0][0][2:6] * scale
    rect = box_to_rect(nd.array(box), 'green', 2)

    # Undo normalisation and bring the pixels into [0, 1] for imshow.
    pixels = (image.transpose((1, 2, 0)) * rgb_std + rgb_mean).asnumpy()
    pixels = pixels.clip(0, 255) / 255.
    plt.imshow(pixels)
    plt.gca().add_patch(rect)
    plt.show()
    return box
def action_clip(self, action):
    """Clamp a batch of actions to the limits stored in ``self.action_bound``.

    When each action has exactly two components, column ``i`` is clipped with
    ``self.action_bound[i]`` and the columns are re-joined; otherwise the
    whole array is clipped with ``self.action_bound[0]``.

    Parameters
    ----------
    action : NDArray
        Batch of actions, one row per sample.

    Returns
    -------
    NDArray
        The clipped actions.
    """
    def _limits(row):
        # Bounds are stored as NDArrays; nd.clip needs plain Python floats.
        bound = self.action_bound[row]
        return float(bound[0].asnumpy()), float(bound[1].asnumpy())

    if len(action[0]) != 2:
        low, high = _limits(0)
        return nd.clip(action, low, high)

    columns = []
    for col in range(2):
        low, high = _limits(col)
        columns.append(nd.clip(action[:, col], low, high).reshape(-1, 1))
    return nd.concat(*columns)
def forward(self, adj, feat):
    r"""Compute (Dense) Graph Convolution layer.

    Parameters
    ----------
    adj : mxnet.NDArray
        The adjacency matrix of the graph to apply Graph Convolution on. When
        applied to a unidirectional bipartite graph, ``adj`` should be of
        shape :math:`(N_{out}, N_{in})`; when applied to a homogeneous graph,
        ``adj`` should be of shape :math:`(N, N)`. In both cases, a row
        represents a destination node while a column represents a source node.
    feat : mxnet.NDArray
        The input feature.

    Returns
    -------
    mxnet.NDArray
        The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
        is size of output feature.
    """
    ctx = feat.context
    adj = adj.astype(feat.dtype).as_in_context(ctx)

    # Degrees are floored at 1 so isolated nodes do not divide by zero.
    inf = float('inf')
    src_degrees = nd.clip(adj.sum(axis=0), a_min=1, a_max=inf)
    dst_degrees = nd.clip(adj.sum(axis=1), a_min=1, a_max=inf)

    # Trailing singleton axes let a per-node norm broadcast over features.
    trailing = (1, ) * (feat.ndim - 1)

    hidden = feat
    if self._norm == 'both':
        src_scale = nd.power(src_degrees, -0.5)
        src_scale = src_scale.reshape(src_scale.shape + trailing)
        hidden = hidden * src_scale.as_in_context(ctx)

    weight = self.weight.data(hidden.context)
    if self._in_feats > self._out_feats:
        # Project first so aggregation runs on the smaller feature size.
        rst = nd.dot(adj, nd.dot(hidden, weight))
    else:
        # Aggregate first, then project.
        rst = nd.dot(nd.dot(adj, hidden), weight)

    if self._norm != 'none':
        if self._norm == 'both':
            dst_scale = nd.power(dst_degrees, -0.5)
        else:  # right
            dst_scale = 1.0 / dst_degrees
        dst_scale = dst_scale.reshape(dst_scale.shape + trailing)
        rst = rst * dst_scale.as_in_context(ctx)

    if self.bias is not None:
        rst = rst + self.bias.data(ctx)
    if self._activation is not None:
        rst = self._activation(rst)
    return rst
def getDefaultBoxes(fmap, s=None, r=None, offset=None, norm=None, clip=False,
                    srmode='few', omode='flatten'):
    """Generate default (anchor) boxes for every cell of a feature map.

    Parameters
    ----------
    fmap : NDArray
        Feature map; only its 4-D shape ``(n, c, fh, fw)`` is used.
    s, r : sequence of float, optional
        Box scales and aspect ratios; each defaults to ``[1.]``.
    offset : sequence of float, optional
        Shift added to the corner coordinates before normalisation;
        defaults to ``[0.5, 0.5]``. At most 2 elements.
    norm : sequence of float, optional
        Divisor applied to the coordinates; defaults to ``[fw, fh]``.
        At most 2 elements.
    clip : bool
        Clip the resulting coordinates to [0, 1] when true.
    srmode : {'few', 'many'}
        Forwarded to ``getwh`` to choose how scales and ratios are combined.
    omode : {'flatten', 'stack'}
        Output layout, see Returns.

    Returns
    -------
    NDArray
        Corner boxes shaped ``(1, fh * fw * boxes_per_cell, 4)`` for
        ``'flatten'`` or ``(1, fh, fw, boxes_per_cell, 4)`` for ``'stack'``.
    """
    assert omode in ('flatten', 'stack')
    assert srmode in ('few', 'many')
    _, _, fh, fw = fmap.shape

    scales = nd.array([1.] if s is None else s)
    ratios = nd.array([1.] if r is None else r)
    width, height = getwh(scales, ratios, fw, fh, srmode)
    nbox_per_pixel = width.size

    # Grid of (x, y) cell indices, repeated once per box corner.
    col_index = nd.repeat(nd.arange(fw).reshape((1, -1)), fh, axis=0)
    row_index = nd.repeat(nd.arange(fh).reshape((-1, 1)), fw, axis=1)
    centers = nd.stack(col_index, row_index, axis=2)
    centers = nd.tile(centers, [1, 1, nbox_per_pixel * 2])

    # Per-box offsets to the left-upper and right-lower corners.
    corner_offset = nd.stack(width * -0.5, height * -0.5,
                             width * 0.5, height * 0.5, axis=1)
    corner_offset = corner_offset.reshape((-1,))
    corners = (centers + corner_offset).reshape(
        (fh, fw, nbox_per_pixel, 2, 2))

    offset = nd.array([0.5, 0.5] if offset is None else offset)
    assert offset.size <= 2
    norm = nd.array([fw, fh] if norm is None else norm)
    assert norm.size <= 2
    corners = (corners + offset) / norm

    if clip:
        nd.clip(corners, a_min=0., a_max=1., out=corners)

    if omode == 'flatten':
        return corners.reshape((1, -1, 4))
    return corners.reshape((1, fh, fw, nbox_per_pixel, 4))
def box_ciou(b1, b2):
    """Complete-IoU (CIoU) between corresponding boxes.

    Parameters
    ----------
    b1 : NDArray, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
        Predicted boxes (centre x, centre y, width, height).
    b2 : NDArray, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
        Ground-truth boxes in the same layout.

    Returns
    -------
    ciou : NDArray, shape=(batch, feat_w, feat_h, anchor_num)
        ``iou - centre_distance / enclosing_diagonal - alpha * v``.
        NOTE(review): the earlier docstring advertised a trailing axis of
        size 1, but the computation drops the coordinate axis; callers that
        need it should ``expand_dims(axis=-1)``.
    """
    eps = 1e-6
    inf = float('inf')

    def _floor(value):
        # nd.clip requires both bounds; only the lower one matters here.
        return nd.clip(value, a_min=eps, a_max=inf)

    # Corners of the predicted boxes.
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh / 2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    # Corners of the ground-truth boxes.
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh / 2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    # Plain IoU. Element-wise maximum/minimum are required here; nd.max and
    # nd.min are reductions whose second argument is an axis.
    intersect_mins = nd.maximum(b1_mins, b2_mins)
    intersect_maxes = nd.minimum(b1_maxes, b2_maxes)
    intersect_wh = nd.maximum(intersect_maxes - intersect_mins,
                              nd.zeros_like(intersect_maxes))
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    union_area = b1_area + b2_area - intersect_area
    iou = intersect_area / _floor(union_area)

    # Squared distance between the two centres.
    center_distance = nd.sum(nd.power(b1_xy - b2_xy, 2), axis=-1)

    # Smallest box enclosing both boxes, and its squared diagonal.
    enclose_mins = nd.minimum(b1_mins, b2_mins)
    enclose_maxes = nd.maximum(b1_maxes, b2_maxes)
    enclose_wh = nd.maximum(enclose_maxes - enclose_mins,
                            nd.zeros_like(intersect_maxes))
    enclose_diagonal = nd.sum(nd.power(enclose_wh, 2), axis=-1)

    ciou = iou - 1.0 * center_distance / _floor(enclose_diagonal)

    # Aspect-ratio consistency term.
    ratio_diff = (nd.arctan(b1_wh[..., 0] / _floor(b1_wh[..., 1])) -
                  nd.arctan(b2_wh[..., 0] / _floor(b2_wh[..., 1])))
    v = (4 / (math.pi**2)) * nd.power(ratio_diff, 2)

    # The denominator must be bounded from BELOW; capping it at 1e-6 from
    # above (as before) would inflate alpha by orders of magnitude.
    alpha = v / _floor(1.0 - iou + v)
    return ciou - alpha * v
def gan_mse(p, g, device):
    """Element-wise GAN loss of predictions ``p`` against a real/fake target.

    Despite the name, the value returned is a binary cross-entropy: earlier
    squared-error and absolute-error variants (and a noisy-label variant)
    were tried by the author and disabled.

    Parameters
    ----------
    p : NDArray
        Discriminator outputs, interpreted as probabilities.
    g : str
        ``'real'`` selects an all-ones target; anything else all-zeros.
    device :
        Unused; kept for call compatibility.

    Returns
    -------
    NDArray
        Per-element loss with the same shape as ``p``.
    """
    target = mx.nd.ones_like(p) if g == 'real' else mx.nd.zeros_like(p)
    target = nd.clip(target, 0, 1)

    # Probabilities are floored at 1e-5 so the logarithm stays finite.
    log_p = nd.log(nd.clip(p, 1e-5, 1))
    log_not_p = nd.log(nd.clip(1 - p, 1e-5, 1))
    return -target * log_p - (1 - target) * log_not_p
def bbox_iou(box1, box2, transform=True):
    """Row-wise IoU between two sets of boxes.

    Parameters
    ----------
    box1, box2 : NDArray or array-like
        Boxes, one per row with four values. Non-NDArray inputs are
        converted with ``nd.array``. Absolute values are taken first.
    transform : bool
        When true the rows are read as (centre x, centre y, width, height)
        and converted to corners; otherwise they are already
        (x1, y1, x2, y2).

    Returns
    -------
    NDArray
        IoU per row, clipped to [1e-5, 1 - 1e-5].

    Notes
    -----
    NOTE(review): extents are computed as ``end - start + 1`` (inclusive
    pixel convention); confirm callers pass pixel coordinates rather than
    normalised ones.
    """
    def _as_corners(box):
        # Centre/size rows -> corner rows, written into a copy.
        corners = box.copy()
        corners[:, 0] = box[:, 0] - box[:, 2] / 2.0
        corners[:, 1] = box[:, 1] - box[:, 3] / 2.0
        corners[:, 2] = box[:, 0] + box[:, 2] / 2.0
        corners[:, 3] = box[:, 1] + box[:, 3] / 2.0
        return corners

    if not isinstance(box1, nd.NDArray):
        box1 = nd.array(box1)
    if not isinstance(box2, nd.NDArray):
        box2 = nd.array(box2)

    # Force non-negative coordinates.
    box1, box2 = nd.abs(box1), nd.abs(box2)

    if transform:
        box1 = _as_corners(box1)
        box2 = _as_corners(box2)

    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Intersection rectangle: max of the starts, min of the ends.
    ix1 = nd.where(b1_x1 > b2_x1, b1_x1, b2_x1)
    iy1 = nd.where(b1_y1 > b2_y1, b1_y1, b2_y1)
    ix2 = nd.where(b1_x2 < b2_x2, b1_x2, b2_x2)
    iy2 = nd.where(b1_y2 < b2_y2, b1_y2, b2_y2)

    inter_w = nd.clip(ix2 - ix1 + 1, a_min=0, a_max=10000)
    inter_h = nd.clip(iy2 - iy1 + 1, a_min=0, a_max=10000)
    inter_area = inter_w * inter_h

    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
    iou = inter_area / (b1_area + b2_area - inter_area)
    return nd.clip(iou, 1e-5, 1. - 1e-5)
def SiameseForward(cls_pred, bbox_pred, anchor_scales, Training=True):
    """Reorganise Siamese head outputs per anchor and decode the boxes.

    Both inputs are moved to channels-last and their channel axis is split
    into ``(num_anchor, -1)``.

    Parameters
    ----------
    cls_pred, bbox_pred : NDArray
        Classification and box-regression outputs (assumed channels-first,
        4-D — TODO confirm).
    anchor_scales : sequence
        One entry per anchor; also forwarded to ``transform_size``.
    Training : bool
        Selects which set of values is returned.

    Returns
    -------
    tuple
        Training: ``(cls_pred, xywh)`` with ``xywh`` the sigmoid-ed centre
        concatenated with the raw size prediction.
        Otherwise: ``(cid, score, corners, width, height)`` where ``score``
        is the max softmax probability and width/height are measured on the
        clipped corner boxes.
    """
    num_anchor = len(anchor_scales)

    def _per_anchor(tensor):
        # Channels-last, then split channels into (anchor, values).
        moved = mx.ndarray.transpose(tensor, (0, 2, 3, 1))
        return mx.ndarray.reshape(moved, (0, 0, 0, num_anchor, -1))

    cls_pred = _per_anchor(cls_pred)
    bbox_pred = _per_anchor(bbox_pred)

    xy = mx.ndarray.sigmoid(bbox_pred.slice_axis(begin=0, end=2, axis=-1))
    center_x, center_y = transform_center(xy)
    wh = bbox_pred.slice_axis(begin=2, end=4, axis=-1)
    box_w, box_h = transform_size(wh, anchor_scales)

    # Class id is the arg-max channel.
    cid = nd.argmax(cls_pred, axis=-1, keepdims=True)

    half_w = box_w / 2
    half_h = box_h / 2
    left = nd.clip(center_x - half_w, 0, 1)
    top = nd.clip(center_y - half_h, 0, 1)
    right = nd.clip(center_x + half_w, 0, 1)
    bottom = nd.clip(center_y + half_h, 0, 1)

    if Training:
        return cls_pred, nd.concat(xy, wh, dim=4)

    score = nd.max(nd.softmax(cls_pred, axis=-1), axis=-1, keepdims=True)
    corners = nd.concat(left, top, right, bottom, dim=4)
    return cid, score, corners, right - left, bottom - top
def bbox_iou(box1, box2, transform=True, ctx=None):
    """Measure the overlap (IoU) between predicted and ground-truth boxes.

    The original note says an IoU above 0.5 indicates a fairly good
    prediction.

    Parameters
    ----------
    box1, box2 : NDArray or array-like
        Boxes, one per row with four values; array-likes are converted with
        ``nd.array`` on ``ctx``. Absolute values are taken first.
    transform : bool
        When true rows are (centre x, centre y, width, height) and are
        converted to (x1, y1, x2, y2) first.
    ctx : Context, optional
        Device used only when an input has to be converted.

    Returns
    -------
    NDArray
        IoU per row, clipped to [1e-5, 1 - 1e-5].
    """
    def _prepare(box):
        # Convert if necessary, make non-negative, optionally go to corners.
        if not isinstance(box, nd.NDArray):
            box = nd.array(box, ctx=ctx)
        box = nd.abs(box)
        if not transform:
            return box
        out = box.copy()
        out[:, 0] = box[:, 0] - box[:, 2] / 2.0
        out[:, 1] = box[:, 1] - box[:, 3] / 2.0
        out[:, 2] = box[:, 0] + box[:, 2] / 2.0
        out[:, 3] = box[:, 1] + box[:, 3] / 2.0
        return out

    box1 = _prepare(box1)
    box2 = _prepare(box2)

    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Corners of the intersection rectangle.
    inter_x1 = nd.where(b1_x1 > b2_x1, b1_x1, b2_x1)
    inter_y1 = nd.where(b1_y1 > b2_y1, b1_y1, b2_y1)
    inter_x2 = nd.where(b1_x2 < b2_x2, b1_x2, b2_x2)
    inter_y2 = nd.where(b1_y2 < b2_y2, b1_y2, b2_y2)

    # Negative extents (no overlap) are clamped to zero.
    overlap_w = nd.clip(inter_x2 - inter_x1 + 1, a_min=0, a_max=10000)
    overlap_h = nd.clip(inter_y2 - inter_y1 + 1, a_min=0, a_max=10000)
    inter_area = overlap_w * overlap_h

    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
    union_area = b1_area + b2_area - inter_area
    return nd.clip(inter_area / union_area, 1e-5, 1. - 1e-5)
def update(self, obs, returns, masks, actions, values, logpacs, lrnow,
           cliprange_now):
    """Run one clipped-surrogate (PPO-style) update for a discrete policy.

    Parameters
    ----------
    obs : numpy.ndarray
        Observations in channels-last layout; transposed to channels-first.
    returns, values : numpy.ndarray
        Empirical returns and the value estimates recorded at rollout time.
    masks :
        Unused; kept for call compatibility.
    actions : numpy.ndarray
        Indices of the actions that were taken.
    logpacs : numpy.ndarray
        Log-probabilities of those actions under the rollout policy.
    lrnow :
        Unused (the learning-rate update is disabled).
    cliprange_now : float
        Clip range for both the value and the policy-ratio clipping.

    Returns
    -------
    tuple of float
        ``(actor_loss, vf_loss)``.

    Notes
    -----
    All per-sample tensors are shaped ``(N, 1)``. Previously ``advantages``
    stayed ``(N,)`` and ``nd.pick`` dropped its axis, so combining them with
    the ``(N, 1)`` tensors broadcast the surrogate to ``(N, N)`` and mixed
    samples in the loss.
    NOTE(review): ``logits`` is indexed directly as a log-probability;
    confirm the network returns log-softmax outputs.
    """
    ctx = self.args.ctx

    advantages = returns - values
    advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
    advantages = nd.array(advantages, ctx=ctx).reshape((-1, 1))

    obs = nd.array(np.transpose(obs, (0, 3, 1, 2)), ctx=ctx)
    actions = nd.array(actions, ctx=ctx).reshape((-1, 1))
    values = nd.array(values, ctx=ctx).reshape((-1, 1))
    returns = nd.array(returns, ctx=ctx).reshape((-1, 1))
    oldpi_log_prob = nd.array(logpacs, ctx=ctx).reshape((-1, 1))

    with autograd.record():
        vpred, logits = self.net(obs)
        # Guard against a value head that returns shape (N,).
        vpred = vpred.reshape((-1, 1))

        # Value loss: pessimistic max of clipped and unclipped errors.
        vpred_clipped = values + nd.clip(vpred - values,
                                         -cliprange_now, cliprange_now)
        vf_loss1 = nd.square(vpred - returns)
        vf_loss2 = nd.square(vpred_clipped - returns)
        vf_loss = nd.mean(nd.maximum(vf_loss1, vf_loss2))

        # Policy loss; keepdims keeps the picked values at (N, 1).
        pi_log_prob = nd.pick(logits, actions, axis=1, keepdims=True)
        ratio = nd.exp(pi_log_prob - oldpi_log_prob)
        surr1 = ratio * advantages
        surr2 = nd.clip(ratio, 1.0 - cliprange_now,
                        1.0 + cliprange_now) * advantages
        actor_loss = -nd.mean(nd.minimum(surr1, surr2))

        # Entropy bonus and value coefficient are currently disabled.
        loss = vf_loss + actor_loss

    loss.backward()
    self.trainer.step(obs.shape[0])
    return actor_loss.asscalar(), vf_loss.asscalar()
def log_rmse(net, features, labels):
    """Root-mean-square error between log-predictions and log-labels.

    Predictions below 1 are raised to 1 before the logarithm for numerical
    stability; the upper bound is infinity, so larger values are untouched.
    Relies on a module-level ``loss`` (presumably a halved squared-error
    loss, which the factor 2 compensates for — verify).

    Returns
    -------
    float
        The scalar RMSE.
    """
    predictions = net(features)
    clipped_preds = nd.clip(predictions, 1, float('inf'))
    mean_loss = loss(clipped_preds.log(), labels.log()).mean()
    return nd.sqrt(2 * mean_loss).asscalar()
def forward(self, rcnn_cls_pred, rcnn_bbox_pred, rcnn_cls_gt, rcnn_bbox_gt):
    """Compute the RCNN classification and box-regression losses.

    Parameters
    ----------
    rcnn_cls_pred : NDArray
        Class probabilities, shape (roi_num, num_classes).
    rcnn_bbox_pred : NDArray
        Box regressions, shape (roi_num, num_classes * 4).
    rcnn_cls_gt : NDArray
        Ground-truth class index per ROI; values > 0 count as foreground.
    rcnn_bbox_gt : NDArray
        Box regression targets, same shape as ``rcnn_bbox_pred``.

    Returns
    -------
    tuple of NDArray
        ``(cls_log_loss, bbox_smooth_l1_loss)``, both divided by
        ``self._roi_batch_size``.
    """
    ctx = rcnn_cls_pred.context
    roi_num = rcnn_cls_pred.shape[0]

    # The regression weights are constants, so build them outside autograd.
    with autograd.pause():
        roi_idx = nd.arange(roi_num, ctx=ctx).reshape(-1, 1)
        fg_bbox_mask = (rcnn_cls_gt > 0).reshape(0, 1, 1)
        bbox_weights = nd.zeros_like(rcnn_bbox_gt).reshape(0, -1, 4)
        # Only the ground-truth class slot of a foreground ROI gets weight.
        per_roi_weights = self._bbox_weights.data(ctx).broadcast_to(
            (roi_num, 1, 4))
        bbox_weights[roi_idx, rcnn_cls_gt[:], :] = \
            per_roi_weights * fg_bbox_mask
        bbox_weights = bbox_weights.reshape(0, -1)

    # Log-loss on the probability of the true class (floored at 1e-14).
    rcnn_cls_log = nd.log(nd.clip(rcnn_cls_pred, 1e-14, 1))
    picked_log = rcnn_cls_log[roi_idx, rcnn_cls_gt]
    cls_log_loss = -nd.sum(picked_log) / self._roi_batch_size.data(ctx)

    # Weighted smooth-L1 on the box regressions.
    rcnn_bbox_smooth_l1 = nd.smooth_l1(rcnn_bbox_pred - rcnn_bbox_gt,
                                       scalar=1.0)
    weighted_sum = nd.sum(rcnn_bbox_smooth_l1 * bbox_weights)
    bbox_smooth_l1_loss = weighted_sum / self._roi_batch_size.data(ctx)
    return cls_log_loss, bbox_smooth_l1_loss
def add(self, bg_batch, r_max, add_rate=1.0):
    """Composite randomly projected plates onto a batch of backgrounds.

    Each sample receives a plate with probability ``add_rate``; skipped
    samples keep their background and a label filled with -1.

    Parameters
    ----------
    bg_batch : NDArray
        Background images; axes 2 and 3 are used as height and width.
    r_max :
        Forwarded to ``self.random_projection_LP_6D``.
    add_rate : float
        Probability of adding a plate to a sample.

    Returns
    -------
    tuple of NDArray
        ``(img_batch, label_batch)``: blended images clipped to [0, 1] and
        labels of shape (batch, 1, 10) whose last entry is the plate type.
    """
    ctx = bg_batch.context
    batch_size = bg_batch.shape[0]
    output_size = (bg_batch.shape[2], bg_batch.shape[3])

    mask_batch = nd.zeros_like(bg_batch)
    image_batch = nd.zeros_like(bg_batch)
    label_batch = nd.ones((batch_size, 1, 10), ctx=ctx) * (-1)

    for idx in range(batch_size):
        if np.random.rand() > add_rate:
            continue
        LP, LP_type, _ = self.draw_LP()
        camera = self.project_rect_6d
        input_size = (camera.camera_h, camera.camera_w)
        mask, image, label = self.random_projection_LP_6D(
            LP, input_size, output_size, r_max)
        mask_batch[idx] = mask.as_in_context(ctx)
        image_batch[idx] = image.as_in_context(ctx)
        label_batch[idx, :, :-1] = label
        label_batch[idx, :, -1] = LP_type

    # Blend: background where the mask is 0, plate where it is 1.
    blended = bg_batch * (1 - mask_batch) + image_batch * mask_batch
    return nd.clip(blended, 0, 1), label_batch
def old_update(self, b_s, b_a, b_r, b_logpac):
    """Run one clipped-surrogate update step for a continuous policy.

    Parameters
    ----------
    b_s : array-like
        States, reshaped to (-1, observation_dim).
    b_a : array-like
        Actions, reshaped to (-1, action_dim).
    b_r : array-like
        Returns, reshaped to (-1, 1).
    b_logpac : array-like
        Log-probabilities of the actions under the rollout policy, reshaped
        to (-1, action_dim).
    """
    ctx = self.args.ctx

    def _as_rows(data, width):
        return nd.array(data, ctx=ctx).reshape((-1, width))

    states = _as_rows(b_s, self.observation_dim)
    taken_actions = _as_rows(b_a, self.action_dim)
    target_returns = _as_rows(b_r, 1)
    old_log_prob = _as_rows(b_logpac, self.action_dim)

    with autograd.record():
        v_pred, mu, sigma = self.net(states)

        # Value loss on the raw advantage.
        advantage = target_returns - v_pred
        vf_loss = nd.mean(nd.square(advantage))

        # Detach so the policy loss does not back-propagate into the critic.
        advantage = advantage.detach()

        # Clipped surrogate policy loss.
        new_log_prob = self.net.log_prob(taken_actions, mu, sigma)
        ratio = nd.exp(new_log_prob - old_log_prob)
        clip_param = self.args.clip_param
        unclipped = ratio * advantage
        clipped = nd.clip(ratio, 1.0 - clip_param,
                          1.0 + clip_param) * advantage
        actor_loss = -nd.mean(nd.minimum(unclipped, clipped))

        # Entropy is subtracted, i.e. maximised, to encourage exploration.
        entropy = self.net.entropy(sigma)
        loss = vf_loss * self.args.value_coefficient + actor_loss \
            - entropy * self.args.entropy_coefficient

    loss.backward()
    self.trainer.step(states.shape[0])
def log_rmse(net, features, labels):
    """Root-mean-square error between log-predictions and log-labels.

    Uses the module-level ``loss``; per the original comment it is an L2
    loss that carries a built-in 1/2 factor, which the factor 2 cancels.

    Returns
    -------
    float
        The scalar RMSE.
    """
    # Values below 1 become 1 so the logarithm is numerically stable;
    # nd.clip(x, min, max) limits a tensor to [min, max].
    lower, upper = 1, float('inf')
    clipped_preds = nd.clip(net(features), lower, upper)
    log_pred = clipped_preds.log()
    log_true = labels.log()
    return nd.sqrt(2 * loss(log_pred, log_true).mean()).asscalar()
def get_rmse_log(net, X_train, y_train):
    """Return the root MSE between log-predictions and log-targets.

    Predictions are floored at 1 before the logarithm. Relies on a
    module-level ``square_loss`` (presumably halved, hence the factor 2 —
    verify).
    """
    num_train = X_train.shape[0]
    log_pred = nd.log(nd.clip(net(X_train), 1, float('inf')))
    log_true = nd.log(y_train)
    total_loss = nd.sum(square_loss(log_pred, log_true)).asscalar()
    return np.sqrt(2 * total_loss / num_train)
def _compute_yolo_iou(self, F, boxes1, boxes2):
    """IoU of corresponding anchors.

    Parameters
    ----------
    F :
        Unused; the computation always goes through ``nd``.
    boxes1, boxes2 : NDArray
        5-D tensors whose last axis holds (centre x, centre y, width,
        height).

    Returns
    -------
    NDArray
        IoU per box with the last axis removed, clipped to [0, 1].

    Notes
    -----
    Two defects are fixed: ``nd.stack`` takes its inputs as positional
    arguments rather than a list, and box areas are width x height of the
    input boxes rather than a product of corner coordinates.
    """
    def _corners_and_area(boxes):
        # Centre/size -> corner representation, plus the box area.
        center_x = boxes[:, :, :, :, 0]
        center_y = boxes[:, :, :, :, 1]
        width = boxes[:, :, :, :, 2]
        height = boxes[:, :, :, :, 3]
        corners = nd.stack(center_x - width / 2.0, center_y - height / 2.0,
                           center_x + width / 2.0, center_y + height / 2.0,
                           axis=-1)
        return corners, width * height

    boxes1_new, area1 = _corners_and_area(boxes1)
    boxes2_new, area2 = _corners_and_area(boxes2)

    # The two corner points of the intersection rectangle.
    upperleft = nd.maximum(boxes1_new[:, :, :, :, :2],
                           boxes2_new[:, :, :, :, :2])
    lowerright = nd.minimum(boxes1_new[:, :, :, :, 2:],
                            boxes2_new[:, :, :, :, 2:])

    # Negative extents mean no overlap.
    intersection_dims = nd.maximum(0.0, lowerright - upperleft)
    intersection_area = (intersection_dims[:, :, :, :, 0] *
                         intersection_dims[:, :, :, :, 1])

    # Floor the union so degenerate boxes do not divide by zero.
    union_area = nd.maximum(1e-8, area1 + area2 - intersection_area)
    return nd.clip(intersection_area / union_area, a_min=0.0, a_max=1.0)
def implement_1(self, x, label):
    """Angular-margin (A-Softmax) logits, following the paper.

    Parameters
    ----------
    x : NDArray
        Input features, one row per sample.
    label : NDArray
        Ground-truth class index per sample.

    Returns
    -------
    NDArray
        Logits ``|x| * cos(theta)`` in which the ground-truth entry is
        blended towards ``|x| * phi(theta)`` with weight ``1 / (1 + lambda)``.

    Notes
    -----
    ``phi(theta) = (-1)^k * cos(m * theta) - 2k`` with
    ``k = floor(m * theta / pi)``. The previous code used ``sign`` for ``k``,
    which yields 1 for every positive angle and breaks the piecewise
    definition.
    """
    with x.context:
        # Normalise each weight row to unit length.
        w = self.weight.data()
        w_norm = w / nd.sqrt(nd.sum(nd.power(w, 2), axis=1)).reshape((-1, 1))

        # cos_theta = x.w / |x|, since |w| = 1.
        x_norm = nd.sqrt(nd.sum(nd.power(x, 2), axis=1))
        x_norm_col = x_norm.reshape((-1, 1))
        cos_theta = nd.dot(x, w_norm, transpose_b=True) / x_norm_col
        cos_theta = nd.clip(cos_theta, -1, 1)

        # cos(m * theta) from the per-margin expansion table.
        cos_m_theta = self.margin_cos[self.margin](cos_theta)

        # k is a piecewise constant, so it is computed outside autograd.
        with mx.autograd.pause():
            theta = nd.arccos(cos_theta)
            k = nd.floor(self.margin * theta / math.pi)

        # phi_theta applies where i == j, cos_theta where i != j.
        phi_theta = ((-1)**k) * cos_m_theta - 2 * k
        x_norm_phi_theta = x_norm_col * phi_theta
        x_norm_cos_theta = x_norm_col * cos_theta

        # One-hot mask selecting the i == j entries.
        with mx.autograd.pause():
            index = nd.one_hot(label, x_norm_phi_theta.shape[1])

        with mx.autograd.pause():
            lamb = self.__get_lambda()

        output = x_norm_cos_theta * 1.0
        output = output - x_norm_cos_theta * index / (1 + lamb)
        output = output + x_norm_phi_theta * index / (1 + lamb)
        return output
def log_rmse(features, labels, net, loss):
    """Root-mean-square error between log-predictions and log-labels.

    Prints the network parameters, the features and the raw predictions
    before computing the metric. Predictions are floored at 1 before the
    logarithm.

    Parameters
    ----------
    features, labels : NDArray
        Inputs and targets.
    net : callable
        Model; called once for the printout and once for the metric.
    loss : callable
        Per-sample loss applied to the log values (presumably halved
        squared error, hence the factor 2 — verify).

    Returns
    -------
    float
        The scalar RMSE.
    """
    print('1 ', net.collect_params())
    print('2 ', features)
    print('3 ', net(features))

    clipped_preds = nd.clip(net(features), 1, float('inf'))
    doubled_loss = 2 * loss(clipped_preds.log(), labels.log())
    return nd.sqrt(doubled_loss.mean()).asscalar()
def dynamic_range_compression(x, c=1, clip_val=1e-5):
    """Log-compress ``x`` after flooring it at ``clip_val``.

    Parameters
    ----------
    x : NDArray
        Input values.
    c : float
        Compression factor multiplied onto the logarithm.
    clip_val : float
        Lower bound applied before the logarithm.

    Returns
    -------
    NDArray
        ``log(clip(x, clip_val, max(x))) * c``.
    """
    # The upper bound is the tensor's own maximum, so only the floor acts.
    upper = x.max().asscalar()
    floored = nd.clip(x, a_min=clip_val, a_max=upper)
    return nd.log(floored) * c
def embedding(data_iterator, net, ctx=mx.cpu()):
    """Collect network codes, labels and displayable images, then save them.

    For each batch the second network output is indexed by sample and label,
    the labels are accumulated, and the inputs are converted to uint8
    images. Results are written to ``convet.ndarray``,
    ``resize_image.ndarray`` and ``label.ndarray``.

    Parameters
    ----------
    data_iterator : iterable
        Yields batches understood by ``_get_batch``.
    net : callable
        Returns a pair whose second element is indexed as
        ``[sample, :, label]``.
    ctx : Context
        Device on which batches are evaluated.
    """
    def _extend(collected, chunk):
        # First chunk starts the collection; later ones append on axis 0.
        if collected is None:
            return chunk
        return nd.concat(collected, chunk, dim=0)

    convnet_codes = None
    resize_images = None
    labels = None

    for batch in data_iterator:
        data, label = _get_batch(batch, ctx)
        sample_idx = nd.arange(data.shape[0])
        _, output = net(data)
        output = output[sample_idx.as_in_context(ctx), :, label]
        output.wait_to_read()

        convnet_codes = _extend(convnet_codes, output)
        labels = _extend(labels, label)

        images = data.copyto(mx.cpu())
        if images.shape[1] != 1:
            # Add back per-channel offsets for multi-channel inputs.
            images[:, 0, :, :] += 0.4914
            images[:, 1, :, :] += 0.4822
            images[:, 2, :, :] += 0.4465
        # NOTE(review): the uint8 conversion is applied to every batch,
        # including single-channel ones — confirm this matches the intent.
        images = nd.clip(images * 255, 0, 255).astype('uint8')
        resize_images = _extend(resize_images, images)

    nd.save('convet.ndarray', convnet_codes.as_in_context(mx.cpu()))
    nd.save('resize_image.ndarray', resize_images)
    nd.save('label.ndarray', labels.astype('int32').as_in_context(mx.cpu()))
def mmd_loss(x, y, ctx_model, t=0.1, kernel='diffusion'):
    '''
    Compute an MMD-style loss between two batches of samples.

    :param x: NDArray of shape (batch_size, latent dimension)
    :param y: NDArray with the same latent dimension; its batch size may
        differ from that of ``x``
    :param ctx_model: context on which helper tensors are created
    :param t: temperature of the information diffusion kernel (unused for
        ``kernel='tv'``)
    :param kernel: ``'tv'`` uses pairwise L1 distances; any other value uses
        the information diffusion kernel
    :return: ``sum_xx + sum_yy - sum_xy``
    '''
    eps = 1e-6
    n = x.shape[0]
    m = y.shape[0]

    if kernel == 'tv':
        def _pair_sum(a, b, same):
            # Sum of L1 distances over i < j pairs (same set) or all pairs.
            total = nd.zeros(1, ctx=ctx_model)
            for i in range(a.shape[0]):
                first = i + 1 if same else 0
                for j in range(first, b.shape[0]):
                    total = total + nd.norm(a[i] - b[j], ord=1)
            return total

        sum_xx = _pair_sum(x, x, True) / (n * (n - 1))
        sum_yy = _pair_sum(y, y, True) / (m * (m - 1))
        # Previously this normalisation was applied to sum_yy a second time,
        # leaving the cross term un-normalised.
        sum_xy = _pair_sum(x, y, False) / (n * m)
    else:
        # Square roots of the (clipped) inputs; their dot products are
        # clipped to [0, 1 - eps] so arccos is well defined.
        qx = nd.sqrt(nd.clip(x, eps, 1))
        qy = nd.sqrt(nd.clip(y, eps, 1))

        def _diffusion_kernel(q_a, q_b):
            # Un-normalised kernel; the (4*pi*t)**(-dim/2) constant is
            # intentionally left out.
            gram = nd.clip(nd.dot(q_a, q_b, transpose_b=True), 0, 1 - eps)
            return nd.exp(-nd.square(nd.arccos(gram)) / t)

        k_xx = _diffusion_kernel(qx, qx)
        k_yy = _diffusion_kernel(qy, qy)
        k_xy = _diffusion_kernel(qx, qy)

        # Exclude the diagonal (self-similarity) from the within-set means.
        off_diag_x = 1 - nd.eye(n, ctx=ctx_model)
        off_diag_y = 1 - nd.eye(m, ctx=ctx_model)
        sum_xx = (k_xx * off_diag_x).sum() / (n * (n - 1))
        sum_yy = (k_yy * off_diag_y).sum() / (m * (m - 1))
        sum_xy = 2 * k_xy.sum() / (n * m)

    return sum_xx + sum_yy - sum_xy
def augment(points, xforms, r=None):
    """Transform point sets and optionally add clipped Gaussian jitter.

    Parameters
    ----------
    points, xforms : NDArray
        Batches of points and transforms, combined with ``nd.batch_dot``.
    r : float, optional
        Jitter scale. When ``None`` no jitter is added; otherwise normal
        noise scaled by ``r`` and clipped to ``[-5r, 5r]`` is added.

    Returns
    -------
    NDArray
        The transformed (and possibly jittered) points.
    """
    points_xformed = nd.batch_dot(points, xforms, name='points_xformed')
    if r is None:
        return points_xformed

    noise = r * mx.random.normal(shape=points_xformed.shape)
    bound = 5 * r
    jitter = nd.clip(noise, -bound, bound, name='jitter_clipped')
    return points_xformed + jitter
def calMAE(net, features, labels):
    """Mean absolute error between log-labels and log-predictions.

    Predictions are floored at 1 before the logarithm. The absolute
    differences are accumulated entry by entry along the first axis and
    divided by the number of entries.

    Returns
    -------
    float
        The scalar MAE.
    """
    clipped_preds = nd.clip(net(features), 1, float('inf'))
    log_diff = labels.log() - clipped_preds.log()

    abs_total = 0
    count = 0
    for count, entry in enumerate(log_diff, start=1):
        abs_total += entry.abs()
    return (abs_total / count).asscalar()
def bbox_iou(box1, box2, transform=True):
    """Return the row-wise IoU of two sets of bounding boxes.

    Parameters
    ----------
    box1, box2 : array-like
        Boxes, one per row with four values; always passed through
        ``nd.array``. Absolute values are taken first.
    transform : bool
        When true rows are (centre x, centre y, width, height) and are
        converted to (x1, y1, x2, y2) first.

    Returns
    -------
    NDArray
        IoU per row, clipped to [1e-5, 1 - 1e-5].

    Raises
    ------
    ValueError
        If either input is empty.
    """
    box1 = nd.array(box1)
    box2 = nd.array(box2)
    if 0 in (box1.size, box2.size):
        raise ValueError

    box1 = nd.abs(box1)
    box2 = nd.abs(box2)

    if transform:
        converted = []
        for box in (box1, box2):
            # Centre/size rows -> corner rows, written into a copy.
            corner = box.copy()
            corner[:, 0] = box[:, 0] - box[:, 2] / 2.0
            corner[:, 1] = box[:, 1] - box[:, 3] / 2.0
            corner[:, 2] = box[:, 0] + box[:, 2] / 2.0
            corner[:, 3] = box[:, 1] + box[:, 3] / 2.0
            converted.append(corner)
        box1, box2 = converted

    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Corners of the intersection rectangle.
    left = nd.where(b1_x1 > b2_x1, b1_x1, b2_x1)
    top = nd.where(b1_y1 > b2_y1, b1_y1, b2_y1)
    right = nd.where(b1_x2 < b2_x2, b1_x2, b2_x2)
    bottom = nd.where(b1_y2 < b2_y2, b1_y2, b2_y2)

    inter_area = nd.clip(right - left + 1, a_min=0, a_max=10000) * \
        nd.clip(bottom - top + 1, a_min=0, a_max=10000)

    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
    iou = inter_area / (b1_area + b2_area - inter_area)
    return nd.clip(iou, 1e-5, 1. - 1e-5)
def update(self, obs, returns, masks, actions, values, logpacs):
    """Run one clipped-surrogate (PPO-style) update for a continuous policy.

    Parameters
    ----------
    obs : array-like
        Observations, reshaped to (-1, observation_dim).
    returns, values : array-like
        Empirical returns and rollout-time value estimates; their difference
        is the (un-normalised) advantage.
    masks :
        Unused; kept for call compatibility.
    actions : array-like
        Actions taken, reshaped to (-1, action_dim).
    logpacs : array-like
        Log-probabilities of the actions under the rollout policy, reshaped
        to (-1, action_dim).
    """
    ctx = self.args.ctx

    def _as_rows(data, width):
        return nd.array(data, ctx=ctx).reshape((-1, width))

    # Advantage normalisation is intentionally disabled.
    advantages = _as_rows(returns - values, 1)
    obs = _as_rows(obs, self.observation_dim)
    actions = _as_rows(actions, self.action_dim)
    values = _as_rows(values, 1)
    returns = _as_rows(returns, 1)
    oldpi_log_prob = _as_rows(logpacs, self.action_dim)

    clip_param = self.args.clip_param
    with autograd.record():
        vpred, mu, sigma = self.net(obs)

        # Value loss: pessimistic max of clipped and unclipped errors.
        vpred_clipped = values + nd.clip(vpred - values,
                                         -clip_param, clip_param)
        unclipped_err = nd.square(vpred - returns)
        clipped_err = nd.square(vpred_clipped - returns)
        vf_loss = nd.mean(nd.maximum(unclipped_err, clipped_err))

        # Clipped surrogate policy loss.
        pi_log_prob = self.net.log_prob(actions, mu, sigma)
        ratio = nd.exp(pi_log_prob - oldpi_log_prob)
        surr1 = ratio * advantages
        surr2 = nd.clip(ratio, 1.0 - clip_param,
                        1.0 + clip_param) * advantages
        actor_loss = -nd.mean(nd.minimum(surr1, surr2))

        # Entropy term (subtracted, i.e. maximised).
        entropy = self.net.entropy(sigma)

        loss = vf_loss * self.args.value_coefficient + actor_loss \
            - entropy * self.args.entropy_coefficient

    # Compute gradients and apply the update.
    loss.backward()
    self.trainer.step(obs.shape[0])
def yolo2_feature_spliter(feature, num_classes, anchor_scales):
    '''
    Transpose/reshape/organize convolution outputs.

    The feature map is moved to channels-last and its channel axis is split
    into (anchors, num_classes + 5); the original author's example shapes
    are (32, 16, 16, 14) -> (32, 16, 16, 2, 7).

    Returns ``(output_to_draw, cls_pred, scores, xywh)`` where
    ``output_to_draw`` is [category, score, left, top, right, bottom]
    concatenated on the last axis and ``xywh`` is the sigmoid-ed centre
    concatenated with the raw size prediction.
    '''
    stride = num_classes + 5
    feature = nd.transpose(feature, [0, 2, 3, 1])
    feature = feature.reshape((0, 0, 0, -1, stride))

    def _take(begin, end):
        # Slice a channel range out of the last axis.
        return feature.slice_axis(begin=begin, end=end, axis=-1)

    # Class scores, then the sigmoid-ed objectness score.
    cls_pred = _take(0, num_classes)
    scores = nd.sigmoid(_take(num_classes, num_classes + 1))

    # Centre prediction, in range (0, 1) within each grid cell. At this
    # point (x, y) is relative to the current cell; prediction uses
    # positions relative to the whole image.
    xy = nd.sigmoid(_take(num_classes + 1, num_classes + 3))
    center_x, center_y = transform_center(xy)

    # Width/height prediction; likewise converted to sizes relative to the
    # whole image for the prediction stage.
    wh = _take(num_classes + 3, num_classes + 5)
    box_w, box_h = transform_size(wh, anchor_scales)

    # Final class prediction.
    category = nd.argmax(cls_pred, axis=-1, keepdims=True)

    # Training uses centre+size boxes while prediction uses corner boxes, so
    # the corner form is prepared here. A box centred near the image border
    # can extend past it, giving corners below 0 or above 1 — hence the clip.
    left = nd.clip(center_x - box_w / 2, 0, 1)
    top = nd.clip(center_y - box_h / 2, 0, 1)
    right = nd.clip(center_x + box_w / 2, 0, 1)
    bottom = nd.clip(center_y + box_h / 2, 0, 1)

    # nd.concat takes its inputs as positional arguments, not as a list.
    output_to_draw = nd.concat(category, scores, left, top, right, bottom,
                               dim=-1)
    return output_to_draw, cls_pred, scores, nd.concat(xy, wh, dim=-1)
def step(self, indices, weights, grads, states):
    """Apply one optimizer update to every (weight, grad, state) triple.

    The update combines a rectified-Adam style step (variance rectification
    driven by ``N_sma``), optional gradient centralization, and a look-ahead
    synchronisation with a slow weight copy every ``self.k`` steps.

    Parameters
    ----------
    indices : iterable
        Parameter indices, used for the update count and the per-parameter
        learning rate / weight decay lookups.
    weights, grads : iterable of NDArray
        Parameters (updated in place) and their gradients.
    states : iterable
        Per-parameter ``(step, exp_avg, exp_avg_sq, slow_buffer)`` tuples;
        all four entries are updated in place.
    """
    for index, weight, grad, state in zip(indices, weights, grads, states):
        self._update_count(index)
        lr = self._get_lr(index)
        wd = self._get_wd(index)
        step, exp_avg, exp_avg_sq, slow_buffer = state
        step[0] += 1
        # preprocess grad: rescale in place, optionally clip, then add the
        # weight-decay term directly to the gradient
        grad *= self.rescale_grad
        if self.clip_gradient is not None:
            grad = nd.clip(grad, -self.clip_gradient, self.clip_gradient)
        grad += wd * weight
        # Gradient Centralization operation for Conv layers and FC layers:
        # subtract the mean taken over every axis except the first
        if self.use_gc and len(grad.shape) > self.gc_gradient_threshold:
            grad = grad - grad.mean(axis=tuple(range(1, len(grad.shape))), keepdims=True)
        # compute mean moving avg and variance moving avg
        exp_avg[:] = (exp_avg * self.beta1) + ((1 - self.beta1) * grad)
        exp_avg_sq[:] = (exp_avg_sq * self.beta2) + (
            (1 - self.beta2) * grad * grad)
        # N_sma and step_size depend only on the step count, so they are
        # cached in a 10-slot buffer keyed by step % 10
        buffered = self.radam_buffer[int(step[0] % 10)]
        if step[0] == buffered[0]:
            N_sma, step_size = buffered[1], buffered[2]
        else:
            buffered[0] = step[0]
            beta2_t = self.beta2**step[0]
            N_sma_max = 2 / (1 - self.beta2) - 1
            N_sma = N_sma_max - 2 * step[0] * beta2_t / (1 - beta2_t)
            buffered[1] = N_sma
            if N_sma > self.n_sma_threshhold:
                # rectified step size, including the beta1 bias correction
                step_size = math.sqrt(
                    (1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) *
                    (N_sma - 2) / N_sma * N_sma_max /
                    (N_sma_max - 2)) / (1 - self.beta1**step[0])
            else:
                # only the beta1 bias correction
                step_size = 1.0 / (1 - self.beta1**step[0])
            buffered[2] = step_size
            self.radam_buffer[int(step[0] % 10)] = buffered
        # apply lr
        new_lr = -step_size * lr
        if N_sma > self.n_sma_threshhold:
            # adaptive update: divide by the root of the second moment
            denom = exp_avg_sq.sqrt() + self.epsilon
            weight[:] += new_lr * (exp_avg / denom)
        else:
            # un-adapted update using the first moment only
            weight[:] += new_lr * exp_avg
        # integrated look ahead: every k steps move the slow weights a
        # fraction alpha towards the fast weights and copy them back
        if step[0] % self.k == 0:
            slow_buffer[:] += (weight - slow_buffer) * self.alpha
            weight[:] = slow_buffer
def test_clip():
    """Check ``nd.clip`` on a large broadcast array.

    Builds a (LARGE_X, SMALL_Y) array whose row ``i`` is filled with ``i``,
    clips it to [100, 1000] and verifies every entry of the last row equals
    the upper bound.
    """
    column = nd.arange(0, LARGE_X).reshape(LARGE_X, 1)
    tiled = nd.broadcast_to(column, shape=(column.shape[0], SMALL_Y))
    clipped = nd.clip(tiled, a_min=100, a_max=1000)
    last_row = clipped[-1].asnumpy()
    assert np.sum(last_row == 1000) == tiled.shape[1]
def forward(self, x):
    """Clamp ``x`` element-wise to ``[self._low, self._high]``."""
    lower, upper = self._low, self._high
    return nd.clip(x, lower, upper)