def class_and_score_forward(x):
    """Split the last axis of ``x`` into two groups and apply a sigmoid to each.

    Channels ``[0, 3)`` of the last axis form the class part and channels
    ``[3, 5)`` form the concentration part.

    Returns
    -------
    tuple
        ``(class_part, concentration_part)``, both after the sigmoid.
    """
    class_logits = nd.slice_axis(x, axis=-1, begin=0, end=3)
    concentration_logits = nd.slice_axis(x, axis=-1, begin=3, end=5)
    return nd.sigmoid(class_logits), nd.sigmoid(concentration_logits)
def _minimize(self, data, labels):
    """Process one lot in batches, then apply a single parameter update.

    The first ``lot_size`` samples of ``data`` / ``labels`` (axis 0) are
    consumed in chunks of ``self._batch_size``. Each chunk is handed to
    ``self._accumulate_batch_gradients`` together with a per-parameter
    accumulator, and the accumulated gradients are finally passed to
    ``self._update_params``.

    Returns
    -------
    The summed loss returned by the batch calls, divided by ``lot_size``.
    """
    lot_size = self._hyperparams['lot_size']
    step = self._batch_size

    # One zero-initialised accumulator per parameter, keyed by parameter name.
    grad_sums = {
        name: nd.zeros_like(param) for name, param in self._params.items()
    }

    total_loss = 0
    for lo in range(0, lot_size, step):
        hi = min(lot_size, lo + step)
        chunk_data = nd.slice_axis(data, axis=0, begin=lo, end=hi)
        chunk_labels = nd.slice_axis(labels, axis=0, begin=lo, end=hi)

        # Adds this batch's gradient contribution into grad_sums.
        total_loss += self._accumulate_batch_gradients(
            chunk_data, chunk_labels, grad_sums)

        # Wait for pending computation so memory can be released before the
        # next batch is processed.
        nd.waitall()

    # Apply the gradients collected over the whole lot.
    self._update_params(grad_sums)

    # asscalar() blocks; the next step depends on this result.
    return total_loss.asscalar() / lot_size
def test_slice_axis():
    """Check ``nd.slice_axis`` along both axes of a SMALL_Y x LARGE_X tensor."""
    tensor = create_2d_tensor(rows=SMALL_Y, columns=LARGE_X)
    half_rows = SMALL_Y // 2
    half_cols = LARGE_X // 2

    top = nd.slice_axis(tensor, axis=0, begin=0, end=half_rows)
    left = nd.slice_axis(tensor, axis=1, begin=0, end=half_cols)

    # Only the sliced axis should shrink, and it should shrink to half.
    assert top.shape[0] == tensor.shape[0] // 2
    assert left.shape[1] == tensor.shape[1] // 2

    # Value checks: these presume create_2d_tensor fills each row with its own
    # row index -- TODO confirm against the helper.
    assert top[-1][0] == (half_rows - 1)
    assert left[-1][-1] == (SMALL_Y - 1)
def forward(self, refined_anchors, targets, num_objects):
    """Generate training targets image by image and stack them over the batch.

    Parameters
    ----------
    refined_anchors
        Corner boxes ``(xmin, ymin, xmax, ymax)``, shape (B, N, 4).
    targets
        Ground truth ``(xmin, ymin, xmax, ymax, label)``, shape (B, P, 5).
    num_objects
        Number of objects in each image, shape (B, ).

    Returns
    -------
    tuple
        ``(cls_targets, box_targets, box_masks)`` concatenated along axis 0:
        (B, N), (B, N, 4) and (B, N, 4).
    """
    per_image_cls, per_image_box, per_image_mask = [], [], []
    with autograd.pause():
        for anchor, gt, count in zip(refined_anchors, targets, num_objects):
            # anchor: (N, 4), gt: (P, 5), count: scalar
            n_valid = count[0].asscalar()
            # Keep only the first n_valid ground-truth rows: (M, 5)
            gt = nd.slice_axis(gt, axis=0, begin=0, end=n_valid)

            # Last column is the label: (M, 1) -> (1, M)
            gt_id = nd.slice_axis(gt, axis=1, begin=-1, end=None)
            gt_id = gt_id.reshape((1, -1))
            # Remaining columns are the box: (M, 4) -> (1, M, 4)
            gt_box = nd.slice_axis(gt, axis=1, begin=0, end=-1)
            gt_box = gt_box.reshape((1, -1, 4))

            # ious: (N, 1, M) -> (1, N, M)
            pair_iou = nd.contrib.box_iou(anchor, gt_box)
            ious = nd.transpose(pair_iou, (1, 0, 2))

            # matches: index of the matched object in [0, M-1], -1 if unmatched; (1, N)
            matches = self._matcher(ious)
            # samples: +1 object, -1 background, 0 ignored; (1, N)
            samples = self._sampler(matches)
            # cls_target: >1 for objects (fg), 0 for bg, -1 for ignored; (1, N)
            cls_target = self._cls_encoder(samples, matches, gt_id)

            # (N, 4) -> (1, N, 4)
            anchor = nd.expand_dims(anchor, axis=0)
            box_target, box_mask = self._box_encoder(
                samples, matches, anchor, gt_box)  # (1, N, 4)

            per_image_cls.append(cls_target)
            per_image_box.append(box_target)
            per_image_mask.append(box_mask)

        cls_targets = nd.concat(*per_image_cls, dim=0)  # (B, N)
        box_targets = nd.concat(*per_image_box, dim=0)  # (B, N, 4)
        # (B, N, 4); positive boxes are 1.0, others are 0.0
        box_masks = nd.concat(*per_image_mask, dim=0)
    return cls_targets, box_targets, box_masks
def split_and_load_data(batch, ctx_list, batch_size):
    """Split every array of a mini-batch along axis 0 and copy the parts to devices.

    Each context receives ``batch_size // len(ctx_list)`` consecutive samples.
    When ``batch_size`` is not a multiple of the number of contexts, the
    trailing ``batch_size % len(ctx_list)`` samples are not assigned to any
    context.

    :param batch: iterable of NDArrays (e.g. ``[data, label]``)
    :param ctx_list: list of contexts to distribute the samples over
    :param batch_size: number of samples in the mini-batch
    :return: list with one entry per input array, each a list of per-context
        slices, e.g. ``[[data1, data2, data3], [label1, label2, label3]]``
    """
    per_ctx = batch_size // len(ctx_list)

    def _scatter(array):
        # Slice `array` into consecutive per-context chunks and move each one.
        parts = []
        for position, ctx in enumerate(ctx_list):
            lo = position * per_ctx
            hi = min(lo + per_ctx, batch_size)
            chunk = nd.slice_axis(array, axis=0, begin=lo, end=hi)
            parts.append(chunk.as_in_context(ctx))
        return parts

    return [_scatter(array) for array in batch]
def yolo2_target(scores, boxes, labels, anchors, ignore_label=-1, thresh=0.5):
    """Generate YOLOv2 training targets given predictions and labels.

    ``scores`` is used only for its shape ``(b, h, w, n, 1)`` and its context.
    The original notes give a prediction of shape (32, 16, 16, 2, 5) and
    labels of shape (32, 1, 5) as an example, where each label row is
    ``(class, xmin, ymin, xmax, ymax)`` with corners relative to the whole
    image.

    Parameters
    ----------
    scores : NDArray
        Predicted objectness; supplies the output shape and device.
    boxes : NDArray
        Unused; kept for interface compatibility.
    labels : NDArray
        Ground truth, indexed as ``labels[b][:, 0:5]``. Rows with a class
        value not greater than -0.5 are skipped.
    anchors : array-like
        Anchor sizes, reshaped to ``(-1, 2)`` as (width, height).
    ignore_label : number
        Value used to fill ``target_id`` where no ground truth is assigned.
    thresh : float
        Unused; kept for interface compatibility.

    Returns
    -------
    tuple
        ``(target_id, target_score, target_box, sample_weight)``.
    """
    batch_size, h, w, n, _ = scores.shape
    anchors = np.reshape(np.array(anchors), (-1, 2))
    target_score = nd.zeros((batch_size, h, w, n, 1), ctx=scores.context)
    target_id = nd.ones_like(target_score, ctx=scores.context) * ignore_label
    target_box = nd.zeros((batch_size, h, w, n, 4), ctx=scores.context)
    # sample_weight is 1 only for the anchor that best matches a ground truth.
    sample_weight = nd.zeros((batch_size, h, w, n, 1), ctx=scores.context)

    # Iterate over the batch dimension of `scores`. The previous code used
    # `output.shape[0]`, a name that is not defined in this function.
    for b in range(batch_size):
        # find the best match for each ground-truth
        label = labels[b].asnumpy()
        # filter out invalid (negative) class labels
        valid_label = label[np.where(label[:, 0] > -0.5)[0], :]
        # shuffle because multiple gt could match one anchor; the last match
        # written wins, so shuffling makes that choice random
        np.random.shuffle(valid_label)
        for gt in valid_label:
            gx = (gt[1] + gt[3]) / 2
            gy = (gt[2] + gt[4]) / 2
            gw = gt[3] - gt[1]
            gh = gt[4] - gt[2]
            # grid cell (column, row) responsible for this ground truth
            ind_x = int(gx * w)
            ind_y = int(gy * h)
            # offset of the gt centre from the cell's top-left corner; this is
            # the regression target for the centre
            tx = gx * w - ind_x
            ty = gy * h - ind_y
            # gt width / height measured in feature-map cells
            gw = gw * w
            gh = gh * h
            # find the best match using width and height only, assuming
            # centers are identical
            intersect = (np.minimum(anchors[:, 0], gw) *
                         np.minimum(anchors[:, 1], gh))
            ovps = intersect / (
                gw * gh + anchors[:, 0] * anchors[:, 1] - intersect)
            # index of the anchor shape closest to the gt shape
            best_match = int(np.argmax(ovps))
            # class id and objectness for the best-matching anchor
            target_id[b, ind_y, ind_x, best_match, :] = gt[0]
            target_score[b, ind_y, ind_x, best_match, :] = 1.0
            # log-ratio of gt size to anchor size
            tw = np.log(gw / anchors[best_match, 0])
            th = np.log(gh / anchors[best_match, 1])
            target_box[b, ind_y, ind_x, best_match, :] = mx.nd.array(
                [tx, ty, tw, th])
            sample_weight[b, ind_y, ind_x, best_match, :] = 1.0
    return target_id, target_score, target_box, sample_weight
def forward(self, x):
    """Evaluate the tree on ``x`` using hard routing.

    With router layers present, ``self._contextify(x)(root)`` yields four
    dicts. Their values are stacked in sorted-key order, one row of the
    router matrix is selected per sample via an argmax over the router
    outputs, and the selection is multiplied with the stacked embeddings.

    Without router layers, the contextified output is multiplied by a tensor
    of ones shaped like ``x``.

    NOTE: an earlier, commented-out soft-routing variant (weighting the
    router matrix by per-depth weights) used to sit in this method and has
    been left out here.
    """
    first_entry = next(iter(self._structure.items()))
    root = first_entry[0]

    if len(self._routerlayer) == 0:
        ones = nd.ones_like(nd.slice_axis(x, axis=1, begin=0, end=None))
        return self._contextify(x)(root) * ones

    # weight_d is produced by the traversal but not needed for hard routing.
    router_d, router_mat_d, weight_d, embedd_d = self._contextify(x)(root)

    embedd_rows = [embedd_d[key] for key in sorted(embedd_d)]
    embedd = nd.stack(*embedd_rows, axis=0)

    router_cols = [router_d[key] for key in sorted(router_d)]
    router = nd.stack(*router_cols, axis=-1)

    router_mats = [router_mat_d[key] for key in sorted(router_mat_d)]
    router_mat = nd.stack(*router_mats, axis=1)

    # Per-sample index of the selected router output.
    routing_score = nd.maximum(0, 1 / (router + 0.5))
    where = nd.argmax(routing_score, axis=1)

    selected = [router_mat[i][k] for i, k in enumerate(where)]
    head = nd.concat(*selected, dim=0)
    return nd.dot(head, embedd)
def _shard(split, x, l_fn, r_fn):
    """Route the rows of ``x`` to ``l_fn`` / ``r_fn`` according to ``split``.

    Rows are reordered by ``nd.argsort(split)``. If the first sorted split
    value is positive, all rows go to ``r_fn``; if the last one is negative,
    all rows go to ``l_fn``. Otherwise a split point is computed and the rows
    before it go to ``l_fn`` while the rest go to ``r_fn``. Nothing is
    returned.
    """
    order = nd.argsort(split, axis=None)
    sorted_x = x[order, :]
    sorted_split = split[order]

    if sorted_split[0] > 0:
        r_fn(sorted_x)
        return
    if sorted_split[-1] < 0:
        l_fn(sorted_x)
        return

    # Mixed signs: derive the boundary index from rank * sign.
    signed_rank = nd.argsort(sorted_split, axis=0) * nd.sign(sorted_split)
    boundary = nd.argsort(signed_rank, axis=None)[0] + 1
    cut = int(boundary.asscalar())

    l_fn(nd.slice_axis(sorted_x, 0, 0, cut))
    r_fn(nd.slice_axis(sorted_x, 0, cut, None))
def test_ssd_custom(net, valid_iter, ctx):
    """Evaluate ``net`` on ``valid_iter`` with the VOC07 mAP metric.

    Each batch is ``(images, labels)``; column 0 of the labels' last axis is
    the class id and the remaining columns are the box. Throughput is logged
    per batch and every metric entry is logged at the end.

    Returns
    -------
    The last value reported by the metric.
    """
    metric = gcv.utils.metrics.voc_detection.VOC07MApMetric(
        iou_thresh=0.5,
        class_names=('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
                     'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
                     'horse', 'motorbike', 'person', 'pottedplant', 'sheep',
                     'sofa', 'train', 'tvmonitor'))
    net.hybridize(static_alloc=True, static_shape=True)

    tick = time.time()
    for k, batch in enumerate(valid_iter):
        images = batch[0].as_in_context(ctx)
        annotations = batch[1].as_in_context(ctx)
        ids, scores, bboxes = net(images)

        gt_bboxes = nd.slice_axis(annotations, axis=-1, begin=1, end=None)
        gt_ids = nd.slice_axis(annotations, axis=-1, begin=0, end=1)

        # The metric is fed one batch at a time, wrapped in one-element lists.
        metric.update(pred_bboxes=[bboxes],
                      pred_labels=[ids],
                      pred_scores=[scores],
                      gt_bboxes=[gt_bboxes],
                      gt_labels=[gt_ids])

        elapsed = time.time() - tick
        logger.info("test batch {} speeds {}".format(
            k, images.shape[0] / elapsed))
        tick = time.time()

    names, values = metric.get()
    for name, value in zip(names, values):
        logger.info("{} {}".format(name, value))
    return values[-1]
def forward(self, scores, offsets, anchors, img):
    """Turn anchor scores and offsets into a fixed number of proposals.

    Offsets are decoded against the anchors, clipped with ``self._cliper``,
    boxes narrower or shorter than ``self._min_size`` are invalidated (score
    and coordinates set to -1), NMS is applied, and the first ``post_nms``
    rows are returned.

    Returns
    -------
    tuple
        ``(rpn_score, rpn_bbox)``: the first column and the remaining columns
        of the kept rows.
    """
    # Training and inference use different pre-/post-NMS limits.
    training = autograd.is_training()
    pre_nms = self._train_pre_nms if training else self._test_pre_nms
    post_nms = self._train_post_nms if training else self._test_post_nms

    with autograd.pause():
        # Apply the predicted offsets to the anchors.
        decoded = self._bbox_decoder(offsets, self._bbox_tocenter(anchors))
        rois = self._cliper(decoded, img)

        # Invalidate every ROI smaller than the configured minimum size.
        x_min, y_min, x_max, y_max = nd.split(rois, num_outputs=4, axis=-1)
        roi_w = x_max - x_min
        roi_h = y_max - y_min
        too_small = (roi_w < self._min_size) + (roi_h < self._min_size)

        # Set the score at those positions to -1 ...
        scores = nd.where(too_small, nd.ones_like(scores) * -1, scores)
        # ... and likewise all four coordinates.
        too_small_4 = nd.repeat(too_small, repeats=4, axis=-1)
        rois = nd.where(too_small_4, nd.ones_like(rois) * -1, rois)

        # Non-maximum suppression on [score, x_min, y_min, x_max, y_max].
        candidates = nd.concat(scores, rois, dim=-1)
        candidates = nd.contrib.box_nms(candidates,
                                        overlap_thresh=self._nms_thresh,
                                        topk=pre_nms,
                                        coord_start=1,
                                        score_index=0,
                                        id_index=-1,
                                        force_suppress=True)

        # Keep the first post_nms rows and split score from box.
        kept = nd.slice_axis(candidates, axis=1, begin=0, end=post_nms)
        rpn_score = nd.slice_axis(kept, axis=-1, begin=0, end=1)
        rpn_bbox = nd.slice_axis(kept, axis=-1, begin=1, end=None)
    return rpn_score, rpn_bbox
def calc_auc(self, label, output):
    """Accumulate class-1 probabilities and labels for a later AUC computation.

    ``output`` is normalised with a softmax over axis 1 and the column at
    index 1 is appended (along axis 0) to ``self.global_score``; ``label`` is
    appended to ``self.global_lable``. Nothing is returned.

    The softmax is computed with ``nd.softmax`` rather than a manual
    ``exp / sum(exp)``, which overflows to inf/nan for large logits.
    """
    probabilities = nd.softmax(output, axis=1)
    score = nd.slice_axis(probabilities, axis=1, begin=1, end=2)

    if self.global_score is None:
        # First call: start the running buffers.
        self.global_score = score
        self.global_lable = label
    else:
        self.global_score = nd.concat(self.global_score, score, dim=0)
        self.global_lable = nd.concat(self.global_lable, label, dim=0)
def refine_bbox_nd(bbox, bbox_delta, im_info=None, means=None, stds=None):
    """Apply regression deltas to corner boxes and optionally clip to the image.

    Parameters
    ----------
    bbox
        Boxes split along axis 1 into ``xmin, ymin, xmax, ymax``.
    bbox_delta
        Deltas, reshaped to ``(0, -1, 4)`` and split along axis 2 into
        ``dx, dy, dw, dh``.
    im_info
        Optional; assumed to be ``[[height, width, scale]]`` with shape
        (1, 3). When given, boxes are clipped to ``[0, width-1]`` /
        ``[0, height-1]``.
    means, stds
        Optional de-normalisation for the deltas; applied only when both are
        given.

    Returns
    -------
    The refined boxes, with the four coordinates concatenated along axis 1.
    """
    x_lo, y_lo, x_hi, y_hi = nd.split(data=bbox, num_outputs=4, axis=1)
    box_w = x_hi - x_lo + 1.
    box_h = y_hi - y_lo + 1.
    ctr_x = 0.5 * (x_lo + x_hi)
    ctr_y = 0.5 * (y_lo + y_hi)

    deltas = nd.Reshape(data=bbox_delta, shape=(0, -1, 4))
    dx, dy, dw, dh = nd.split(data=deltas, num_outputs=4, axis=2,
                              squeeze_axis=1)
    if means is not None and stds is not None:
        # Undo the normalisation that was applied to the regression targets.
        dx = dx * stds[0] + means[0]
        dy = dy * stds[1] + means[1]
        dw = dw * stds[2] + means[2]
        dh = dh * stds[3] + means[3]

    # Shift the centre by a delta scaled with the box size; rescale the size.
    new_ctr_x = nd.broadcast_add(
        lhs=ctr_x, rhs=nd.broadcast_mul(lhs=box_w, rhs=dx))
    new_ctr_y = nd.broadcast_add(
        lhs=ctr_y, rhs=nd.broadcast_mul(lhs=box_h, rhs=dy))
    new_w = nd.broadcast_mul(lhs=box_w, rhs=nd.exp(dw))
    new_h = nd.broadcast_mul(lhs=box_h, rhs=nd.exp(dh))
    half_w = 0.5 * (new_w - 1.)
    half_h = 0.5 * (new_h - 1.)

    # Back to corner form, with a new axis 1 holding the coordinate index.
    corners = (
        new_ctr_x - half_w,
        new_ctr_y - half_h,
        new_ctr_x + half_w,
        new_ctr_y + half_h,
    )
    refined_bbox = nd.concat(
        *[nd.expand_dims(corner, axis=1) for corner in corners], dim=1)

    if im_info is not None:
        # assume im_info [[height, width, scale]] with shape (1,3)
        im_hw = nd.slice_axis(im_info, axis=1, begin=0, end=2)
        upper = nd.reverse(im_hw, axis=1) - 1.
        upper = nd.tile(data=upper, reps=(1, 2))
        upper = nd.Reshape(upper, shape=(1, 4, 1))
        refined_bbox = nd.broadcast_minimum(lhs=refined_bbox, rhs=upper)
        refined_bbox = nd.broadcast_maximum(
            lhs=refined_bbox, rhs=nd.zeros_like(refined_bbox))
    return refined_bbox
def forward(self, anchors, pred_classes, pred_bboxes):
    """Decode raw SSD predictions into class ids, scores and boxes.

    :param anchors: (1, num-of-anchor, 4), anchors[0,0,:] = x0,y0,x1,y1
    :param pred_classes: (batch-size, num-of-anchor, num-of-classes), including background
    :param pred_bboxes: (batch-size, num-of-anchor * 4)
    :return: ``(ids, scores, bboxes)`` with
        ids: (batch-size, num-of-found, 1) class id for each found,
        scores: (batch-size, num-of-found, 1) class score for each found,
        bboxes: (batch-size, num-of-found, 4) coordinates of each found
        (x0,y0,x1,y1) with norm w/h
    """
    center_anchors = self.corner2center(anchors)
    offsets = nd.reshape(pred_bboxes, (0, -1, 4))
    bboxes = self.bbox_decoder(offsets, center_anchors)
    cls_ids, scores = self.cls_decoder(nd.softmax(pred_classes, axis=-1))

    # One [id, score, box] table per class, later joined along the anchor axis.
    per_class = [
        nd.concat(cls_ids.slice_axis(axis=-1, begin=c, end=c + 1),
                  scores.slice_axis(axis=-1, begin=c, end=c + 1),
                  bboxes,
                  dim=-1)
        for c in range(self.num_classes)
    ]
    result = nd.concat(*per_class, dim=1)

    if 0 < self.nms_thresh < 1:
        result = nd.contrib.box_nms(result,
                                    overlap_thresh=self.nms_thresh,
                                    topk=self.nms_topk,
                                    valid_thresh=0.01,
                                    id_index=0,
                                    score_index=1,
                                    coord_start=2,
                                    force_suppress=False)
        # Truncation to post_nms rows only happens when NMS ran.
        if self.post_nms > 0:
            result = result.slice_axis(axis=1, begin=0, end=self.post_nms)

    ids = nd.slice_axis(result, axis=2, begin=0, end=1)
    scores = nd.slice_axis(result, axis=2, begin=1, end=2)
    bboxes = nd.slice_axis(result, axis=2, begin=2, end=6)
    return ids, scores, bboxes
def forward(self, x):
    """Evaluate the tree on ``x`` as router outputs times stacked embeddings.

    With router layers present, ``self._contextify(x)(root)`` yields a router
    dict and an embedding dict; their values are stacked in sorted-key order
    and multiplied with ``nd.dot``. Without router layers, the contextified
    output is multiplied by a tensor of ones shaped like ``x``.
    """
    first_entry = next(iter(self._structure.items()))
    root = first_entry[0]

    if len(self._routerlayer) == 0:
        ones = nd.ones_like(nd.slice_axis(x, axis=1, begin=0, end=None))
        return self._contextify(x)(root) * ones

    router_d, embedd_d = self._contextify(x)(root)
    embedd_rows = [embedd_d[key] for key in sorted(embedd_d)]
    embedd = nd.stack(*embedd_rows, axis=0)
    router_cols = [router_d[key] for key in sorted(router_d)]
    router = nd.stack(*router_cols, axis=-1)
    return nd.dot(router, embedd)
def _convert_score(self, score):
    """Convert the classification output into a flat score vector.

    Parameters
    ----------
    score : ndarray
        network output

    Returns
    -------
    numpy.ndarray
        Softmax probability of the class at index 1 for every position,
        flattened to one dimension.
    """
    # Move axis 0 to the end, then fold everything into two rows.
    two_rows = nd.reshape(nd.transpose(score, axes=(1, 2, 3, 0)),
                          shape=(2, -1))
    # One row per position, two columns; normalise across the two columns.
    probabilities = nd.softmax(nd.transpose(two_rows, axes=(1, 0)), axis=1)
    # Keep only the column at index 1 and drop the singleton axis.
    positive = nd.slice_axis(probabilities, axis=1, begin=1, end=2)
    return nd.squeeze(positive, axis=1).asnumpy()
def __getitem__(self, idx):
    """Load one sample: 512px output/input crops, mask, and 256/128px images.

    Images are read with ``mx.image.imread`` and scaled to ``[-1, 1]``. The
    512 image is resized to ``(img_wd * 2, img_ht)`` and cut into a left
    (input) and right (output) half. The mask falls back to a ``.jpg`` file
    when the formatted mask path does not exist. When ``self.is_transform``
    is set, a random flip and rotation are applied to all arrays.

    Returns
    -------
    tuple
        ``(out_512, in_512, mask_512, out_256, out_128)``, each transposed to
        channel-first layout.
    """
    name = self.img_paths[idx]
    img_path_512 = self._img_512.format(name)
    img_path_256 = self._img_256.format(name)
    img_path_128 = self._img_128.format(name)
    # The two label paths are formatted but not used below.
    lbl_path_256 = self._lbl_256.format(name)
    mask_path_512 = self._mask_512.format(name)
    lbl_path_512 = self._lbl_512.format(name)

    def _read_scaled(path):
        # Read an image and map pixel values from [0, 255] to [-1, 1].
        return mx.image.imread(path).astype(np.float32) / 127.5 - 1

    img_arr_256 = _read_scaled(img_path_256)
    img_arr_512 = _read_scaled(img_path_512)
    img_arr_128 = _read_scaled(img_path_128)

    # Left half is the input, right half is the output.
    img_arr_512 = mx.image.imresize(img_arr_512, img_wd * 2, img_ht)
    img_arr_in_512 = mx.image.fixed_crop(img_arr_512, 0, 0, img_wd, img_ht)
    img_arr_out_512 = mx.image.fixed_crop(img_arr_512, img_wd, 0, img_wd,
                                          img_ht)

    if not os.path.exists(mask_path_512):
        mask_path_512 = mask_path_512.replace(".png", '.jpg', 1)
    mask_512 = mx.image.imread(mask_path_512)
    # First channel only, scaled to [0, 1].
    tep_mask_512 = nd.slice_axis(mask_512, axis=2, begin=0, end=1) / 255

    if self.is_transform:
        imgs = [
            img_arr_out_512, img_arr_in_512, tep_mask_512, img_arr_256,
            img_arr_128
        ]
        imgs = random_horizontal_flip(imgs)
        imgs = random_rotate(imgs)
        img_arr_out_512 = imgs[0]
        img_arr_in_512 = imgs[1]
        tep_mask_512 = imgs[2]
        img_arr_256 = imgs[3]
        img_arr_128 = imgs[4]

    # HWC -> CHW for every returned array.
    channel_first = (2, 0, 1)
    img_arr_in_512 = nd.transpose(img_arr_in_512, channel_first)
    img_arr_out_512 = nd.transpose(img_arr_out_512, channel_first)
    img_arr_out_256 = nd.transpose(img_arr_256, channel_first)
    img_arr_out_128 = nd.transpose(img_arr_128, channel_first)
    mask_h, mask_w = tep_mask_512.shape[0], tep_mask_512.shape[1]
    tep_mask_512 = tep_mask_512.reshape(mask_h, mask_w, 1)
    tep_mask_512 = nd.transpose(tep_mask_512, channel_first)
    return (img_arr_out_512, img_arr_in_512, tep_mask_512, img_arr_out_256,
            img_arr_out_128)
def yolo2_target(scores, boxes, labels, anchors, ignore_label=-1, thresh=0.5):
    """Generate training targets given predictions and labels.

    ``scores`` is used only for its shape ``(b, h, w, n, 1)`` and its context.

    Parameters
    ----------
    scores : NDArray
        Predicted objectness; supplies the output shape and device.
    boxes : NDArray
        Unused; kept for interface compatibility.
    labels : NDArray
        Ground truth, indexed as ``labels[b][:, 0:5]`` and read as
        ``(class, xmin, ymin, xmax, ymax)``. Rows with a class value not
        greater than -0.5 are skipped.
    anchors : array-like
        Anchor sizes, reshaped to ``(-1, 2)`` as (width, height).
    ignore_label : number
        Value used to fill ``target_id`` where no ground truth is assigned.
    thresh : float
        Unused; kept for interface compatibility.

    Returns
    -------
    tuple
        ``(target_id, target_score, target_box, sample_weight)``.
    """
    batch_size, h, w, n, _ = scores.shape
    anchors = np.reshape(np.array(anchors), (-1, 2))
    target_score = nd.zeros((batch_size, h, w, n, 1), ctx=scores.context)
    target_id = nd.ones_like(target_score, ctx=scores.context) * ignore_label
    target_box = nd.zeros((batch_size, h, w, n, 4), ctx=scores.context)
    sample_weight = nd.zeros((batch_size, h, w, n, 1), ctx=scores.context)

    # Iterate over the batch dimension of `scores`. The previous code used
    # `output.shape[0]`, a name that is not defined in this function.
    for b in range(batch_size):
        # find the best match for each ground-truth
        label = labels[b].asnumpy()
        valid_label = label[np.where(label[:, 0] > -0.5)[0], :]
        # shuffle because multi gt could possibly match to one anchor, we keep
        # the last match randomly
        np.random.shuffle(valid_label)
        for gt in valid_label:
            gx = (gt[1] + gt[3]) / 2
            gy = (gt[2] + gt[4]) / 2
            gw = gt[3] - gt[1]
            gh = gt[4] - gt[2]
            # grid cell that contains the ground-truth centre
            ind_x = int(gx * w)
            ind_y = int(gy * h)
            # centre offset inside that cell
            tx = gx * w - ind_x
            ty = gy * h - ind_y
            # box size in feature-map cells
            gw = gw * w
            gh = gh * h
            # find the best match using width and height only, assuming
            # centers are identical
            intersect = (np.minimum(anchors[:, 0], gw) *
                         np.minimum(anchors[:, 1], gh))
            ovps = intersect / (
                gw * gh + anchors[:, 0] * anchors[:, 1] - intersect)
            best_match = int(np.argmax(ovps))
            target_id[b, ind_y, ind_x, best_match, :] = gt[0]
            target_score[b, ind_y, ind_x, best_match, :] = 1.0
            # log-ratio of gt size to anchor size
            tw = np.log(gw / anchors[best_match, 0])
            th = np.log(gh / anchors[best_match, 1])
            target_box[b, ind_y, ind_x, best_match, :] = mx.nd.array(
                [tx, ty, tw, th])
            sample_weight[b, ind_y, ind_x, best_match, :] = 1.0
    return target_id, target_score, target_box, sample_weight
def validate(net, val_data, val_items, val_shapes, ctx, size, classes):
    """Run detection on the validation set and group detections by class id.

    For every image the boxes are clipped to the input, rescaled by
    ``im_scale``, and every detection whose class id is not -1 is recorded.

    Fixes over the previous version:

    * scores and boxes were reordered by ``argsort(ids)`` while the class ids
      were kept in their original order, so a detection could be filed under
      the wrong class; the ids are now reordered with the same indices;
    * ``id is not -1`` (identity test against a literal) is now ``!= -1``;
    * the builtin ``id`` is no longer shadowed.

    Parameters
    ----------
    net : detection network returning ``(ids, scores, bboxes)``.
    val_data : iterable of batches, each splittable into ``(x, y, im_scale)``.
    val_items : per-batch item names, zipped with ``val_data``.
    val_shapes, size, classes : unused; kept for interface compatibility.
    ctx : context list passed to ``split_and_load``.

    Returns
    -------
    dict
        class id -> list of ``[item, score, xmin, ymin, xmax, ymax]``.
    """
    clipper = gcv.nn.bbox.BBoxClipToImage()
    net.hybridize(static_alloc=True)
    print("---Detect Total {:d} Image Start.---".format(len(val_items)))

    result_dict = {}
    for batch, item in zip(val_data, val_items):
        batch = split_and_load(batch, ctx_list=ctx)
        for x, y, im_scale in zip(*batch):
            ids, scores, bboxes = net(x)
            bboxes = clipper(bboxes, x)
            im_scale = im_scale.reshape((-1)).asscalar()
            bboxes *= im_scale

            # Squeezing axes 0 and 2 presumes one image per forward pass.
            flat_ids = nd.squeeze(ids, axis=(0, 2))
            # Descending order puts the -1 (invalid) entries last.
            inds = nd.argsort(flat_ids, is_ascend=False)
            id_values = flat_ids.asnumpy().astype(np.int8).tolist()
            valid_len = sum(1 for class_id in id_values if class_id != -1)

            if valid_len > 0:  # slice_axis needs a non-empty range
                inds = nd.slice_axis(inds, begin=0, end=valid_len, axis=0)
                # Reorder the ids with the same indices as scores / boxes so
                # that entry i of all three describes the same detection.
                valid_ids = [id_values[int(j)] for j in inds.asnumpy()]
                scores = nd.take(scores, inds, axis=1).asnumpy()
                bboxes = nd.take(bboxes, inds, axis=1).asnumpy()
                for i, class_id in enumerate(valid_ids):
                    score = scores[0, i, 0]
                    xmin, ymin, xmax, ymax = (bboxes[0, i, 0], bboxes[0, i, 1],
                                              bboxes[0, i, 2], bboxes[0, i, 3])
                    result_dict.setdefault(class_id, []).append(
                        [item, score, xmin, ymin, xmax, ymax])
        print("Detect Image {:s} Done.".format(item))

    print("---Detect Total {:d} Image Done.---".format(len(val_items)))
    return result_dict
def forward(self, x):
    """Evaluate the tree on ``x`` using soft routing.

    With router layers present, the per-depth weights are masked by the
    router matrix's presence, the unassigned remainder of the weight is
    added to the entry selected by ``depth``, and the weighted router matrix
    is multiplied with the embeddings. The result gets a trailing axis.

    Without router layers, the contextified output is multiplied by a tensor
    of ones shaped like ``x``.
    """
    first_entry = next(iter(self._structure.items()))
    root = first_entry[0]

    if len(self._routerlayer) == 0:
        ones = nd.ones_like(nd.slice_axis(x, axis=1, begin=0, end=None))
        return self._contextify(x)(root) * ones

    router, router_mat, weight, embedd = self._contextify(x)(root)

    presence = nd.sum(router_mat, axis=2)
    weight_adj = presence * weight

    # Index derived from the top entry of the reversed presence.
    reversed_top = nd.topk(nd.reverse(presence, axis=1))
    depth = len(self._weightlayer) - reversed_top
    depth -= 1
    depth = depth[:, 0]

    # Whatever weight is left over is folded into the entry at `depth`.
    remainder = 1 - nd.sum(weight_adj, axis=1)
    remainder += nd.choose_element_0index(weight_adj, depth)
    weight_adj = nd.fill_element_0index(weight_adj, remainder, depth)

    weighted = nd.expand_dims(weight_adj, axis=2) * router_mat
    head = nd.sum(weighted, axis=1)
    return nd.expand_dims(nd.dot(head, embedd), axis=-1)
def test_slice_axis():
    """Check ``nd.slice_axis`` on the first half of a LARGE_X-sized vector."""
    vector = create_vector(size=LARGE_X)
    half = LARGE_X // 2
    front = nd.slice_axis(vector, axis=0, begin=0, end=half)

    assert front.shape[0] == vector.shape[0] // 2
    # Presumes create_vector fills element i with the value i -- TODO confirm.
    assert front[-1][0] == (half - 1)
else: mod.init_params(arg_params=shared_params, aux_params=None, allow_missing=True) else: mod.set_params(arg_params=dict(shared_params.items() + fc6_params[k].items()), aux_params=None) if (not mod.optimizer_initialized): mod.init_optimizer(optimizer='sgd', optimizer_params=optimizer_params) # 训练模型 mod.forward(data) pos_score = nd.slice_axis(mod.get_outputs()[0], axis=0, begin=0, end=32) neg_score = nd.slice_axis(mod.get_outputs()[0], axis=0, begin=32, end=128) mod.backward() mod.update() shared_params = { 'conv1_weight': mod.get_params()[0]['conv1_weight'], 'conv1_bias': mod.get_params()[0]['conv1_bias'], 'conv2_weight': mod.get_params()[0]['conv2_weight'], 'conv2_bias': mod.get_params()[0]['conv2_bias'], 'conv3_weight': mod.get_params()[0]['conv3_weight'], 'conv3_bias': mod.get_params()[0]['conv3_bias'],
def forward(self, cls_targets, ctr_targets, box_targets, mask_targets, matches,
            cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds):
    """Compute loss in entire batch across devices.

    Every argument is converted to a list with ``_as_list`` (one entry per
    device slice) and the slices are processed in lockstep. For each slice
    four losses are computed:

    * classification: focal-style loss using ``self._alpha`` / ``self._gamma``,
      normalised by the number of positive targets (at least 1);
    * centerness: sigmoid cross-entropy written in its log-sum-exp form,
      masked to positive targets;
    * box: ``-log(IoU)`` on foreground positions;
    * mask: ``self.SBCELoss`` between box-cropped predicted masks and the
      matched ground-truth masks, weighted inversely to the box area.

    Returns
    -------
    tuple of lists
        ``(sum_losses, cls_losses, ctr_losses, box_losses, mask_losses)``,
        one entry per device slice. ``sum_losses`` is the weighted sum using
        the ``self._*_lambd`` coefficients.

    NOTE(review): when ``self._return_iou`` is true, ``box_loss`` becomes a
    ``(loss, ious)`` tuple, and that tuple is also what gets multiplied by
    ``self._box_lambd`` in the final sum -- confirm this path is intended.
    """
    # Down-scaling factor applied to box coordinates before they are used to
    # build the crop mask on the mask grid.
    scale = 4
    # require results across different devices at this time
    cls_targets, ctr_targets, box_targets, mask_targets, matches, cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds = \
        [_as_list(x) for x in (cls_targets, ctr_targets, box_targets, mask_targets, matches, cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds)]
    # Per-slice loss accumulators.
    cls_losses = []
    ctr_losses = []
    box_losses = []
    mask_losses = []
    sum_losses = []
    for clst, ctrt, boxt, maskt, matche, clsp, ctrp, boxp, maskp, maskcoep in zip(
            *[
                cls_targets, ctr_targets, box_targets, mask_targets, matches,
                cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds
            ]):
        # Positions whose class target is a foreground class.
        pos_gt_mask = clst > 0
        # cls loss
        if not self._from_logits:
            clsp = nd.sigmoid(clsp)
        one_hot = nd.one_hot(clst, self._num_class)
        # Drop column 0 of the one-hot encoding (class index 0).
        one_hot = nd.slice_axis(one_hot, begin=1, end=None, axis=-1)
        # pt: predicted probability of the true outcome per class column.
        pt = nd.where(one_hot, clsp, 1 - clsp)
        t = nd.ones_like(one_hot)
        alpha = nd.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
        cls_loss = -alpha * (
            (1 - pt)**self._gamma) * nd.log(nd.minimum(pt + self._eps, 1))
        # Normalise by the positive count, guarded against zero.
        cls_loss = nd.sum(cls_loss) / nd.maximum(nd.sum(pos_gt_mask), 1)
        cls_losses.append(cls_loss)
        # ctr loss
        ctrp = nd.squeeze(ctrp, axis=-1)
        pos_pred_mask = ctrp >= 0
        # max(x, 0) - x * t + log(1 + exp(-|x|)), restricted to positives.
        ctr_loss = (ctrp * pos_pred_mask - ctrp * ctrt +
                    nd.log(1 + nd.exp(-nd.abs(ctrp)))) * pos_gt_mask
        ctr_loss = nd.sum(ctr_loss) / nd.maximum(nd.sum(pos_gt_mask), 1)
        ctr_losses.append(ctr_loss)
        # box loss // iou loss
        px1, py1, px2, py2 = nd.split(boxp,
                                      num_outputs=4,
                                      axis=-1,
                                      squeeze_axis=True)
        gx1, gy1, gx2, gy2 = nd.split(boxt,
                                      num_outputs=4,
                                      axis=-1,
                                      squeeze_axis=True)
        # Areas of predicted and ground-truth boxes (inclusive pixel convention).
        apd = nd.abs(px2 - px1 + 1) * nd.abs(py2 - py1 + 1)
        agt = nd.abs(gx2 - gx1 + 1) * nd.abs(gy2 - gy1 + 1)
        # Intersection width / height, clamped at zero.
        iw = nd.maximum(
            nd.minimum(px2, gx2) - nd.maximum(px1, gx1) + 1., 0.)
        ih = nd.maximum(
            nd.minimum(py2, gy2) - nd.maximum(py1, gy1) + 1., 0.)
        # The +1 terms keep intersection and union strictly positive.
        ain = iw * ih + 1.
        union = apd + agt - ain + 1
        ious = nd.maximum(ain / union, 0.)
        fg_mask = nd.where(clst > 0, nd.ones_like(clst), nd.zeros_like(clst))
        box_loss = -nd.log(nd.minimum(ious + self._eps, 1.)) * fg_mask
        if self._return_iou:
            # Tuple of (normalised loss, raw ious); see NOTE(review) above.
            box_loss = nd.sum(box_loss) / nd.maximum(nd.sum(fg_mask), 1), ious
        else:
            box_loss = nd.sum(box_loss) / nd.maximum(nd.sum(fg_mask), 1)
        box_losses.append(box_loss)
        # mask loss
        # Sort positions by match index, largest first, so matched positions
        # (index >= 0) come before unmatched ones.
        rank = (-matche).argsort(axis=-1)
        # Every tensor is split into exactly 2 pieces along axis 0, i.e. this
        # presumes 2 images per device slice -- TODO confirm.
        rank = nd.split(rank, 2, axis=0, squeeze_axis=True)
        matche = nd.split(matche, 2, axis=0, squeeze_axis=True)
        maskp = nd.split(maskp, 2, axis=0, squeeze_axis=True)
        maskt = nd.split(maskt, 2, axis=0, squeeze_axis=True)
        boxt = nd.split(boxt, 2, axis=0, squeeze_axis=True)
        maskcoep = nd.split(maskcoep, 2, axis=0, squeeze_axis=True)
        agt = nd.split(agt, 2, axis=0, squeeze_axis=True)
        mask_loss = []
        for ranki, matchei, maskpi, maskti, boxti, maskcoepi, agti in zip(
                rank, matche, maskp, maskt, boxt, maskcoep, agt):
            # Keep a fixed 200 top-ranked positions per image.
            idx = nd.slice(ranki, 0, 200)
            # 1 where the selected position really has a match, else 0.
            pos_mask = nd.take(matchei >= 0, idx)
            pos_box = nd.take(boxti, idx)
            area = nd.take(agti, idx)
            # Smaller boxes get a larger weight; unmatched positions get 0.
            weight = (self.gt_weidth * self.gt_height /
                      (area + self._eps)) * pos_mask
            mask_idx = nd.take(matchei, idx)
            # Ground-truth mask of the object matched to each position.
            maskti = nd.take(maskti, mask_idx)
            # Combine the mask maps with the per-position coefficients.
            maskpi = nd.dot(nd.take(maskcoepi, idx), maskpi)
            maskpi = nd.sigmoid(maskpi)
            with autograd.pause():
                # Build a binary crop mask per box on a fixed 186 x 186 grid.
                _h = nd.arange(186, ctx=maskpi.context)
                _w = nd.arange(186, ctx=maskpi.context)
                _h = nd.tile(_h, reps=(pos_box.shape[0], 1))
                _w = nd.tile(_w, reps=(pos_box.shape[0], 1))
                x1, y1, x2, y2 = nd.split(nd.round(pos_box / scale),
                                          num_outputs=4,
                                          axis=-1)
                _w = (_w >= x1) * (_w <= x2)
                _h = (_h >= y1) * (_h <= y2)
                # Outer product of row and column indicators.
                _mask = nd.batch_dot(_h.expand_dims(axis=-1),
                                     _w.expand_dims(axis=-1),
                                     transpose_b=True)
            # Zero the prediction outside the ground-truth box.
            maskpi = maskpi * _mask
            # eps is added to every element of pos_mask before summing.
            mask_loss.append(
                nd.sum(self.SBCELoss(maskpi, maskti) * weight) /
                nd.sum(pos_mask + self._eps))
        # (A commented-out earlier variant of the mask loss, which sliced
        # `rank` by the per-image positive count instead of a fixed 200, used
        # to be here.)
        mask_loss = nd.mean(nd.concat(*mask_loss, dim=0))
        mask_losses.append(mask_loss)
        sum_losses.append(self._cls_lambd * cls_losses[-1] +
                          self._ctr_lambd * ctr_losses[-1] +
                          self._box_lambd * box_losses[-1] +
                          self._mask_lambd * mask_losses[-1])
    return sum_losses, cls_losses, ctr_losses, box_losses, mask_losses
def forward(self, anchors, pred_classes, pred_bboxes, groundtruth, data=None):
    """Build classification and box-regression targets for every anchor.

    :param anchors: (1, num-of-anchor, 4). The original note says
        anchors[0,0,:] = cx,cy,w,h, but the IoU below is computed with
        ``format='corner'`` and the center-to-corner conversion is commented
        out -- NOTE(review): confirm which format callers pass.
    :param pred_classes: (batch-size, num-of-anchor, num-of-classes), including background;
        forwarded to the sampler only.
    :param pred_bboxes: (batch-size, num-of-anchor * 4); not used in this method.
    :param groundtruth: (batch-size, max-object-of-one-image, 5),
        groundtruth[0,0,:] = (cls,x0,y0,x1,y1), (x0,y0,x1,y1) normalized by image size
    :param data: optional image batch. When given, the first image is drawn
        with ground-truth boxes and positively sampled anchors, written to
        ``vis.jpg`` and shown in a window that blocks until a key is pressed
        (debugging aid).
    :return:
        cls_targets: (batch-size, num-of-anchor), cls_targets[i,j] = (cls_id+1 for anchor j in image i), including background as class 0
        bbox_targets: (batch-size, num-of-anchor, 4), bbox_targets[i,j,:] = (offset of anchor j in image i) (center mode)
        bbox_masks: (batch-size, num-of-anchor, 4), bbox_mask[i,j,:] = (mask value of anchor j in image i)
    """
    #anchors = self.center_to_corner(anchors.reshape(-1,4))
    anchors = nd.squeeze(anchors)
    # Column 0 of the last axis is the class; the rest is the box.
    gt_bboxes = nd.slice_axis(groundtruth, axis=-1, begin=1, end=None)
    gt_classes = nd.slice_axis(groundtruth, axis=-1, begin=0, end=1)
    # Swap the first two axes so the batch axis comes first.
    ious = nd.transpose(
        nd.contrib.box_iou(anchors, gt_bboxes, format='corner'), (1, 0, 2))
    # matches: (batch-size, num-of-anchor), matches[i,j] = index of the object
    # in image i matched with anchor j
    matches = self.matcher(ious)
    # samples: (batch-size, num-of-anchor), samples[i,j] = -1 or 1
    samples = self.sampler(matches, pred_classes, ious)
    if data is not None:
        # Debug visualisation of the first image in the batch.
        img = nd.clip(nd.transpose(data[0], (1, 2, 0)) * 255.0, 0,
                      255).asnumpy().astype(np.uint8)
        H, W, C = img.shape
        bboxes = gt_bboxes[0]
        # Draw every ground-truth box; rows with a negative x0 are skipped.
        for row in range(bboxes.shape[0]):
            x0, y0, x1, y1 = bboxes[row, :].asnumpy().tolist()
            if x0 < 0:
                continue
            x0, x1 = int(x0 * W), int(x1 * W)
            y0, y1 = int(y0 * H), int(y1 * H)
            cv2.rectangle(img, (x0, y0), (x1, y1), (255, 0, 0), 2)
        # Disabled diagnostic: prints area and aspect ratio of every anchor.
        if 0:
            for row in range(anchors.shape[0]):
                x0, y0, x1, y1 = anchors[row].asnumpy().tolist()
                if x0 < 0:
                    continue
                print('sz = {} ratio = {}'.format((x1 - x0) * (y1 - y0),
                                                  (x1 - x0) / (y1 - y0)))
        # Draw the anchors that were sampled as positive and have a match.
        for row in range(matches[0].shape[0]):
            if samples[0, row] < 1:
                continue
            idx = matches[0, row]
            if idx < 0:  #if idx == 0:
                continue
            x0, y0, x1, y1 = anchors[row].asnumpy().tolist()
            x0, x1 = int(x0 * W), int(x1 * W)
            y0, y1 = int(y0 * H), int(y1 * H)
            cv2.rectangle(img, (x0, y0), (x1, y1), (0, 0, 255), 1)
        cv2.imwrite("vis.jpg", img)
        cv2.imshow("vis", img)
        # Blocks until a key is pressed.
        cv2.waitKey(-1)
    # (batch-size, num-of-anchor); cls_targets[i,j] = cls_id+1 for anchor j in image i
    cls_targets = self.cls_encoder(samples, matches, gt_classes)
    # Both (batch-size, num-of-anchor, 4):
    #   bbox_targets[i,j,:] = offset of anchor j in image i
    #   bbox_masks[i,j,:]   = mask value of anchor j in image i
    bbox_targets, bbox_masks = self.bbox_encoder(samples, matches, anchors,
                                                 gt_bboxes)
    return cls_targets, bbox_targets, bbox_masks
def forward(self, is_train, req, in_data, out_data, aux):
    """Rescale the top ``first_n`` per-class scores with a learned NMS head.

    The signature and the use of ``self.assign`` suggest this overrides
    ``mx.operator.CustomOp.forward`` -- TODO confirm against the class header.

    ``in_data`` is read positionally:
        0 cls_score, 1 bbox_pred, 2 rois, 3 im_info, 4 fc_all_2_relu,
        5-6 nms_rank weight/bias, 7-8 roi_feat_embedding weight/bias,
        9-10 nms_pair_pos_fc1_1 weight/bias, 11-12 nms_query_1 weight/bias,
        13-14 nms_key_1 weight/bias, 15-16 nms_linear_out_1 weight/bias,
        17-18 nms_logit weight/bias,
        19 non_gt_index (only read when ``self.has_non_gt_index`` is set).

    ``out_data`` is written as:
        0 nms_multi_score (sorted scores times the conditional NMS score),
        1 sorted_bbox, 2 sorted_score.

    Side effects: maintains the module globals ``learn_nms_time`` and
    ``learn_nms_count`` and prints a running average of the wall time on every
    250th call.

    The bracketed shape notes in the body are the original author's
    annotations; they are not checked by the code.
    """
    nms_start_time = time.time()
    # inputs
    cls_score = in_data[0]
    bbox_pred = in_data[1]
    rois = in_data[2]
    im_info = in_data[3]
    fc_all_2_relu = in_data[4]
    nms_rank_weight = in_data[5]
    nms_rank_bias = in_data[6]
    roi_feat_embedding_weight = in_data[7]
    roi_feat_embedding_bias = in_data[8]
    nms_pair_pos_fc1_1_weight = in_data[9]
    nms_pair_pos_fc1_1_bias = in_data[10]
    nms_query_1_weight = in_data[11]
    nms_query_1_bias = in_data[12]
    nms_key_1_weight = in_data[13]
    nms_key_1_bias = in_data[14]
    nms_linear_out_1_weight = in_data[15]
    nms_linear_out_1_bias = in_data[16]
    nms_logit_weight = in_data[17]
    nms_logit_bias = in_data[18]
    if self.has_non_gt_index:
        non_gt_index = in_data[19]
    else:
        non_gt_index = None

    # Restrict scores and regressions to the non-ground-truth rois: either the
    # first ``nongt_dim`` rows, or the rows listed in ``non_gt_index``.
    if self.nongt_dim is not None:
        cls_score_nongt = nd.slice_axis(data=cls_score,
                                        axis=0,
                                        begin=0,
                                        end=self.nongt_dim)
        # cls_score_nongt = monitor_wrapper(cls_score_nongt, 'cls_score_nongt')
        bbox_pred_nongt = nd.slice_axis(data=bbox_pred,
                                        axis=0,
                                        begin=0,
                                        end=self.nongt_dim)
    elif non_gt_index is not None:
        cls_score_nongt = nd.take(a=cls_score, indices=non_gt_index)
        bbox_pred_nongt = nd.take(a=bbox_pred, indices=non_gt_index)
    else:
        cls_score_nongt = cls_score
        bbox_pred_nongt = bbox_pred
    bbox_pred_nongt = nd.BlockGrad(bbox_pred_nongt)

    # remove batch idx and gt roi
    sliced_rois = nd.slice_axis(data=rois, axis=1, begin=1, end=None)
    if self.nongt_dim is not None:
        sliced_rois = nd.slice_axis(data=sliced_rois,
                                    axis=0,
                                    begin=0,
                                    end=self.nongt_dim)
    elif non_gt_index is not None:
        sliced_rois = nd.take(a=sliced_rois, indices=non_gt_index)
    # bbox_pred_nobg, [num_rois, 4*(num_reg_classes-1)]
    bbox_pred_nobg = nd.slice_axis(data=bbox_pred_nongt,
                                   axis=1,
                                   begin=4,
                                   end=None)
    # [num_boxes, 4, num_reg_classes-1]
    refined_bbox = refine_bbox_nd(sliced_rois,
                                  bbox_pred_nobg,
                                  im_info,
                                  means=self.bbox_means,
                                  stds=self.bbox_stds)
    # softmax cls_score to cls_prob, [num_rois, num_classes]
    cls_prob = nd.softmax(data=cls_score_nongt, axis=-1)
    # Column 0 is dropped (presumably the background class -- TODO confirm).
    cls_prob_nobg = nd.slice_axis(cls_prob, axis=1, begin=1, end=None)
    sorted_cls_prob_nobg = nd.sort(data=cls_prob_nobg,
                                   axis=0,
                                   is_ascend=False)
    # sorted_score, [first_n, num_fg_classes]
    sorted_score = nd.slice_axis(sorted_cls_prob_nobg,
                                 axis=0,
                                 begin=0,
                                 end=self.first_n,
                                 name='sorted_score')
    max_score_per_class = sorted_score.max(axis=0)
    max_score_per_class_numpy = max_score_per_class.asnumpy()

    # Clamp the threshold to the best score so at least one class stays valid.
    valid_class_thresh = self.class_thresh
    valid_class_thresh = np.minimum(valid_class_thresh,
                                    max_score_per_class_numpy.max())
    valid_class_indices = np.where(
        max_score_per_class_numpy >= valid_class_thresh)[0]
    invalid_class_indices = np.where(
        max_score_per_class_numpy < valid_class_thresh)[0]
    num_valid_classes = len(valid_class_indices)
    valid_class_indices_nd = nd.array(valid_class_indices,
                                      ctx=sorted_score.context)

    # sort by score
    rank_indices = nd.argsort(data=cls_prob_nobg, axis=0, is_ascend=False)
    # first_rank_indices, [first_n, num_fg_classes]
    first_rank_indices = nd.slice_axis(rank_indices,
                                       axis=0,
                                       begin=0,
                                       end=self.first_n)
    # take() indexes axis 0, so transpose to select class columns and back.
    valid_first_rank_indices = first_rank_indices.transpose().take(
        valid_class_indices_nd).transpose()

    # sorted_bbox, [first_n, num_fg_classes, 4, num_reg_classes-1]
    sorted_bbox = nd.take(a=refined_bbox, indices=first_rank_indices)
    if self.class_agnostic:
        # sorted_bbox, [first_n, num_fg_classes, 4]
        sorted_bbox = nd.Reshape(sorted_bbox,
                                 shape=(0, 0, 0),
                                 name='sorted_bbox')
    else:
        # Pick, for every class column, the regression of that same class.
        # NOTE(review): cls_mask is created without ctx= -- confirm this is
        # fine when sorted_bbox lives on a non-default device.
        cls_mask = nd.arange(0, self.num_fg_classes)
        cls_mask = nd.Reshape(cls_mask, shape=(1, -1, 1))
        cls_mask = nd.broadcast_to(cls_mask, shape=(self.first_n, 0, 4))
        # sorted_bbox, [first_n, num_fg_classes, 4]
        sorted_bbox = nd.pick(data=sorted_bbox,
                              name='sorted_bbox',
                              index=cls_mask,
                              axis=3)

    valid_sorted_bbox = sorted_bbox.transpose(
        (1, 0, 2)).take(valid_class_indices_nd).transpose((1, 0, 2))

    # sorted_bbox = monitor_wrapper(sorted_bbox, 'sorted_bbox')
    # nms_rank_embedding, [first_n, 1024]
    nms_rank_embedding = extract_rank_embedding_nd(self.first_n, 1024)
    # nms_rank_feat, [first_n, 1024]
    # NOTE(review): num_hidden=128 below, so the last dim is presumably 128.
    nms_rank_feat = nd.FullyConnected(name='nms_rank',
                                      data=nms_rank_embedding,
                                      num_hidden=128,
                                      weight=nms_rank_weight,
                                      bias=nms_rank_bias)

    # nms_position_matrix, [num_valid_classes, first_n, first_n, 4]
    nms_position_matrix = extract_multi_position_matrix_nd(
        valid_sorted_bbox)
    # roi_feature_embedding, [num_rois, 1024]
    # NOTE(review): num_hidden=128 below, so the last dim is presumably 128.
    # fc_all_2_relu = monitor_wrapper(fc_all_2_relu, 'fc_all_2_relu')
    roi_feat_embedding = nd.FullyConnected(
        name='roi_feat_embedding',
        data=fc_all_2_relu,
        num_hidden=128,
        weight=roi_feat_embedding_weight,
        bias=roi_feat_embedding_bias)
    # sorted_roi_feat, [first_n, num_valid_classes, 128]
    sorted_roi_feat = nd.take(a=roi_feat_embedding,
                              indices=valid_first_rank_indices)

    # vectorized nms
    # nms_embedding_feat, [first_n, num_valid_classes, 128]
    nms_embedding_feat = nd.broadcast_add(lhs=sorted_roi_feat,
                                          rhs=nd.expand_dims(nms_rank_feat,
                                                             axis=1))
    # nms_attention_1, [first_n, num_valid_classes, 1024]
    nms_attention_1 = nms_attention_nd(
        nms_embedding_feat,
        nms_position_matrix,
        nms_pair_pos_fc1_1_weight,
        nms_pair_pos_fc1_1_bias,
        nms_query_1_weight,
        nms_query_1_bias,
        nms_key_1_weight,
        nms_key_1_bias,
        nms_linear_out_1_weight,
        nms_linear_out_1_bias,
        num_rois=self.first_n,
        index=1,
        group=self.nms_attention_group,
        dim=self.nms_attention_dim,
        fc_dim=self.nms_attention_fc_dim,
        feat_dim=self.nms_attention_feat_dim)
    # Residual connection around the attention output.
    nms_all_feat_1 = nms_embedding_feat + nms_attention_1
    nms_all_feat_1_relu = nd.Activation(data=nms_all_feat_1,
                                        act_type='relu',
                                        name='nms_all_feat_1_relu')
    # [first_n * num_valid_classes, 1024]
    nms_all_feat_1_relu_reshape = nd.Reshape(nms_all_feat_1_relu,
                                             shape=(-3, -2))
    # logit, [first_n * num_valid_classes, num_thresh]
    nms_conditional_logit = nd.FullyConnected(
        name='nms_logit',
        data=nms_all_feat_1_relu_reshape,
        num_hidden=self.num_thresh,
        weight=nms_logit_weight,
        bias=nms_logit_bias)
    # logit_reshape, [first_n, num_valid_classes, num_thresh]
    nms_conditional_logit_reshape = nd.Reshape(nms_conditional_logit,
                                               shape=(self.first_n,
                                                      num_valid_classes,
                                                      self.num_thresh))
    nms_conditional_score = nd.Activation(
        data=nms_conditional_logit_reshape,
        act_type='sigmoid',
        name='nms_conditional_score')
    if num_valid_classes == self.num_fg_classes:
        full_nms_conditional_score = nms_conditional_score
    else:
        # Classes below the threshold get an all-zero conditional score.
        full_nms_conditional_score = nd.concat(
            nms_conditional_score,
            nd.zeros(
                (self.first_n, self.num_fg_classes - num_valid_classes,
                 self.num_thresh),
                ctx=nms_conditional_score.context),
            dim=1)

    # Scores are currently ordered valid-classes-first; build the inverse
    # permutation to put every class back at its original column.
    all_indexes = np.concatenate(
        (valid_class_indices, invalid_class_indices))
    restore_indexes = np.zeros((self.num_fg_classes))
    restore_indexes[all_indexes] = np.arange(self.num_fg_classes)
    restore_indexes = nd.array(restore_indexes,
                               ctx=nms_conditional_score.context)
    full_nms_conditional_score = full_nms_conditional_score.transpose(
        (1, 0, 2)).take(restore_indexes).transpose((1, 0, 2))

    sorted_score_reshape = nd.expand_dims(sorted_score, axis=2)
    # sorted_score_reshape = nd.BlockGrad(sorted_score_reshape)
    nms_multi_score = nd.broadcast_mul(lhs=sorted_score_reshape,
                                       rhs=full_nms_conditional_score)
    # Copy a reduction to host so the timing below covers the finished
    # computation rather than just the time to queue it.
    _ = nms_multi_score.mean().asnumpy()

    all_time = time.time() - nms_start_time
    if 'learn_nms_time' not in globals().keys(
    ) or 'learn_nms_count' not in globals().keys():
        globals()['learn_nms_time'] = []
        globals()['learn_nms_count'] = 0

    # Keep at most the 1000 most recent timings.
    if globals()['learn_nms_count'] >= 1000:
        globals()['learn_nms_time'].pop(0)
        globals()['learn_nms_time'].append(all_time)
    else:
        globals()['learn_nms_time'].append(all_time)

    globals()['learn_nms_count'] += 1
    if globals()['learn_nms_count'] % 250 == 0:
        print("--->> learn nms running average time cost: {}".format(
            float(sum(globals()['learn_nms_time'])) /
            (1000 if globals()['learn_nms_count'] > 1000 else
             globals()['learn_nms_count'])))

    self.assign(out_data[0], req[0], nms_multi_score)
    self.assign(out_data[1], req[1], sorted_bbox)
    self.assign(out_data[2], req[2], sorted_score)
def forward(self, x, gt_boxes=None):
    """Run the two-stage detector on a batch of images.

    :param x: ndarray (B,C,H,W)
    :param gt_boxes: ground-truth boxes; only forwarded to ``self.sampler``
        when autograd is in training mode
    :return: in training mode the tuple
        (cls_pred, box_pred, rpn_box, samples, matches, raw_rpn_score,
        raw_rpn_box, anchors); otherwise (ids, scores, bboxes) sliced from
        the per-image NMS results
    """

    def _split_to_list(arr, num_outputs, axis, squeeze_axis=False):
        # nd.split may hand back a bare array instead of a list; normalise
        # so the caller can always iterate.
        pieces = nd.split(arr,
                          axis=axis,
                          num_outputs=num_outputs,
                          squeeze_axis=squeeze_axis)
        return pieces if isinstance(pieces, (list, tuple)) else [pieces]

    # Extract features with the base network first.
    feat = self.features(x)
    training = autograd.is_training()

    # Region proposal network.
    if training:
        # Training: the RPN is given a zero image of the input's shape.
        img = nd.zeros_like(x)
        rpn_score, rpn_box, raw_rpn_score, raw_rpn_box, anchors = self.rpn(
            feat, img)
        # Sample the proposals that are passed on as RoIs.
        rpn_box, samples, matches = self.sampler(rpn_box, rpn_score,
                                                 gt_boxes)
        num_roi = self._num_sample
    else:
        # Prediction: output shape (B,N,4)
        _, rpn_box = self.rpn(feat, x)
        num_roi = self._rpn_test_post_nms

    # Flatten the rois to 2D and prepend the batch index of every roi.
    with autograd.pause():
        roi_batchid = nd.arange(0,
                                self._max_batch,
                                repeat=num_roi,
                                ctx=rpn_box.context)
        rpn_roi = nd.concat(roi_batchid.reshape((-1, 1)),
                            rpn_box.reshape((-1, 4)),
                            dim=-1)
        rpn_roi = nd.stop_gradient(rpn_roi)

    # RoI pooling layer: (Batch*num_roi,channel,H,W)
    if self._roi_mode == 'pool':
        pool_feat = nd.ROIPooling(feat, rpn_roi, self._roi_size,
                                  1 / self._stride)
    elif self._roi_mode == 'align':
        pool_feat = nd.contrib.ROIAlign(feat,
                                        rpn_roi,
                                        self._roi_size,
                                        1 / self._stride,
                                        sample_ratio=2)
    else:
        raise ValueError("Invalid roi mode: {}".format(self._roi_mode))

    avg_feat = self.global_avg_pool(self.top_features(pool_feat))

    # Class prediction: (B*num_roi,(num_cls+1)) -> (B,N,C)
    cls_pred = self.class_predictor(avg_feat).reshape(
        (self._max_batch, num_roi, self.num_class + 1))
    # Box regression: (B*num_roi,(num_cls)*4) -> (B,N,C,4)
    box_pred = self.bbox_predictor(avg_feat).reshape(
        (self._max_batch, num_roi, self.num_class, 4))

    if training:
        return (cls_pred, box_pred, rpn_box, samples, matches,
                raw_rpn_score, raw_rpn_box, anchors)

    # Prediction only from here on: decode the class scores and apply the
    # predicted offsets to the input RoIs.
    # cls_id (B,N,C) scores(B,N,C)
    cls_ids, scores = self.cls_decoder(nd.softmax(cls_pred, axis=-1))
    # Move the class axis in front of the roi axis:
    # (B,N,C) -> (B,C,N) -> (B,C,N,1)
    cls_ids = cls_ids.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
    scores = scores.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
    # (B,N,C,4) -> (B,C,N,4)
    box_pred = box_pred.transpose((0, 2, 1, 3))

    per_image = zip(
        _split_to_list(cls_ids,
                       num_outputs=self._max_batch,
                       axis=0,
                       squeeze_axis=True),
        _split_to_list(scores,
                       num_outputs=self._max_batch,
                       axis=0,
                       squeeze_axis=True),
        _split_to_list(box_pred,
                       num_outputs=self._max_batch,
                       axis=0,
                       squeeze_axis=True),
        _split_to_list(rpn_box,
                       num_outputs=self._max_batch,
                       axis=0,
                       squeeze_axis=False))

    # Decode and run NMS separately for every image of the batch.
    results = []
    for img_ids, img_scores, img_offsets, img_rois in per_image:
        # img_offsets (C,N,4), img_rois (1,N,4) -> decoded (C,N,4)
        decoded = self.box_decoder(img_offsets, self.box_to_center(img_rois))
        # ids (C,N,1), scores (C,N,1), boxes (C,N,4) -> (C,N,6)
        dets = nd.concat(img_ids, img_scores, decoded, dim=-1)
        # NMS: (C,self.nms_topk,6)
        dets = nd.contrib.box_nms(dets,
                                  overlap_thresh=self.nms_thresh,
                                  valid_thresh=0.0001,
                                  topk=self.nms_topk,
                                  coord_start=2,
                                  score_index=1,
                                  id_index=0,
                                  force_suppress=True)
        # Merge the class and box axes.
        results.append(dets.reshape((-3, 0)))

    results = nd.stack(*results, axis=0)
    ids = nd.slice_axis(results, axis=-1, begin=0, end=1)
    scores = nd.slice_axis(results, axis=-1, begin=1, end=2)
    bboxes = nd.slice_axis(results, axis=-1, begin=2, end=6)
    return ids, scores, bboxes
def hybrid_forward(self, F, inputs, outputs, initial_hidden_state,
                   batch_size_seq):
    """Encode ``inputs`` and decode ``outputs`` step by step.

    Both recurrent cells are unrolled manually with ``F.slice_axis`` so the
    block can be hybridized.

    :param F: ``mxnet.nd`` or ``mxnet.sym``, supplied by Gluon
    :param inputs: source token ids (presumably (N, in_seq_len) -- TODO confirm)
    :param outputs: target token ids fed to the decoder
    :param initial_hidden_state: initial hidden state of the encoder cell
    :param batch_size_seq: per-sample index (0, 1, 2, ...) used to address
        rows of the flattened encoder states
    :return: output of ``self.dense`` on the masked, batch-normalised decoder
        outputs
    """

    def _end_position(tokens):
        # Mark END tokens with 1 and take the argmax of that mask along
        # axis 1, i.e. the sentence length expressed as the END tag index.
        end_mask = F.where(tokens == self.end_idx, F.ones_like(tokens),
                           F.zeros_like(tokens))
        return F.argmax(end_mask, axis=1)

    inputs = F.cast(inputs, dtype='float32')
    outputs = F.cast(outputs, dtype='float32')
    in_sent_last_idx = _end_position(inputs)
    out_sent_last_idx = _end_position(outputs)

    # encoder GRU
    embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')
    next_h = initial_hidden_state
    enouts = None
    next_hs = None
    for step in range(self.in_seq_len):
        step_in = F.reshape(
            F.slice_axis(embeddinged_in, axis=1, begin=step, end=step + 1),
            (-1, self.embed_dim))
        enout, (next_h, ) = self.encoder(step_in, [next_h])
        # First step seeds the accumulators, later steps append on axis 1.
        if enouts is None:
            enouts, next_hs = enout, next_h
        else:
            enouts = F.concat(enouts, enout, dim=1)
            next_hs = F.concat(next_hs, next_h, dim=1)

    # Zero the encoder outputs past each sentence's END position.
    # SequenceMask is applied with the time axis first, hence the transposes.
    enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
    enouts = F.transpose(enouts, (1, 0, 2))
    enouts = F.SequenceMask(enouts,
                            sequence_length=in_sent_last_idx + 1,
                            use_sequence_length=True)
    enouts = F.transpose(enouts, (1, 0, 2))

    # take only supports axis 0, so flatten the per-step states:
    # N, 30, 300 -> N * 30, 300 , N = (0,1,2,3,4,5...)
    # and address each sample's state at its END position.
    next_hs = F.reshape(next_hs, (-1, self.n_hidden))
    next_hs = next_hs.take(in_sent_last_idx +
                           (batch_size_seq * self.max_seq_length))

    # decoder GRU with attention
    embeddinged_out = F.cast(self.embedding(outputs), dtype='float32')
    deouts = None
    for step in range(self.out_seq_len):
        # Unroll the GRUCell out_seq_len times and collect its outputs.
        # Slicing (instead of outputs[:, i, :]) keeps this hybridizable.
        step_in = F.reshape(
            F.slice_axis(embeddinged_out, axis=1, begin=step, end=step + 1),
            (-1, self.embed_dim))
        if self.attention:
            step_in, _ = self.apply_attention(F=F,
                                              inputs=step_in,
                                              hidden=next_hs,
                                              encoder_outputs=enouts)
        deout, (next_hs, ) = self.decoder(step_in, [next_hs])
        deouts = deout if deouts is None else F.concat(deouts, deout, dim=1)

    # reshape from 2 dims to 3 dims
    deouts = F.reshape(deouts, (-1, self.out_seq_len, self.n_hidden))
    # zero padding past each target sentence's END position
    deouts = F.transpose(deouts, (1, 0, 2))
    deouts = F.SequenceMask(deouts,
                            sequence_length=out_sent_last_idx + 1,
                            use_sequence_length=True)
    deouts = F.transpose(deouts, (1, 0, 2))

    deouts_fc = self.dense(self.batchnorm(deouts))
    return (deouts_fc)
def calulation(self, input_str, ko_dict, en_dict, en_rev_dict, ctx):
    """Translate one sentence by greedy decoding (inference code).

    :param input_str: raw source sentence; it is stripped, split with
        ``mecab.morphs`` and wrapped in 'START' / 'END'
    :param ko_dict: source vocabulary passed to ``encoding_and_padding``
    :param en_dict: target vocabulary; only ``en_dict['START']`` is read
    :param en_rev_dict: maps a target token index back to its string
    :param ctx: device context for the created arrays
    :return: tuple ``(sentence, att_weights)`` where ``sentence`` is the
        generated tokens joined by single spaces (empty when the first
        generated token is 'END' or '__PAD__') and ``att_weights`` is the
        per-step attention weights concatenated on axis 0, or ``None`` when
        ``self.attention`` is disabled
    """
    # Add the START / END markers around the tokenised sentence.
    input_str = [
        ['START', ] + mecab.morphs(input_str.strip()) + ['END', ],
    ]
    X = encoding_and_padding(input_str,
                             ko_dict,
                             max_seq=self.max_seq_length)

    # string to embed
    inputs = F.array(X, ctx=ctx)
    inputs = F.cast(inputs, dtype='float32')
    in_sent_last_idx = F.argmax(F.where(inputs == self.end_idx,
                                        F.ones_like(inputs),
                                        F.zeros_like(inputs)),
                                axis=1)

    # encoder GRU
    embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')
    # NOTE(review): the encoder starts from a random hidden state, so
    # repeated calls on the same sentence may differ -- confirm intended.
    next_h = F.random.normal(0, 1, (1, self.n_hidden), ctx=ctx)
    for j in range(self.in_seq_len):
        p_outputs = F.slice_axis(embeddinged_in, axis=1, begin=j, end=j + 1)
        p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
        enout, (next_h, ) = self.encoder(p_outputs, [next_h])
        if j == 0:
            enouts = enout
            next_hs = next_h
        else:
            enouts = F.concat(enouts, enout, dim=1)
            next_hs = F.concat(next_hs, next_h, dim=1)

    # masking with 0 using length
    enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
    enouts = F.transpose(enouts, (1, 0, 2))
    enouts = F.SequenceMask(enouts,
                            sequence_length=in_sent_last_idx + 1,
                            use_sequence_length=True)
    enouts = F.transpose(enouts, (1, 0, 2))

    # take only supports axis 0, so flatten the per-step states first;
    # with a single sentence the END position is the row index itself.
    next_hs = F.reshape(next_hs, (-1, self.n_hidden))
    next_hs = next_hs.take(in_sent_last_idx)

    # Embed 'START' as the first decoder input.
    Y_init = F.array([[en_dict['START'], ], ], ctx=ctx)
    Y_init = F.cast(self.embedding(Y_init), dtype='float32')
    deout = Y_init[:, 0, :]

    # Initialised up front so the return below is always well defined, even
    # when decoding stops at the first step or attention is disabled.
    ret_seq = []
    att_weights = None

    # Iterate for at most the output sequence length.
    for i in range(self.out_seq_len):
        if self.attention:
            deout, att_weight = self.apply_attention(
                F=F, inputs=deout, hidden=next_hs, encoder_outputs=enouts)
            if i == 0:
                att_weights = att_weight
            else:
                att_weights = F.concat(att_weights, att_weight, dim=0)
        deout, (next_hs, ) = self.decoder(deout, [next_hs])

        # Add a dimension for batchnorm, then drop it again.
        deout = F.expand_dims(deout, axis=1)
        deout = self.batchnorm(deout)
        deout = deout[:, 0, :]

        # Scores of the next token. Softmax is monotonic, so the argmax of
        # the raw dense output already is the greedy choice.
        deout_sm = self.dense(deout)
        next_token = F.argmax(deout_sm, axis=1)

        gen_char = en_rev_dict[next_token.asnumpy()[0].astype('int')]
        if gen_char == '__PAD__' or gen_char == 'END':
            break
        ret_seq.append(gen_char)

        # Turn the chosen token into the next decoder input
        # (embed it and restore the expected dimensions).
        deout = F.expand_dims(next_token, axis=0)
        deout = F.cast(self.embedding(deout)[:, 0, :], dtype='float32')

    return (" ".join(ret_seq), att_weights)
def train(net, train_data, val_data, classes, args):
    """Training pipeline.

    Initialises any uninitialised parameters, trains ``net`` with SGD from
    ``args.start_epoch`` up to ``args.epochs``, and after every epoch logs
    the metrics, runs ``validate`` and calls ``save_params``. Devices come
    from the module-level ``ctx``; log records are additionally written to
    ``<args.save_prefix>_train.log``.
    """
    # Only parameters that carry no data yet are initialised.
    for param in net.collect_params().values():
        if param._data is None:
            param.initialize()
    net.collect_params().reset_ctx(ctx)

    optimizer_params = {
        'learning_rate': args.lr,
        'wd': args.wd,
        'momentum': args.momentum,
    }
    trainer = gluon.Trainer(net.collect_params(), 'sgd', optimizer_params)

    # lr decay policy
    lr_decay = float(args.lr_decay)
    lr_steps = sorted(
        float(step) for step in args.lr_decay_epoch.split(',')
        if step.strip())

    cls_loss = gluon.loss.SoftmaxCrossEntropyLoss()
    box_loss = gluon.loss.HuberLoss()
    acc_metric = Accuracy(axis=-1, ignore_labels=[-1])
    ce_metric = mx.metric.Loss('CrossEntropy')
    smoothl1_metric = mx.metric.Loss('SmoothL1')

    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = args.save_prefix + '_train.log'
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logger.addHandler(logging.FileHandler(log_file_path))
    logger.info(args)
    logger.info('Start training from [Epoch %d]', args.start_epoch)

    best_map = [0]
    for epoch in range(args.start_epoch, args.epochs):
        # Apply every decay step that this epoch has reached.
        while lr_steps and epoch >= lr_steps[0]:
            new_lr = trainer.learning_rate * lr_decay
            lr_steps.pop(0)
            trainer.set_learning_rate(new_lr)
            logger.info("[Epoch {}] Set learning rate to {}".format(
                epoch, new_lr))

        for metric in (acc_metric, ce_metric, smoothl1_metric):
            metric.reset()
        tic = time.time()
        btic = time.time()
        net.hybridize()

        for i, batch in enumerate(train_data):
            batch_size = batch[0].shape[0]
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0)
            outputs = []
            labels = []
            raw_cls_losses = []  # temporary cls loss holder
            raw_box_losses = []  # temporary box loss holder
            num_positive = []

            with autograd.record():
                for x, y in zip(data, label):
                    cls_preds, box_preds, anchors = net(x)
                    with autograd.pause():
                        # Targets are generated inside autograd.pause because
                        # nothing has to be back-propagated to the labels;
                        # this reduces the auto-differentiation overhead.
                        gt_boxes = nd.slice_axis(y, axis=-1, begin=0, end=4)
                        gt_ids = nd.slice_axis(y, axis=-1, begin=4, end=5)
                        cls_targets, box_targets, box_masks = (
                            net.target_generator(anchors, cls_preds,
                                                 gt_boxes, gt_ids))
                        # Remember the number of positive samples; it is
                        # used to normalize the loss.
                        num_positive.append(
                            nd.sum(cls_targets > 0).asscalar())

                    # cls loss: multi class cross entropy, ignored labels
                    # are masked out through the sample weight
                    valid_mask = (cls_targets >= 0).expand_dims(axis=-1)
                    l1 = cls_loss(cls_preds, cls_targets, valid_mask)
                    raw_cls_losses.append(l1 * cls_targets.size /
                                          cls_targets.shape[0])
                    # box loss: huber loss (the smoothl1 loss of the paper)
                    l2 = box_loss(box_preds * box_masks, box_targets)
                    raw_box_losses.append(l2 * box_targets.size /
                                          box_targets.shape[0])
                    # some records for metrics
                    outputs.append(cls_preds)
                    labels.append(cls_targets)

                # n_pos is the overall positive samples in the entire batch
                n_pos = max(1, sum(num_positive))
                loss_pairs = list(zip(raw_cls_losses, raw_box_losses))
                # normalize the losses by n_pos
                Ls = [l3 / n_pos + l4 / n_pos for l3, l4 in loss_pairs]
                # per-loss values for the metrics, rescaled for the batch
                ce_values = [l3 / n_pos * batch_size for l3, _ in loss_pairs]
                box_values = [l4 / n_pos * batch_size for _, l4 in loss_pairs]
                autograd.backward(Ls)

            # The loss is already normalized, so do not normalize by the
            # batch size again.
            trainer.step(1)
            ce_metric.update(0, ce_values)
            smoothl1_metric.update(0, box_values)
            acc_metric.update(labels, outputs)

            if args.log_interval and not (i + 1) % args.log_interval:
                name1, loss1 = ce_metric.get()
                name2, loss2 = smoothl1_metric.get()
                name3, loss3 = acc_metric.get()
                logger.info(
                    '[Epoch %d][Batch %d], Speed: %f samples/sec, %s=%f, %s=%f, %s=%f',
                    epoch, i, batch_size / (time.time() - btic), name1,
                    loss1, name2, loss2, name3, loss3)
            btic = time.time()

        name1, loss1 = ce_metric.get()
        name2, loss2 = smoothl1_metric.get()
        name3, loss3 = acc_metric.get()
        logger.info('[Epoch %d] Training cost: %f, %s=%f, %s=%f, %s=%f',
                    epoch, (time.time() - tic), name1, loss1, name2, loss2,
                    name3, loss3)

        map_name, mean_ap = validate(net, val_data, ctx, classes)
        val_msg = '\n'.join(
            ['%s=%f' % (k, v) for k, v in zip(map_name, mean_ap)])
        logger.info('[Epoch %d] Validation: \n%s', epoch, val_msg)
        save_params(net, best_map, mean_ap[-1], epoch, args.save_interval,
                    args.save_prefix)
return xywh_pred NUM_EPOCHS = 40 for epoch in range(NUM_EPOCHS): train_iter.reset() for i, batch in enumerate(train_iter): x = batch.data[0].as_in_context(ctx) y = batch.label[0].as_in_context(ctx) with autograd.record(): feature = net(x) #with autograd.pause(): xywh_pred = feature_forward(feature) box_lb = nd.slice_axis(y, begin=1, end=5, axis=-1) xywh_lb = transform_center(box_lb) loss = l1_loss(xywh_pred, xywh_lb) loss.backward() trainer.step(BATCH_SIZE) if i % 50 == 0: try: #pdb.set_trace() print(xywh_pred.asnumpy()[0], xywh_lb.asnumpy()[0]) loss_record = nd.mean(loss).asscalar() print('Epoch: {0}, iter: {1}, loss: {2}'.format( epoch, i, loss_record)) except: #pdb.set_trace()