def class_and_score_forward(x):
    """Split ``x`` along its last axis and squash both parts with a sigmoid.

    Entries ``[0, 3)`` of the last axis form the class part and entries
    ``[3, 5)`` form the concentration part.

    Returns
    -------
    tuple
        ``(class_part, concentration_part)``, each passed through
        ``nd.sigmoid``.
    """
    class_logits = nd.slice_axis(x, axis=-1, begin=0, end=3)
    concentration_logits = nd.slice_axis(x, axis=-1, begin=3, end=5)
    return nd.sigmoid(class_logits), nd.sigmoid(concentration_logits)
Beispiel #2
0
    def _minimize(self, data, labels):
        """Process one lot in batches, update the parameters, return mean loss.

        The first ``lot_size`` rows (axis 0) of ``data`` and ``labels`` are
        walked in chunks of ``self._batch_size``. Each chunk is handed to
        ``self._accumulate_batch_gradients`` together with a per-parameter
        accumulator; the accumulator is then passed to
        ``self._update_params``.

        Returns
        -------
        The summed loss of all chunks divided by ``lot_size``.
        """
        # One zero-filled accumulator per parameter, keyed by parameter name.
        grad_sums = {
            name: nd.zeros_like(value)
            for name, value in self._params.items()
        }

        total_loss = 0
        for lo in range(0, self._hyperparams['lot_size'], self._batch_size):
            # The final chunk may be shorter than a full batch.
            hi = min(self._hyperparams['lot_size'], lo + self._batch_size)
            x_chunk = nd.slice_axis(data, axis=0, begin=lo, end=hi)
            y_chunk = nd.slice_axis(labels, axis=0, begin=lo, end=hi)

            # The helper receives grad_sums so it can add this chunk's
            # gradients to it (clipped, per the original author's note).
            total_loss += self._accumulate_batch_gradients(
                x_chunk, y_chunk, grad_sums)

            # Wait for pending work so memory can be released before the
            # next chunk is processed.
            nd.waitall()

        # Apply the accumulated gradients to the parameters.
        self._update_params(grad_sums)

        # asscalar() blocks until the value is available; callers depend on it.
        return total_loss.asscalar() / self._hyperparams['lot_size']
def test_slice_axis():
    """Check ``nd.slice_axis`` on a SMALL_Y x LARGE_X tensor along both axes."""
    tensor = create_2d_tensor(rows=SMALL_Y, columns=LARGE_X)

    # Keep the first half of the rows, then the first half of the columns.
    top_half = nd.slice_axis(tensor, axis=0, begin=0, end=SMALL_Y // 2)
    left_half = nd.slice_axis(tensor, axis=1, begin=0, end=LARGE_X // 2)

    assert top_half.shape[0] == tensor.shape[0] // 2
    assert left_half.shape[1] == tensor.shape[1] // 2
    # The value checks presume create_2d_tensor fills each row with its
    # row index -- TODO confirm against the helper.
    assert top_half[-1][0] == (SMALL_Y // 2 - 1)
    assert left_half[-1][-1] == (SMALL_Y - 1)
    def forward(self, refined_anchors, targets, num_objects):
        """Build classification and box-regression targets for a batch.

        Parameters
        ----------
        refined_anchors
            Corner-format boxes (xmin, ymin, xmax, ymax); documented shape
            (B, N, 4).
        targets
            Ground truth rows (xmin, ymin, xmax, ymax, label); documented
            shape (B, P, 5).
        num_objects
            Documented shape (B, ); number of valid objects in each image.

        Returns
        -------
        tuple
            ``(cls_targets, box_targets, box_masks)``, each concatenated
            over the batch along axis 0. Per the original notes: class
            targets are positive for objects, 0 for background and -1 for
            ignored anchors; box masks are 1.0 for positive boxes and 0.0
            otherwise.
        """
        per_image_cls = []
        per_image_box = []
        per_image_mask = []
        with autograd.pause():
            for anchors_i, target_i, count_i in zip(refined_anchors, targets,
                                                    num_objects):
                # Keep only the first `count_i` ground-truth rows: (M, 5).
                valid_count = count_i[0].asscalar()
                gt = nd.slice_axis(target_i, axis=0, begin=0, end=valid_count)

                # Last column is the label: (M, 1) -> (1, M).
                gt_labels = nd.slice_axis(gt, axis=1, begin=-1, end=None)
                gt_labels = gt_labels.reshape((1, -1))
                # Remaining columns are the corners: (M, 4) -> (1, M, 4).
                gt_corners = nd.slice_axis(gt, axis=1, begin=0, end=-1)
                gt_corners = gt_corners.reshape((1, -1, 4))

                # IoU table, (N, 1, M) -> (1, N, M).
                overlaps = nd.contrib.box_iou(anchors_i, gt_corners)
                overlaps = nd.transpose(overlaps, (1, 0, 2))

                # Matched object index in [0, M-1], -1 when unmatched: (1, N).
                matched = self._matcher(overlaps)
                # +1 for object, -1 for background, 0 for ignored: (1, N).
                sampled = self._sampler(matched)

                # Class targets, (1, N).
                cls_i = self._cls_encoder(sampled, matched, gt_labels)

                # (N, 4) -> (1, N, 4) so the box encoder sees a batch axis.
                anchors_i = nd.expand_dims(anchors_i, axis=0)
                box_i, mask_i = self._box_encoder(sampled, matched, anchors_i,
                                                  gt_corners)

                per_image_cls.append(cls_i)
                per_image_box.append(box_i)
                per_image_mask.append(mask_i)

            # Stack the per-image results back into batch tensors:
            # (B, N), (B, N, 4) and (B, N, 4).
            cls_targets = nd.concat(*per_image_cls, dim=0)
            box_targets = nd.concat(*per_image_box, dim=0)
            box_masks = nd.concat(*per_image_mask, dim=0)

            return cls_targets, box_targets, box_masks
def split_and_load_data(batch, ctx_list, batch_size):
    """Split every array of a mini-batch across the given contexts.

    Each context receives ``batch_size // len(ctx_list)`` consecutive samples
    (axis 0). When ``batch_size`` is not divisible by the number of contexts,
    the trailing remainder is not assigned to any context.

    :param batch: iterable of NDArrays (e.g. data and label arrays)
    :param ctx_list: list of contexts to load the slices onto
    :param batch_size: number of samples in the mini-batch
    :return: list with one entry per array in ``batch``; each entry is a list
        holding that array's slice for every context, e.g.
        ``[[data1, data2, data3], [label1, label2, label3]]``
    """
    per_ctx = batch_size // len(ctx_list)
    # Half-open [begin, end) sample range owned by each context.
    bounds = [(k * per_ctx, min((k + 1) * per_ctx, batch_size))
              for k in range(len(ctx_list))]
    return [
        [
            nd.slice_axis(array, axis=0, begin=lo, end=hi).as_in_context(ctx)
            for (lo, hi), ctx in zip(bounds, ctx_list)
        ]
        for array in batch
    ]
Beispiel #6
0
def yolo2_target(scores, boxes, labels, anchors, ignore_label=-1, thresh=0.5):
    """Generate YOLOv2 training targets from ground-truth labels.

    ``scores`` is only used for its shape ``(batch, h, w, n, _)`` and its
    context. ``boxes`` and ``thresh`` are accepted for interface
    compatibility and are not read.

    Original author's notes (not verified here): the network prediction has
    shape (32, 16, 16, 2, 5), and ``labels`` has shape (32, 1, 5) where each
    row is a class label followed by the top-left and bottom-right corners,
    expressed relative to the whole image.

    Parameters
    ----------
    scores : NDArray whose shape and context define the target arrays.
    boxes : unused.
    labels : per-image rows ``(class, x1, y1, x2, y2)``; rows whose class is
        not greater than -0.5 are skipped.
    anchors : flat or nested sequence reshaped to ``(-1, 2)`` (width, height).
    ignore_label : value ``target_id`` is initialised with.
    thresh : unused.

    Returns
    -------
    tuple
        ``(target_id, target_score, target_box, sample_weight)``.
    """
    batch_size, h, w, n, _ = scores.shape
    anchors = np.reshape(np.array(anchors), (-1, 2))
    target_score = nd.zeros((batch_size, h, w, n, 1), ctx=scores.context)
    target_id = nd.ones_like(target_score, ctx=scores.context) * ignore_label
    target_box = nd.zeros((batch_size, h, w, n, 4), ctx=scores.context)
    # Only the anchor that best matches a ground-truth box gets weight 1.
    sample_weight = nd.zeros((batch_size, h, w, n, 1), ctx=scores.context)

    # Iterate over the samples of the batch. The original looped over
    # ``output.shape[0]`` where ``output`` is not defined in this function.
    for b in range(batch_size):
        # find the best match for each ground-truth
        label = labels[b].asnumpy()
        # Drop padding / invalid rows (negative class label).
        valid_label = label[np.where(label[:, 0] > -0.5)[0], :]
        # shuffle because multiple ground truths could match the same anchor;
        # the last one written wins, so the winner is random
        np.random.shuffle(valid_label)
        for l in valid_label:
            # Corner format -> centre / size, still relative to the image.
            gx, gy = (l[1] + l[3]) / 2, (l[2] + l[4]) / 2
            gw, gh = l[3] - l[1], l[4] - l[2]
            # Grid cell (column, row) responsible for this ground-truth box.
            ind_x = int(gx * w)
            ind_y = int(gy * h)
            # Offset of the box centre from the cell's top-left corner; this
            # is the value the model regresses.
            tx = gx * w - ind_x
            ty = gy * h - ind_y
            # Box width / height measured in feature-map cells.
            gw = gw * w
            gh = gh * h
            # find the best match using width and height only, assuming
            # centers are identical: intersection, then IoU, per anchor
            intersect = np.minimum(anchors[:, 0], gw) * np.minimum(
                anchors[:, 1], gh)
            ovps = intersect / (
                gw * gh + anchors[:, 0] * anchors[:, 1] - intersect)
            # Anchor shape that fits the ground-truth box best.
            best_match = int(np.argmax(ovps))
            target_id[b, ind_y, ind_x, best_match, :] = l[0]
            target_score[b, ind_y, ind_x, best_match, :] = 1.0
            # Log-ratio of the box size to the matched anchor size.
            tw = np.log(gw / anchors[best_match, 0])
            th = np.log(gh / anchors[best_match, 1])
            target_box[b, ind_y, ind_x, best_match, :] = mx.nd.array(
                [tx, ty, tw, th])
            sample_weight[b, ind_y, ind_x, best_match, :] = 1.0
    return target_id, target_score, target_box, sample_weight
Beispiel #7
0
    def forward(self, x):
        """Route ``x`` through the structure and return the selected embedding.

        With at least one router layer, ``self._contextify(x)(root)`` yields
        four dictionaries (router values, router matrices, weights,
        embeddings). Their entries are stacked in sorted-key order, one
        router-matrix row is picked per sample from an argmax over the
        router values, and the result is multiplied with the stacked
        embeddings. The weights dictionary is not used.

        Without router layers the contextified root output is multiplied by
        an all-ones array shaped like ``x``.
        """
        root = next(iter(self._structure.items()))[0]

        if len(self._routerlayer) == 0:
            ones = nd.ones_like(nd.slice_axis(x, axis=1, begin=0, end=None))
            return self._contextify(x)(root) * ones

        routers_by_key, router_mats_by_key, _weights_by_key, embedds_by_key = (
            self._contextify(x)(root))

        embedd = nd.stack(
            *[embedds_by_key[key] for key in sorted(embedds_by_key)], axis=0)
        router = nd.stack(
            *[routers_by_key[key] for key in sorted(routers_by_key)], axis=-1)
        router_mat = nd.stack(
            *[router_mats_by_key[key] for key in sorted(router_mats_by_key)],
            axis=1)

        # Per-sample index with the largest clamped 1 / (router + 0.5).
        chosen = nd.argmax(nd.maximum(0, 1 / (router + 0.5)), axis=1)

        # Pick, for every sample, the router-matrix row at its chosen index.
        rows = [router_mat[i][k] for i, k in enumerate(chosen)]
        head = nd.concat(*rows, dim=0)

        return nd.dot(head, embedd)
Beispiel #8
0
        def _shard(split, x, l_fn, r_fn):
            """Dispatch the rows of ``x`` to ``l_fn`` / ``r_fn`` by ``split``.

            Rows are first reordered by ascending ``split`` value. If the
            smallest value is positive everything goes to ``r_fn``; if the
            largest is negative everything goes to ``l_fn``. Otherwise the
            reordered rows are cut in two at an index derived from the sorted
            split values (presumably the sign change -- TODO confirm) and
            each part is passed to its function. Nothing is returned.
            """
            order = nd.argsort(split, axis=None)
            x_sorted = x[order, :]
            split_sorted = split[order]

            if split_sorted[0] > 0:
                r_fn(x_sorted)
                return
            if split_sorted[-1] < 0:
                l_fn(x_sorted)
                return

            # Rank of each sorted value multiplied by its sign; the position
            # of the smallest such product, plus one, is the cut index.
            signed_rank = nd.argsort(split_sorted, axis=0) * nd.sign(
                split_sorted)
            cut = nd.argsort(signed_rank, axis=None)[0] + 1
            cut = int(cut.asscalar())

            left_rows = nd.slice_axis(x_sorted, axis=0, begin=0, end=cut)
            right_rows = nd.slice_axis(x_sorted, axis=0, begin=cut, end=None)

            l_fn(left_rows)
            r_fn(right_rows)
Beispiel #9
0
def test_ssd_custom(net, valid_iter, ctx):
    """Evaluate ``net`` on ``valid_iter`` with the VOC07 mAP metric.

    Every batch is moved to ``ctx``, run through the network and fed to the
    metric on its own; throughput is logged per batch and every metric entry
    is logged at the end.

    Returns
    -------
    The last value reported by the metric.
    """
    voc_classes = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
                   'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
                   'horse', 'motorbike', 'person', 'pottedplant', 'sheep',
                   'sofa', 'train', 'tvmonitor')
    metric = gcv.utils.metrics.voc_detection.VOC07MApMetric(
        iou_thresh=0.5, class_names=voc_classes)
    tic = time.time()

    net.hybridize(static_alloc=True, static_shape=True)
    for batch_idx, batch in enumerate(valid_iter):
        images = batch[0].as_in_context(ctx)
        labels = batch[1].as_in_context(ctx)

        ids, scores, bboxes = net(images)
        # Label layout along the last axis: class id first, box after it.
        gt_bboxes = nd.slice_axis(labels, axis=-1, begin=1, end=None)
        gt_ids = nd.slice_axis(labels, axis=-1, begin=0, end=1)

        # The metric takes lists; each call carries exactly one batch.
        metric.update(pred_bboxes=[bboxes],
                      pred_labels=[ids],
                      pred_scores=[scores],
                      gt_bboxes=[gt_bboxes],
                      gt_labels=[gt_ids])

        elapsed = time.time() - tic
        logger.info("test batch {} speeds {}".format(
            batch_idx, images.shape[0] / elapsed))
        tic = time.time()

    names, values = metric.get()
    for name, value in zip(names, values):
        logger.info("{} {}".format(name, value))
    return values[-1]
    def forward(self, scores, offsets, anchors, img):
        """Turn anchor scores and offsets into a fixed number of proposals.

        Anchors are decoded with the predicted offsets, clipped against
        ``img``, filtered by minimum size, passed through NMS and truncated
        to the post-NMS budget. The budgets differ between training and
        inference.

        Returns
        -------
        tuple
            ``(rpn_score, rpn_bbox)``: the first column and the remaining
            columns of the kept NMS rows.
        """
        # Training and inference use different pre-/post-NMS limits.
        if autograd.is_training():
            pre_nms, post_nms = self._train_pre_nms, self._train_post_nms
        else:
            pre_nms, post_nms = self._test_pre_nms, self._test_post_nms

        with autograd.pause():
            # Apply the predicted offsets to the anchors, then clip.
            centered_anchors = self._bbox_tocenter(anchors)
            rois = self._bbox_decoder(offsets, centered_anchors)
            rois = self._cliper(rois, img)

            # Flag every ROI whose width or height is below the minimum size.
            x_min, y_min, x_max, y_max = nd.split(rois, num_outputs=4, axis=-1)
            too_narrow = (x_max - x_min) < self._min_size
            too_short = (y_max - y_min) < self._min_size
            too_small = too_narrow + too_short

            # Flagged entries get a score of -1 and coordinates of -1.
            scores = nd.where(too_small, nd.ones_like(scores) * -1, scores)
            coord_mask = nd.repeat(too_small, repeats=4, axis=-1)
            rois = nd.where(coord_mask, nd.ones_like(rois) * -1, rois)

            # Non-maximum suppression on rows laid out as [score, x0, y0, x1, y1].
            candidates = nd.concat(scores, rois, dim=-1)
            suppressed = nd.contrib.box_nms(candidates,
                                            overlap_thresh=self._nms_thresh,
                                            topk=pre_nms,
                                            coord_start=1,
                                            score_index=0,
                                            id_index=-1,
                                            force_suppress=True)

            # Keep the first `post_nms` rows and split score from box.
            kept = nd.slice_axis(suppressed, axis=1, begin=0, end=post_nms)
            rpn_score = nd.slice_axis(kept, axis=-1, begin=0, end=1)
            rpn_bbox = nd.slice_axis(kept, axis=-1, begin=1, end=None)

        return rpn_score, rpn_bbox
Beispiel #11
0
    def calc_auc(self, label, output):
        """Accumulate column-1 softmax scores and labels for this batch.

        ``output`` is normalised row-wise with an explicit exp / sum, column 1
        is kept as the score, and score and ``label`` are appended (axis 0)
        to ``self.global_score`` / ``self.global_lable``. No AUC value is
        computed or returned here.
        """
        exp_out = output.exp()
        probs = exp_out / exp_out.sum(axis=1, keepdims=True)
        positive = nd.slice_axis(probs, axis=1, begin=1, end=2)

        if self.global_score is not None:
            self.global_score = nd.concat(self.global_score, positive, dim=0)
            self.global_lable = nd.concat(self.global_lable, label, dim=0)
            return

        # First call: nothing accumulated yet, so start the buffers.
        self.global_score = positive
        self.global_lable = label
Beispiel #12
0
def refine_bbox_nd(bbox, bbox_delta, im_info=None, means=None, stds=None):
    """Apply regression deltas to corner-format boxes and optionally clip.

    Parameters
    ----------
    bbox : boxes split along axis 1 into (xmin, ymin, xmax, ymax).
    bbox_delta : deltas reshaped to ``(0, -1, 4)`` and split along axis 2
        into (dx, dy, dw, dh).
    im_info : optional; assumed by the original author to be
        ``[[height, width, scale]]`` with shape (1, 3). When given, the
        result is clipped to ``[0, width - 1]`` / ``[0, height - 1]``.
    means, stds : optional; when both are given each delta is de-normalised
        as ``delta * stds[k] + means[k]``.

    Returns
    -------
    The refined boxes, with the four coordinates concatenated along axis 1.
    """
    x1, y1, x2, y2 = nd.split(data=bbox, num_outputs=4, axis=1)
    # Width/height use the inclusive-pixel (+1) convention.
    width = x2 - x1 + 1.
    height = y2 - y1 + 1.
    ctr_x = 0.5 * (x1 + x2)
    ctr_y = 0.5 * (y1 + y2)

    deltas = nd.Reshape(data=bbox_delta, shape=(0, -1, 4))
    dx, dy, dw, dh = nd.split(data=deltas,
                              num_outputs=4,
                              axis=2,
                              squeeze_axis=1)
    if (means is not None) and (stds is not None):
        dx, dy, dw, dh = [
            delta * stds[k] + means[k]
            for k, delta in enumerate((dx, dy, dw, dh))
        ]

    # Shift the centre by a fraction of the box size; scale size by exp(delta).
    new_ctr_x = nd.broadcast_add(lhs=ctr_x,
                                 rhs=nd.broadcast_mul(lhs=width, rhs=dx))
    new_ctr_y = nd.broadcast_add(lhs=ctr_y,
                                 rhs=nd.broadcast_mul(lhs=height, rhs=dy))
    new_width = nd.broadcast_mul(lhs=width, rhs=nd.exp(dw))
    new_height = nd.broadcast_mul(lhs=height, rhs=nd.exp(dh))
    half_w = 0.5 * (new_width - 1.)
    half_h = 0.5 * (new_height - 1.)

    refined_bbox = nd.concat(nd.expand_dims(new_ctr_x - half_w, axis=1),
                             nd.expand_dims(new_ctr_y - half_h, axis=1),
                             nd.expand_dims(new_ctr_x + half_w, axis=1),
                             nd.expand_dims(new_ctr_y + half_h, axis=1),
                             dim=1)
    if im_info is None:
        return refined_bbox

    # Build (w - 1, h - 1, w - 1, h - 1) as the upper clip bound.
    upper = nd.slice_axis(im_info, axis=1, begin=0, end=2)
    upper = nd.reverse(upper, axis=1)
    upper = upper - 1.
    upper = nd.tile(data=upper, reps=(1, 2))
    upper = nd.Reshape(upper, shape=(1, 4, 1))
    refined_bbox = nd.broadcast_minimum(lhs=refined_bbox, rhs=upper)
    refined_bbox = nd.broadcast_maximum(lhs=refined_bbox,
                                        rhs=nd.zeros_like(refined_bbox))
    return refined_bbox
Beispiel #13
0
    def forward(self, anchors, pred_classes, pred_bboxes):
        """Decode raw predictions into class ids, scores and boxes.

        Shapes below are taken from the original documentation.

        :param anchors: (1, num-of-anchor, 4), anchors[0,0,:] = x0,y0,x1,y1
        :param pred_classes: (batch-size, num-of-anchor, num-of-classes),
            including background
        :param pred_bboxes: (batch-size, num-of-anchor * 4)

        :return: tuple ``(ids, scores, bboxes)``:
            ids (batch-size, num-of-found, 1) class id of each detection,
            scores (batch-size, num-of-found, 1) class score of each detection,
            bboxes (batch-size, num-of-found, 4) coordinates (x0,y0,x1,y1)
            normalised by width/height
        """
        centered_anchors = self.corner2center(anchors)
        box_offsets = nd.reshape(pred_bboxes, (0, -1, 4))
        bboxes = self.bbox_decoder(box_offsets, centered_anchors)
        cls_ids, scores = self.cls_decoder(nd.softmax(pred_classes, axis=-1))

        # One [id, score, box] table per class, later joined along axis 1.
        per_class = [
            nd.concat(cls_ids.slice_axis(axis=-1, begin=c, end=c + 1),
                      scores.slice_axis(axis=-1, begin=c, end=c + 1),
                      bboxes,
                      dim=-1)
            for c in range(self.num_classes)
        ]
        result = nd.concat(*per_class, dim=1)

        # NMS only runs for a threshold strictly between 0 and 1.
        if 0 < self.nms_thresh < 1:
            result = nd.contrib.box_nms(result,
                                        overlap_thresh=self.nms_thresh,
                                        topk=self.nms_topk,
                                        valid_thresh=0.01,
                                        id_index=0,
                                        score_index=1,
                                        coord_start=2,
                                        force_suppress=False)
            if self.post_nms > 0:
                result = result.slice_axis(axis=1, begin=0, end=self.post_nms)

        # Column layout of `result`: [id, score, x0, y0, x1, y1].
        return (nd.slice_axis(result, axis=2, begin=0, end=1),
                nd.slice_axis(result, axis=2, begin=1, end=2),
                nd.slice_axis(result, axis=2, begin=2, end=6))
Beispiel #14
0
    def forward(self, x):
        """Combine stacked router outputs with stacked embeddings.

        With router layers present, ``self._contextify(x)(root)`` returns a
        router dictionary and an embedding dictionary; both are stacked in
        sorted-key order and multiplied with ``nd.dot``. Otherwise the
        contextified root output is multiplied by an all-ones array shaped
        like ``x``.
        """
        root = next(iter(self._structure.items()))[0]

        if len(self._routerlayer) == 0:
            ones = nd.ones_like(nd.slice_axis(x, axis=1, begin=0, end=None))
            return self._contextify(x)(root) * ones

        routers_by_key, embedds_by_key = self._contextify(x)(root)

        embedd = nd.stack(
            *[embedds_by_key[key] for key in sorted(embedds_by_key)], axis=0)
        router = nd.stack(
            *[routers_by_key[key] for key in sorted(routers_by_key)], axis=-1)

        return nd.dot(router, embedd)
Beispiel #15
0
    def _convert_score(self, score):
        """Convert raw classification output into a flat score vector.

        Parameters
        ----------
            score : ndarray
                network output (4-D; axis 0 is moved to the end first)

        Returns
        -------
            numpy array holding column 1 of a two-way softmax, one value per
            row of the reshaped output
        """
        permuted = nd.transpose(score, axes=(1, 2, 3, 0))
        two_rows = nd.reshape(permuted, shape=(2, -1))
        pairs = nd.transpose(two_rows, axes=(1, 0))
        probs = nd.softmax(pairs, axis=1)
        # Keep only the second column and drop the now-singleton axis.
        second = nd.slice_axis(probs, axis=1, begin=1, end=2)
        return nd.squeeze(second, axis=1).asnumpy()
Beispiel #16
0
 def __getitem__(self, idx):
     """Load one sample: 512 input/output crops, mask, and 256/128 images.

     Images are read with ``mx.image.imread`` and scaled as
     ``pixel / 127.5 - 1``. The 512 image is resized to
     ``(img_wd * 2, img_ht)`` and cut into two ``img_wd``-wide crops, the
     first being the input and the second the output. The mask is the first
     channel of the mask image divided by 255; a ``.jpg`` file is tried when
     the ``.png`` path does not exist. With ``self.is_transform`` set, a
     random flip and a random rotation are applied to all five arrays.

     Returns
     -------
     tuple
         ``(out_512, in_512, mask_512, out_256, out_128)``, each transposed
         with axes ``(2, 0, 1)``.
     """
     name = self.img_paths[idx]
     path_512 = self._img_512.format(name)
     path_256 = self._img_256.format(name)
     path_128 = self._img_128.format(name)
     # The label paths are formatted but not read below.
     lbl_path_256 = self._lbl_256.format(name)
     mask_path = self._mask_512.format(name)
     lbl_path_512 = self._lbl_512.format(name)

     def _read_scaled(path):
         # Read an image and apply the pixel / 127.5 - 1 scaling.
         return mx.image.imread(path).astype(np.float32) / 127.5 - 1

     arr_256 = _read_scaled(path_256)
     arr_512 = _read_scaled(path_512)
     arr_128 = _read_scaled(path_128)

     # Input and output sit side by side in the 512 image.
     arr_512 = mx.image.imresize(arr_512, img_wd * 2, img_ht)
     in_512 = mx.image.fixed_crop(arr_512, 0, 0, img_wd, img_ht)
     out_512 = mx.image.fixed_crop(arr_512, img_wd, 0, img_wd, img_ht)

     if not os.path.exists(mask_path):
         mask_path = mask_path.replace(".png", '.jpg', 1)
     mask_img = mx.image.imread(mask_path)
     mask = nd.slice_axis(mask_img, axis=2, begin=0, end=1) / 255

     if self.is_transform:
         augmented = [out_512, in_512, mask, arr_256, arr_128]
         augmented = random_horizontal_flip(augmented)
         augmented = random_rotate(augmented)
         out_512 = augmented[0]
         in_512 = augmented[1]
         mask = augmented[2]
         arr_256 = augmented[3]
         arr_128 = augmented[4]

     in_512 = nd.transpose(in_512, (2, 0, 1))
     out_512 = nd.transpose(out_512, (2, 0, 1))
     out_256 = nd.transpose(arr_256, (2, 0, 1))
     out_128 = nd.transpose(arr_128, (2, 0, 1))
     # Force an explicit trailing channel axis before the transpose.
     mask = mask.reshape(mask.shape[0], mask.shape[1], 1)
     mask = nd.transpose(mask, (2, 0, 1))
     return out_512, in_512, mask, out_256, out_128
Beispiel #17
0
def yolo2_target(scores, boxes, labels, anchors, ignore_label=-1, thresh=0.5):
    """Generate training targets given predictions and labels.

    ``scores`` is only used for its shape ``(batch, h, w, n, _)`` and its
    context. ``boxes`` and ``thresh`` are accepted for interface
    compatibility and are not read.

    Parameters
    ----------
    scores : NDArray whose shape and context define the target arrays.
    boxes : unused.
    labels : per-image rows ``(class, x1, y1, x2, y2)``; rows whose class is
        not greater than -0.5 are skipped. Coordinates are presumably
        relative to the image size (they are scaled by ``w`` / ``h``).
    anchors : flat or nested sequence reshaped to ``(-1, 2)`` (width, height).
    ignore_label : value ``target_id`` is initialised with.
    thresh : unused.

    Returns
    -------
    tuple
        ``(target_id, target_score, target_box, sample_weight)``.
    """
    batch_size, h, w, n, _ = scores.shape
    anchors = np.reshape(np.array(anchors), (-1, 2))
    target_score = nd.zeros((batch_size, h, w, n, 1), ctx=scores.context)
    target_id = nd.ones_like(target_score, ctx=scores.context) * ignore_label
    target_box = nd.zeros((batch_size, h, w, n, 4), ctx=scores.context)
    sample_weight = nd.zeros((batch_size, h, w, n, 1), ctx=scores.context)

    # The original looped over ``output.shape[0]`` where ``output`` is not
    # defined in this function; the batch size comes from ``scores``.
    for b in range(batch_size):
        # find the best match for each ground-truth
        label = labels[b].asnumpy()
        valid_label = label[np.where(label[:, 0] > -0.5)[0], :]
        # shuffle because multiple ground truths could match the same anchor;
        # the last one written wins, so the winner is random
        np.random.shuffle(valid_label)
        for l in valid_label:
            # Corner format -> centre / size.
            gx, gy = (l[1] + l[3]) / 2, (l[2] + l[4]) / 2
            gw, gh = l[3] - l[1], l[4] - l[2]
            # Responsible grid cell and the centre offset inside that cell.
            ind_x = int(gx * w)
            ind_y = int(gy * h)
            tx = gx * w - ind_x
            ty = gy * h - ind_y
            # Box size measured in feature-map cells.
            gw = gw * w
            gh = gh * h
            # find the best match using width and height only, assuming
            # centers are identical
            intersect = np.minimum(anchors[:, 0], gw) * np.minimum(
                anchors[:, 1], gh)
            ovps = intersect / (gw * gh + anchors[:, 0] * anchors[:, 1] -
                                intersect)
            best_match = int(np.argmax(ovps))
            target_id[b, ind_y, ind_x, best_match, :] = l[0]
            target_score[b, ind_y, ind_x, best_match, :] = 1.0
            # Log-ratio of the box size to the matched anchor size.
            tw = np.log(gw / anchors[best_match, 0])
            th = np.log(gh / anchors[best_match, 1])
            target_box[b, ind_y, ind_x,
                       best_match, :] = mx.nd.array([tx, ty, tw, th])
            sample_weight[b, ind_y, ind_x, best_match, :] = 1.0
    return target_id, target_score, target_box, sample_weight
Beispiel #18
0
def validate(net, val_data, val_items, val_shapes, ctx, size, classes):
    """Run detection on the validation set and group detections by class id.

    For every image the boxes are clipped to the input, rescaled by the
    image scale and filed under their class id. Entries with class id -1
    are treated as "no detection" and dropped.

    ``val_shapes``, ``size`` and ``classes`` are accepted for interface
    compatibility and are not read.

    Returns
    -------
    dict
        Maps class id (int) to a list of
        ``[item, score, xmin, ymin, xmax, ymax]`` rows.
    """
    clipper = gcv.nn.bbox.BBoxClipToImage()
    net.hybridize(static_alloc=True)
    print("---Detect Total {:d} Image Start.---".format(len(val_items)))

    result_dict = {}
    for batch, item in zip(val_data, val_items):
        batch = split_and_load(batch, ctx_list=ctx)
        for x, y, im_scale in zip(*batch):
            ids, scores, bboxes = net(x)
            bboxes = clipper(bboxes, x)
            im_scale = im_scale.reshape((-1)).asscalar()
            bboxes *= im_scale

            flat_ids = nd.squeeze(ids, axis=(0, 2))
            # Descending sort pushes the -1 entries to the end.
            inds = nd.argsort(flat_ids, is_ascend=False)
            all_ids = flat_ids.asnumpy().astype(np.int8).tolist()
            # Compare by value: `is not -1` relied on int identity.
            valid_len = sum(1 for class_id in all_ids if class_id != -1)
            if valid_len > 0:  # valid_len must > 0
                inds = nd.slice_axis(inds, begin=0, end=valid_len, axis=0)
                scores = nd.take(scores, inds, axis=1).asnumpy()
                bboxes = nd.take(bboxes, inds, axis=1).asnumpy()
                # scores/bboxes are now in `inds` order, so class ids must be
                # read through the same indices to stay paired with them.
                order = inds.asnumpy().astype(np.int64).tolist()
                for i, src in enumerate(order):
                    class_id = all_ids[src]
                    score = scores[0, i, 0]
                    xmin, ymin, xmax, ymax = bboxes[0, i, :4]
                    result_dict.setdefault(class_id, []).append(
                        [item, score, xmin, ymin, xmax, ymax])
                print("Detect Image {:s} Done.".format(item))
    print("---Detect Total {:d} Image Done.---".format(len(val_items)))
    return result_dict
Beispiel #19
0
    def forward(self, x):
        """Mix embeddings using presence-adjusted router weights.

        With router layers present, ``self._contextify(x)(root)`` returns
        ``(router, router_mat, weight, embedd)``; ``router`` itself is not
        used. Weights are masked by ``presence`` (``router_mat`` summed over
        axis 2), and whatever is missing for each row to sum to 1 is added to
        the entry selected by ``depth``. The adjusted weights then combine
        the router matrices, and the result is multiplied with ``embedd``
        and given a trailing axis.

        Without router layers the contextified root output is multiplied by
        an all-ones array shaped like ``x``.
        """
        root = next(iter(self._structure.items()))[0]

        if len(self._routerlayer) == 0:
            ones = nd.ones_like(nd.slice_axis(x, axis=1, begin=0, end=None))
            return self._contextify(x)(root) * ones

        _router, router_mat, weight, embedd = self._contextify(x)(root)

        presence = nd.sum(router_mat, axis=2)
        masked_weight = presence * weight

        # Index derived from the top entry of the reversed presence row
        # (presumably the deepest present level -- TODO confirm).
        depth = len(self._weightlayer) - nd.topk(nd.reverse(presence, axis=1))
        depth = (depth - 1)[:, 0]

        # Add each row's missing mass to the entry at `depth`.
        leftover = 1 - nd.sum(masked_weight, axis=1)
        leftover = leftover + nd.choose_element_0index(masked_weight, depth)
        masked_weight = nd.fill_element_0index(masked_weight, leftover, depth)

        head = nd.sum(nd.expand_dims(masked_weight, axis=2) * router_mat,
                      axis=1)

        return nd.expand_dims(nd.dot(head, embedd), axis=-1)
Beispiel #20
0
def test_slice_axis():
    """Slice the first half of a large vector along axis 0 and verify it.

    Checks both the length of the slice and the value stored in its last
    row, which is expected to be ``LARGE_X // 2 - 1``.
    """
    half = LARGE_X // 2
    vector = create_vector(size=LARGE_X)
    front = nd.slice_axis(vector, axis=0, begin=0, end=half)
    assert front.shape[0] == vector.shape[0] // 2
    assert front[-1][0] == (half - 1)
                else:
                    mod.init_params(arg_params=shared_params,
                                    aux_params=None,
                                    allow_missing=True)
            else:
                mod.set_params(arg_params=dict(shared_params.items() +
                                               fc6_params[k].items()),
                               aux_params=None)

            if (not mod.optimizer_initialized):
                mod.init_optimizer(optimizer='sgd',
                                   optimizer_params=optimizer_params)
            # 训练模型
            mod.forward(data)
            pos_score = nd.slice_axis(mod.get_outputs()[0],
                                      axis=0,
                                      begin=0,
                                      end=32)
            neg_score = nd.slice_axis(mod.get_outputs()[0],
                                      axis=0,
                                      begin=32,
                                      end=128)
            mod.backward()
            mod.update()

            shared_params = {
                'conv1_weight': mod.get_params()[0]['conv1_weight'],
                'conv1_bias': mod.get_params()[0]['conv1_bias'],
                'conv2_weight': mod.get_params()[0]['conv2_weight'],
                'conv2_bias': mod.get_params()[0]['conv2_bias'],
                'conv3_weight': mod.get_params()[0]['conv3_weight'],
                'conv3_bias': mod.get_params()[0]['conv3_bias'],
Beispiel #22
0
    def forward(self, cls_targets, ctr_targets, box_targets, mask_targets,
                matches, cls_preds, ctr_preds, box_preds, mask_preds,
                maskcoe_preds):
        """Compute classification, centerness, box and mask losses.

        Every argument is wrapped with ``_as_list`` and the resulting lists
        are walked in lock-step, so each loop iteration handles one group of
        arrays (presumably one group per device -- TODO confirm with caller).

        For each group the method computes:

        * a focal-style classification loss on the one-hot targets with the
          first (index 0) class column dropped, normalised by the number of
          positions with ``clst > 0`` (at least 1);
        * a centerness loss: sigmoid cross-entropy evaluated from logits,
          restricted to positions with ``clst > 0`` and normalised likewise;
        * a box loss ``-log(IoU)`` on positions with ``clst > 0``;
        * a mask loss on the 200 highest-ranked entries of ``matches`` per
          sample, where predicted masks are cropped to the target box.

        :return: tuple of five lists
            ``(sum_losses, cls_losses, ctr_losses, box_losses, mask_losses)``
            with one entry per group; ``sum_losses`` weights the four terms
            with ``_cls_lambd``, ``_ctr_lambd``, ``_box_lambd`` and
            ``_mask_lambd``.
        """
        # Factor by which box coordinates are divided before they are compared
        # with the mask grid below.
        scale = 4
        # require results across different devices at this time
        cls_targets, ctr_targets, box_targets, mask_targets, matches, cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds = \
            [_as_list(x) for x in (cls_targets, ctr_targets, box_targets, mask_targets, matches,
                                   cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds)]
        # Per-group loss accumulators, returned to the caller.
        cls_losses = []
        ctr_losses = []
        box_losses = []
        mask_losses = []
        sum_losses = []
        for clst, ctrt, boxt, maskt, matche, clsp, ctrp, boxp, maskp, maskcoep in zip(
                *[
                    cls_targets, ctr_targets, box_targets, mask_targets,
                    matches, cls_preds, ctr_preds, box_preds, mask_preds,
                    maskcoe_preds
                ]):

            # Positions whose class target is a foreground class (> 0).
            pos_gt_mask = clst > 0
            # cls loss
            if not self._from_logits:
                clsp = nd.sigmoid(clsp)
            one_hot = nd.one_hot(clst, self._num_class)
            # Drop column 0 so only the remaining class columns are scored.
            one_hot = nd.slice_axis(one_hot, begin=1, end=None, axis=-1)
            # pt: probability assigned to the correct binary outcome.
            pt = nd.where(one_hot, clsp, 1 - clsp)
            t = nd.ones_like(one_hot)
            alpha = nd.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
            # Focal form: -alpha * (1 - pt)^gamma * log(pt); eps guards log(0).
            cls_loss = -alpha * (
                (1 - pt)**self._gamma) * nd.log(nd.minimum(pt + self._eps, 1))
            # Normalise by the positive count, never dividing by less than 1.
            cls_loss = nd.sum(cls_loss) / nd.maximum(nd.sum(pos_gt_mask), 1)
            cls_losses.append(cls_loss)

            # ctr loss
            ctrp = nd.squeeze(ctrp, axis=-1)
            pos_pred_mask = ctrp >= 0
            # Numerically stable sigmoid cross-entropy on logits:
            # max(x, 0) - x * z + log(1 + exp(-|x|)), kept on positives only.
            ctr_loss = (ctrp * pos_pred_mask - ctrp * ctrt +
                        nd.log(1 + nd.exp(-nd.abs(ctrp)))) * pos_gt_mask
            ctr_loss = nd.sum(ctr_loss) / nd.maximum(nd.sum(pos_gt_mask), 1)
            ctr_losses.append(ctr_loss)

            # box loss // iou loss
            px1, py1, px2, py2 = nd.split(boxp,
                                          num_outputs=4,
                                          axis=-1,
                                          squeeze_axis=True)
            gx1, gy1, gx2, gy2 = nd.split(boxt,
                                          num_outputs=4,
                                          axis=-1,
                                          squeeze_axis=True)
            # Areas and intersection use a "+1" extent convention.
            apd = nd.abs(px2 - px1 + 1) * nd.abs(py2 - py1 + 1)
            agt = nd.abs(gx2 - gx1 + 1) * nd.abs(gy2 - gy1 + 1)
            iw = nd.maximum(
                nd.minimum(px2, gx2) - nd.maximum(px1, gx1) + 1., 0.)
            ih = nd.maximum(
                nd.minimum(py2, gy2) - nd.maximum(py1, gy1) + 1., 0.)
            # The extra "+ 1" terms keep intersection and union non-zero.
            ain = iw * ih + 1.
            union = apd + agt - ain + 1
            ious = nd.maximum(ain / union, 0.)
            fg_mask = nd.where(clst > 0, nd.ones_like(clst),
                               nd.zeros_like(clst))
            box_loss = -nd.log(nd.minimum(ious + self._eps, 1.)) * fg_mask
            # NOTE(review): with _return_iou set, box_loss becomes a
            # (loss, ious) tuple; it is appended as-is and later multiplied
            # by _box_lambd in the weighted sum -- confirm this is intended.
            if self._return_iou:
                box_loss = nd.sum(box_loss) / nd.maximum(nd.sum(fg_mask),
                                                         1), ious
            else:
                box_loss = nd.sum(box_loss) / nd.maximum(nd.sum(fg_mask), 1)
            box_losses.append(box_loss)

            # mask loss
            # Indices that order `matche` from largest to smallest value.
            rank = (-matche).argsort(axis=-1)
            # Every tensor is split into 2 pieces along axis 0 with that axis
            # squeezed away, i.e. this assumes exactly 2 samples per group
            # -- TODO confirm.
            rank = nd.split(rank, 2, axis=0, squeeze_axis=True)
            matche = nd.split(matche, 2, axis=0, squeeze_axis=True)
            maskp = nd.split(maskp, 2, axis=0, squeeze_axis=True)
            maskt = nd.split(maskt, 2, axis=0, squeeze_axis=True)
            boxt = nd.split(boxt, 2, axis=0, squeeze_axis=True)
            maskcoep = nd.split(maskcoep, 2, axis=0, squeeze_axis=True)
            agt = nd.split(agt, 2, axis=0, squeeze_axis=True)
            mask_loss = []
            for ranki, matchei, maskpi, maskti, boxti, maskcoepi, agti in zip(
                    rank, matche, maskp, maskt, boxt, maskcoep, agt):
                # Keep the first 200 ranked positions of this sample.
                idx = nd.slice(ranki, 0, 200)
                # 1 where the selected position has a match (match id >= 0).
                pos_mask = nd.take(matchei >= 0, idx)
                pos_box = nd.take(boxti, idx)
                area = nd.take(agti, idx)
                # Weight inversely to the target box area; unmatched -> 0.
                weight = (self.gt_weidth * self.gt_height /
                          (area + self._eps)) * pos_mask
                # Target mask for each selected position, looked up by its
                # match id. Unmatched positions carry a negative id here;
                # their loss is zeroed by `weight` above.
                mask_idx = nd.take(matchei, idx)
                maskti = nd.take(maskti, mask_idx)
                # Combine the selected mask coefficients with `maskpi`
                # (presumably the shared prototype masks -- TODO confirm).
                maskpi = nd.dot(nd.take(maskcoepi, idx), maskpi)
                maskpi = nd.sigmoid(maskpi)
                # The crop mask is a constant; build it outside of autograd.
                with autograd.pause():
                    # Hard-coded 186 x 186 grid; presumably the mask
                    # resolution -- TODO confirm.
                    _h = nd.arange(186, ctx=maskpi.context)
                    _w = nd.arange(186, ctx=maskpi.context)
                    _h = nd.tile(_h, reps=(pos_box.shape[0], 1))
                    _w = nd.tile(_w, reps=(pos_box.shape[0], 1))
                    x1, y1, x2, y2 = nd.split(nd.round(pos_box / scale),
                                              num_outputs=4,
                                              axis=-1)
                    # Per-box indicator of columns / rows inside the box.
                    _w = (_w >= x1) * (_w <= x2)
                    _h = (_h >= y1) * (_h <= y2)
                    # Outer product of row and column indicators gives one
                    # 2-D box mask per selected position.
                    _mask = nd.batch_dot(_h.expand_dims(axis=-1),
                                         _w.expand_dims(axis=-1),
                                         transpose_b=True)
                # Zero predictions outside the target box.
                maskpi = maskpi * _mask
                mask_loss.append(
                    nd.sum(self.SBCELoss(maskpi, maskti) * weight) /
                    nd.sum(pos_mask + self._eps))

            # NOTE: an older, commented-out per-sample variant that sliced
            # exactly `pos_num` matched positions (instead of a fixed top 200)
            # used to live here; see version control if it is needed again.
            mask_loss = nd.mean(nd.concat(*mask_loss, dim=0))
            mask_losses.append(mask_loss)
            # Weighted sum of the four terms for this group.
            sum_losses.append(self._cls_lambd * cls_losses[-1] +
                              self._ctr_lambd * ctr_losses[-1] +
                              self._box_lambd * box_losses[-1] +
                              self._mask_lambd * mask_losses[-1])

        return sum_losses, cls_losses, ctr_losses, box_losses, mask_losses
Beispiel #23
0
    def forward(self,
                anchors,
                pred_classes,
                pred_bboxes,
                groundtruth,
                data=None):
        """Build classification and box-regression targets for the anchors.

        :param anchors: (1, num-of-anchor, 4); squeezed before use and passed
                        to ``box_iou`` with ``format='corner'``
        :param pred_classes: (batch-size, num-of-anchor, num-of-classes),
                             including background; only used by the sampler
        :param pred_bboxes: unused by this method
        :param groundtruth: (batch-size, max-object-of-one-image, 5),
                            groundtruth[0,0,:] = (cls,x0,y0,x1,y1),
                            (x0,y0,x1,y1) normalized by image size
        :param data: optional image batch. When given, the first image is
                     drawn with its ground-truth boxes (skipping rows whose
                     x0 is negative) and its positively sampled, matched
                     anchors, written to ``vis.jpg`` and shown in an OpenCV
                     window that BLOCKS until a key is pressed. Debug only.
                     Assumed to be (batch, channel, H, W) scaled to [0, 1]
                     -- TODO confirm.
        :return:
        cls_targets: output of ``self.cls_encoder``; per the original notes
                     cls_targets[i,j] = (cls_id+1 for anchor j in image i),
                     with background as class 0
        bbox_targets: (batch-size, num-of-anchor, 4), bbox_targets[i,j,:] = (offset of anchor j in image i) (center mode)
        bbox_masks: (batch-size, num-of-anchor, 4), bbox_mask[i,j,:] = (mask value of anchor j in image i)
        """
        anchors = nd.squeeze(anchors)
        gt_bboxes = nd.slice_axis(groundtruth, axis=-1, begin=1, end=None)
        gt_classes = nd.slice_axis(groundtruth, axis=-1, begin=0, end=1)

        # box_iou puts the anchor axis first; swap it with the batch axis.
        ious = nd.transpose(
            nd.contrib.box_iou(anchors, gt_bboxes, format='corner'), (1, 0, 2))
        # matches[i,j] = index of the object in image i matched with anchor j
        matches = self.matcher(ious)
        # samples[i,j] = -1 or 1
        samples = self.sampler(matches, pred_classes, ious)

        if data is not None:
            img = nd.clip(nd.transpose(data[0], (1, 2, 0)) * 255.0, 0,
                          255).asnumpy().astype(np.uint8)
            H, W, _ = img.shape

            # Ground-truth boxes of the first image.
            for x0, y0, x1, y1 in gt_bboxes[0].asnumpy().tolist():
                if x0 < 0:
                    continue
                x0, x1 = int(x0 * W), int(x1 * W)
                y0, y1 = int(y0 * H), int(y1 * H)
                cv2.rectangle(img, (x0, y0), (x1, y1), (255, 0, 0), 2)

            # Copy to host once instead of synchronising for every anchor.
            anchors_np = anchors.asnumpy()
            matches_np = matches[0].asnumpy()
            samples_np = samples[0].asnumpy()
            for row in range(matches_np.shape[0]):
                # Only draw anchors that are positive samples with a match.
                if samples_np[row] < 1 or matches_np[row] < 0:
                    continue
                x0, y0, x1, y1 = anchors_np[row].tolist()
                x0, x1 = int(x0 * W), int(x1 * W)
                y0, y1 = int(y0 * H), int(y1 * H)
                cv2.rectangle(img, (x0, y0), (x1, y1), (0, 0, 255), 1)
            cv2.imwrite("vis.jpg", img)
            cv2.imshow("vis", img)
            cv2.waitKey(-1)

        cls_targets = self.cls_encoder(samples, matches, gt_classes)
        bbox_targets, bbox_masks = self.bbox_encoder(samples, matches,
                                                     anchors, gt_bboxes)
        return cls_targets, bbox_targets, bbox_masks
Beispiel #24
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Forward pass of the learnable-NMS custom operator.

        Inputs (``in_data``): 0 class scores, 1 box regression output, 2 rois,
        3 im_info, 4 RoI features, 5-18 weight/bias pairs of the rank,
        feature-embedding, pairwise-position, query, key, linear-out and
        logit layers, and (only when ``self.has_non_gt_index``) 19 the
        indices of non-ground-truth rois.

        Outputs (``out_data``):
            0: nms_multi_score -- sorted class scores multiplied by the
               sigmoid NMS scores, one value per threshold
            1: sorted_bbox -- refined boxes of the top ``first_n`` rois per
               foreground class
            2: sorted_score -- top ``first_n`` foreground scores per class

        Side effect: keeps up to 1000 recent timings of this method in the
        module globals ``learn_nms_time`` / ``learn_nms_count`` and prints
        their average every 250 calls.

        ``is_train`` and ``aux`` are not used.
        """
        nms_start_time = time.time()
        # inputs
        cls_score = in_data[0]
        bbox_pred = in_data[1]
        rois = in_data[2]
        im_info = in_data[3]
        fc_all_2_relu = in_data[4]
        nms_rank_weight = in_data[5]
        nms_rank_bias = in_data[6]
        roi_feat_embedding_weight = in_data[7]
        roi_feat_embedding_bias = in_data[8]
        nms_pair_pos_fc1_1_weight = in_data[9]
        nms_pair_pos_fc1_1_bias = in_data[10]
        nms_query_1_weight = in_data[11]
        nms_query_1_bias = in_data[12]
        nms_key_1_weight = in_data[13]
        nms_key_1_bias = in_data[14]
        nms_linear_out_1_weight = in_data[15]
        nms_linear_out_1_bias = in_data[16]
        nms_logit_weight = in_data[17]
        nms_logit_bias = in_data[18]
        if self.has_non_gt_index:
            non_gt_index = in_data[19]
        else:
            non_gt_index = None

        # Restrict scores and regressions to the non-ground-truth rois:
        # either the first nongt_dim rows or an explicit index list.
        if self.nongt_dim is not None:
            cls_score_nongt = nd.slice_axis(data=cls_score,
                                            axis=0,
                                            begin=0,
                                            end=self.nongt_dim)
            bbox_pred_nongt = nd.slice_axis(data=bbox_pred,
                                            axis=0,
                                            begin=0,
                                            end=self.nongt_dim)
        elif non_gt_index is not None:
            cls_score_nongt = nd.take(a=cls_score, indices=non_gt_index)
            bbox_pred_nongt = nd.take(a=bbox_pred, indices=non_gt_index)
        else:
            cls_score_nongt = cls_score
            bbox_pred_nongt = bbox_pred
        bbox_pred_nongt = nd.BlockGrad(bbox_pred_nongt)

        # remove batch idx and gt roi
        sliced_rois = nd.slice_axis(data=rois, axis=1, begin=1, end=None)
        if self.nongt_dim is not None:
            sliced_rois = nd.slice_axis(data=sliced_rois,
                                        axis=0,
                                        begin=0,
                                        end=self.nongt_dim)
        elif non_gt_index is not None:
            sliced_rois = nd.take(a=sliced_rois, indices=non_gt_index)
        # bbox_pred_nobg, [num_rois, 4*(num_reg_classes-1)]
        # (the first four columns, presumably the background class, dropped)
        bbox_pred_nobg = nd.slice_axis(data=bbox_pred_nongt,
                                       axis=1,
                                       begin=4,
                                       end=None)
        # [num_boxes, 4, num_reg_classes-1]
        refined_bbox = refine_bbox_nd(sliced_rois,
                                      bbox_pred_nobg,
                                      im_info,
                                      means=self.bbox_means,
                                      stds=self.bbox_stds)
        # softmax cls_score to cls_prob, [num_rois, num_classes]
        cls_prob = nd.softmax(data=cls_score_nongt, axis=-1)
        cls_prob_nobg = nd.slice_axis(cls_prob, axis=1, begin=1, end=None)
        sorted_cls_prob_nobg = nd.sort(data=cls_prob_nobg,
                                       axis=0,
                                       is_ascend=False)
        # sorted_score, [first_n, num_fg_classes]
        sorted_score = nd.slice_axis(sorted_cls_prob_nobg,
                                     axis=0,
                                     begin=0,
                                     end=self.first_n,
                                     name='sorted_score')
        max_score_per_class = sorted_score.max(axis=0)
        max_score_per_class_numpy = max_score_per_class.asnumpy()

        # Only classes whose best score reaches the threshold go through the
        # attention module. Capping the threshold at the global maximum
        # guarantees that at least one class stays valid.
        valid_class_thresh = self.class_thresh
        valid_class_thresh = np.minimum(valid_class_thresh,
                                        max_score_per_class_numpy.max())
        valid_class_indices = np.where(
            max_score_per_class_numpy >= valid_class_thresh)[0]
        invalid_class_indices = np.where(
            max_score_per_class_numpy < valid_class_thresh)[0]
        num_valid_classes = len(valid_class_indices)
        valid_class_indices_nd = nd.array(valid_class_indices,
                                          ctx=sorted_score.context)

        # sort by score
        rank_indices = nd.argsort(data=cls_prob_nobg, axis=0, is_ascend=False)
        # first_rank_indices, [first_n, num_fg_classes]
        first_rank_indices = nd.slice_axis(rank_indices,
                                           axis=0,
                                           begin=0,
                                           end=self.first_n)
        # take() gathers along axis 0, hence the transpose round trip to
        # select class columns.
        valid_first_rank_indices = first_rank_indices.transpose().take(
            valid_class_indices_nd).transpose()

        # sorted_bbox, [first_n, num_fg_classes, 4, num_reg_classes-1]
        sorted_bbox = nd.take(a=refined_bbox, indices=first_rank_indices)
        if self.class_agnostic:
            # sorted_bbox, [first_n, num_fg_classes, 4]
            sorted_bbox = nd.Reshape(sorted_bbox,
                                     shape=(0, 0, 0),
                                     name='sorted_bbox')
        else:
            # Create the index on the same device as the data it indexes;
            # without ctx it lands on the default context and nd.pick cannot
            # combine it with sorted_bbox living on another device.
            cls_mask = nd.arange(0,
                                 self.num_fg_classes,
                                 ctx=sorted_bbox.context)
            cls_mask = nd.Reshape(cls_mask, shape=(1, -1, 1))
            cls_mask = nd.broadcast_to(cls_mask, shape=(self.first_n, 0, 4))
            # sorted_bbox, [first_n, num_fg_classes, 4]
            # (for every class keep the regression of that same class)
            sorted_bbox = nd.pick(data=sorted_bbox,
                                  name='sorted_bbox',
                                  index=cls_mask,
                                  axis=3)

        valid_sorted_bbox = sorted_bbox.transpose(
            (1, 0, 2)).take(valid_class_indices_nd).transpose((1, 0, 2))

        # nms_rank_embedding, [first_n, 1024]
        nms_rank_embedding = extract_rank_embedding_nd(self.first_n, 1024)
        # nms_rank_feat, [first_n, 128]
        nms_rank_feat = nd.FullyConnected(name='nms_rank',
                                          data=nms_rank_embedding,
                                          num_hidden=128,
                                          weight=nms_rank_weight,
                                          bias=nms_rank_bias)
        # nms_position_matrix, [num_valid_classes, first_n, first_n, 4]
        nms_position_matrix = extract_multi_position_matrix_nd(
            valid_sorted_bbox)
        # roi_feat_embedding, [num_rois, 128]
        roi_feat_embedding = nd.FullyConnected(
            name='roi_feat_embedding',
            data=fc_all_2_relu,
            num_hidden=128,
            weight=roi_feat_embedding_weight,
            bias=roi_feat_embedding_bias)
        # sorted_roi_feat, [first_n, num_valid_classes, 128]
        sorted_roi_feat = nd.take(a=roi_feat_embedding,
                                  indices=valid_first_rank_indices)

        # vectorized nms
        # nms_embedding_feat, [first_n, num_valid_classes, 128]
        nms_embedding_feat = nd.broadcast_add(lhs=sorted_roi_feat,
                                              rhs=nd.expand_dims(nms_rank_feat,
                                                                 axis=1))
        # nms_attention_1 is added element-wise to nms_embedding_feat below,
        # so it is expected to have a matching shape.
        nms_attention_1 = nms_attention_nd(
            nms_embedding_feat,
            nms_position_matrix,
            nms_pair_pos_fc1_1_weight,
            nms_pair_pos_fc1_1_bias,
            nms_query_1_weight,
            nms_query_1_bias,
            nms_key_1_weight,
            nms_key_1_bias,
            nms_linear_out_1_weight,
            nms_linear_out_1_bias,
            num_rois=self.first_n,
            index=1,
            group=self.nms_attention_group,
            dim=self.nms_attention_dim,
            fc_dim=self.nms_attention_fc_dim,
            feat_dim=self.nms_attention_feat_dim)
        nms_all_feat_1 = nms_embedding_feat + nms_attention_1
        nms_all_feat_1_relu = nd.Activation(data=nms_all_feat_1,
                                            act_type='relu',
                                            name='nms_all_feat_1_relu')
        # merge the first two axes: [first_n * num_valid_classes, feat]
        nms_all_feat_1_relu_reshape = nd.Reshape(nms_all_feat_1_relu,
                                                 shape=(-3, -2))
        # logit, [first_n * num_valid_classes, num_thresh]
        nms_conditional_logit = nd.FullyConnected(
            name='nms_logit',
            data=nms_all_feat_1_relu_reshape,
            num_hidden=self.num_thresh,
            weight=nms_logit_weight,
            bias=nms_logit_bias)
        # logit_reshape, [first_n, num_valid_classes, num_thresh]
        nms_conditional_logit_reshape = nd.Reshape(nms_conditional_logit,
                                                   shape=(self.first_n,
                                                          num_valid_classes,
                                                          self.num_thresh))
        nms_conditional_score = nd.Activation(
            data=nms_conditional_logit_reshape,
            act_type='sigmoid',
            name='nms_conditional_score')
        if num_valid_classes == self.num_fg_classes:
            full_nms_conditional_score = nms_conditional_score
        else:
            # Classes that were filtered out receive an all-zero score.
            full_nms_conditional_score = nd.concat(
                nms_conditional_score,
                nd.zeros(
                    (self.first_n, self.num_fg_classes - num_valid_classes,
                     self.num_thresh),
                    ctx=nms_conditional_score.context),
                dim=1)

        # The class axis is currently ordered valid-first, invalid-last.
        # Build the inverse permutation to restore the original class order.
        all_indexes = np.concatenate(
            (valid_class_indices, invalid_class_indices))
        restore_indexes = np.zeros((self.num_fg_classes))
        restore_indexes[all_indexes] = np.arange(self.num_fg_classes)
        restore_indexes = nd.array(restore_indexes,
                                   ctx=nms_conditional_score.context)
        full_nms_conditional_score = full_nms_conditional_score.transpose(
            (1, 0, 2)).take(restore_indexes).transpose((1, 0, 2))

        sorted_score_reshape = nd.expand_dims(sorted_score, axis=2)
        nms_multi_score = nd.broadcast_mul(lhs=sorted_score_reshape,
                                           rhs=full_nms_conditional_score)
        # Pull a value to the host so that the asynchronous computation has
        # finished before the elapsed time is measured.
        _ = nms_multi_score.mean().asnumpy()

        all_time = time.time() - nms_start_time

        # Timing statistics live in module globals so that they survive
        # across operator invocations.
        module_state = globals()
        if ('learn_nms_time' not in module_state
                or 'learn_nms_count' not in module_state):
            module_state['learn_nms_time'] = []
            module_state['learn_nms_count'] = 0

        timings = module_state['learn_nms_time']
        if module_state['learn_nms_count'] >= 1000:
            # Keep a sliding window of the 1000 most recent timings.
            timings.pop(0)
        timings.append(all_time)

        module_state['learn_nms_count'] += 1
        call_count = module_state['learn_nms_count']
        if call_count % 250 == 0:
            print("--->> learn nms running average time cost: {}".format(
                float(sum(timings)) /
                (1000 if call_count > 1000 else call_count)))

        self.assign(out_data[0], req[0], nms_multi_score)
        self.assign(out_data[1], req[1], sorted_bbox)
        self.assign(out_data[2], req[2], sorted_score)
Beispiel #25
0
    def forward(self, x, gt_boxes=None):
        """Run the two-stage detector on a batch of images.

        :param x: ndarray (B,C,H,W)
        :param gt_boxes: ground-truth boxes, handed to the sampler while
                         autograd is in training mode
        :return: in training mode the tuple
                 (cls_pred, box_pred, rpn_box, samples, matches,
                  raw_rpn_score, raw_rpn_box, anchors);
                 otherwise (ids, scores, bboxes) sliced from the per-image
                 NMS results
        """
        def _split_per_image(arr, parts, split_axis, squeeze=False):
            # nd.split may hand back a single array instead of a list
            # (the original guards for this); always return a sequence.
            pieces = nd.split(arr,
                              axis=split_axis,
                              num_outputs=parts,
                              squeeze_axis=squeeze)
            if isinstance(pieces, (list, tuple)):
                return pieces
            return [pieces]

        # Extract features with the base network.
        feat = self.features(x)
        training = autograd.is_training()

        # Region proposal network.
        if training:
            # Training: the RPN receives an all-zero tensor shaped like x,
            # and its proposals are sampled against the ground truth.
            img = nd.zeros_like(x)
            rpn_score, rpn_box, raw_rpn_score, raw_rpn_box, anchors = self.rpn(
                feat, img)
            rpn_box, samples, matches = self.sampler(rpn_box, rpn_score,
                                                     gt_boxes)
            num_roi = self._num_sample
        else:
            # Inference: proposals only, output shape (B,N,4).
            _, rpn_box = self.rpn(feat, x)
            num_roi = self._rpn_test_post_nms

        # Flatten the rois to 2-D and prepend the batch index of each roi.
        with autograd.pause():
            batch_ids = nd.arange(0,
                                  self._max_batch,
                                  repeat=num_roi,
                                  ctx=rpn_box.context)
            rpn_roi = nd.concat(batch_ids.reshape((-1, 1)),
                                rpn_box.reshape((-1, 4)),
                                dim=-1)
            rpn_roi = nd.stop_gradient(rpn_roi)

        # RoI feature extraction, (B*num_roi, channel, H, W).
        if self._roi_mode == 'pool':
            pool_feat = nd.ROIPooling(feat, rpn_roi, self._roi_size,
                                      1 / self._stride)
        elif self._roi_mode == 'align':
            pool_feat = nd.contrib.ROIAlign(feat,
                                            rpn_roi,
                                            self._roi_size,
                                            1 / self._stride,
                                            sample_ratio=2)
        else:
            raise ValueError("Invalid roi mode: {}".format(self._roi_mode))

        top_feat = self.top_features(pool_feat)
        avg_feat = self.global_avg_pool(top_feat)

        # Class and box-regression heads, reshaped back to per-image form:
        # (B*num_roi, num_cls+1) -> (B, N, num_cls+1)
        cls_pred = self.class_predictor(avg_feat)
        cls_pred = cls_pred.reshape(
            (self._max_batch, num_roi, self.num_class + 1))
        # (B*num_roi, num_cls*4) -> (B, N, num_cls, 4)
        box_pred = self.bbox_predictor(avg_feat)
        box_pred = box_pred.reshape(
            (self._max_batch, num_roi, self.num_class, 4))

        if training:
            return (cls_pred, box_pred, rpn_box, samples, matches,
                    raw_rpn_score, raw_rpn_box, anchors)

        # Inference: decode the predictions for every class and apply the
        # predicted offsets to the input rois.
        # cls_ids (B,N,C), scores (B,N,C)
        cls_ids, scores = self.cls_decoder(nd.softmax(cls_pred, axis=-1))

        # Move the class axis in front of the roi axis.
        # (B,N,C) -> (B,C,N) -> (B,C,N,1)
        cls_ids = cls_ids.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
        scores = scores.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
        # (B,N,C,4) -> (B,C,N,4)
        box_pred = box_pred.transpose((0, 2, 1, 3))

        # One entry per image; the rois keep their leading axis of size 1.
        rois_per_image = _split_per_image(rpn_box,
                                          self._max_batch,
                                          0,
                                          squeeze=False)
        ids_per_image = _split_per_image(cls_ids,
                                         self._max_batch,
                                         0,
                                         squeeze=True)
        scores_per_image = _split_per_image(scores,
                                            self._max_batch,
                                            0,
                                            squeeze=True)
        boxes_per_image = _split_per_image(box_pred,
                                           self._max_batch,
                                           0,
                                           squeeze=True)

        # Decode and run NMS separately for every image.
        results = []
        per_image = zip(ids_per_image, scores_per_image, boxes_per_image,
                        rois_per_image)
        for img_ids, img_scores, img_offsets, img_rois in per_image:
            # img_offsets (C,N,4), img_rois (1,N,4) -> decoded (C,N,4)
            decoded = self.box_decoder(img_offsets,
                                       self.box_to_center(img_rois))

            # (C,N,1) + (C,N,1) + (C,N,4) -> (C,N,6)
            candidates = nd.concat(img_ids, img_scores, decoded, dim=-1)
            kept = nd.contrib.box_nms(candidates,
                                      overlap_thresh=self.nms_thresh,
                                      valid_thresh=0.0001,
                                      topk=self.nms_topk,
                                      coord_start=2,
                                      score_index=1,
                                      id_index=0,
                                      force_suppress=True)
            # Merge the class and roi axes.
            results.append(kept.reshape((-3, 0)))

        results = nd.stack(*results, axis=0)
        # Column layout of every result row: id, score, then 4 box values.
        ids = nd.slice_axis(results, axis=-1, begin=0, end=1)
        scores = nd.slice_axis(results, axis=-1, begin=1, end=2)
        bboxes = nd.slice_axis(results, axis=-1, begin=2, end=6)

        return ids, scores, bboxes
Beispiel #26
0
    def hybrid_forward(self, F, inputs, outputs, initial_hidden_state,
                       batch_size_seq):
        """Run the unrolled encoder/decoder over a batch and return the logits.

        The encoder cell is unrolled over ``self.in_seq_len`` source positions
        and the decoder cell over ``self.out_seq_len`` target positions (the
        target tokens themselves are fed to the decoder at every step).
        Positions after the END tag are zeroed with ``SequenceMask``.

        Args:
            F: MXNet API namespace handed in by the hybrid block machinery.
            inputs: Source token ids; compared against ``self.end_idx`` along
                axis 1 to locate the END tag of each sentence.
            outputs: Target token ids, sliced one position per decoder step.
            initial_hidden_state: Initial hidden state of the encoder cell.
            batch_size_seq: Per-sample offset multiplied by
                ``self.max_seq_length`` to index the flattened encoder states
                (presumably ``0..N-1`` -- TODO confirm against the caller).

        Returns:
            The output of ``self.dense`` applied to the masked, batch-normalised
            decoder outputs.
        """
        # Index of the END tag in each source sentence
        # (original note: "sentence length, 2 == END tag index").
        inputs = F.cast(inputs, dtype='float32')
        in_sent_last_idx = F.argmax(F.where(inputs == self.end_idx,
                                            F.ones_like(inputs),
                                            F.zeros_like(inputs)),
                                    axis=1)

        outputs = F.cast(outputs, dtype='float32')
        out_sent_last_idx = F.argmax(F.where(outputs == self.end_idx,
                                             F.ones_like(outputs),
                                             F.zeros_like(outputs)),
                                     axis=1)
        # encoder GRU
        embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')

        # Collect the per-step tensors and concatenate once after the loop;
        # concatenating inside the loop re-copies the accumulated tensor at
        # every step.
        next_h = initial_hidden_state
        enc_step_outputs = []
        enc_step_states = []
        for j in range(self.in_seq_len):
            p_outputs = F.slice_axis(embeddinged_in,
                                     axis=1,
                                     begin=j,
                                     end=j + 1)
            p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
            enout, (next_h, ) = self.encoder(p_outputs, [
                next_h,
            ])
            enc_step_outputs.append(enout)
            enc_step_states.append(next_h)
        enouts = F.concat(*enc_step_outputs, dim=1)
        next_hs = F.concat(*enc_step_states, dim=1)

        # Zero out the positions after the END tag. SequenceMask is applied
        # with the sequence axis first, hence the transposes around it.
        enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
        enouts = F.transpose(enouts, (1, 0, 2))
        enouts = F.SequenceMask(enouts,
                                sequence_length=in_sent_last_idx + 1,
                                use_sequence_length=True)
        enouts = F.transpose(enouts, (1, 0, 2))

        next_hs = F.reshape(next_hs, (-1, self.n_hidden))
        # take only supports indexing along dim 0, so the states are flattened:
        # N, 30, 300 -> N * 30, 300 , N = (0,1,2,3,4,5...)
        # NOTE(review): the row offset uses self.max_seq_length while the
        # states were unrolled over self.in_seq_len -- confirm they are equal.
        next_hs = next_hs.take(in_sent_last_idx +
                               (batch_size_seq * self.max_seq_length))
        embeddinged_out = F.cast(self.embedding(outputs), dtype='float32')

        # decoder GRU with attention
        dec_step_outputs = []
        for i in range(self.out_seq_len):
            # Unroll the decoder cell for out_seq_len steps, accumulating the
            # outputs. slice_axis is used instead of outputs[:, i, :] so that
            # the block can be hybridized.
            p_outputs = F.slice_axis(embeddinged_out,
                                     axis=1,
                                     begin=i,
                                     end=i + 1)
            p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
            if self.attention:
                p_outputs, _ = self.apply_attention(F=F,
                                                    inputs=p_outputs,
                                                    hidden=next_hs,
                                                    encoder_outputs=enouts)
            deout, (next_hs, ) = self.decoder(p_outputs, [
                next_hs,
            ])
            dec_step_outputs.append(deout)
        deouts = F.concat(*dec_step_outputs, dim=1)

        # reshape from 2 dims to 3 dims
        deouts = F.reshape(deouts, (-1, self.out_seq_len, self.n_hidden))
        # zero padding after the END tag of the target sentence
        deouts = F.transpose(deouts, (1, 0, 2))
        deouts = F.SequenceMask(deouts,
                                sequence_length=out_sent_last_idx + 1,
                                use_sequence_length=True)
        deouts = F.transpose(deouts, (1, 0, 2))

        deouts = self.batchnorm(deouts)
        deouts_fc = self.dense(deouts)
        return deouts_fc
Beispiel #27
0
    def calulation(self, input_str, ko_dict, en_dict, en_rev_dict, ctx):
        """Translate a single sentence by greedy decoding (inference code).

        Args:
            input_str: Raw source sentence; it is stripped and tokenised with
                ``mecab.morphs`` and wrapped in 'START'/'END' tokens.
            ko_dict: Source vocabulary passed to ``encoding_and_padding``.
            en_dict: Target vocabulary; only ``en_dict['START']`` is read here.
            en_rev_dict: Maps a predicted target index back to its token.
            ctx: MXNet context on which the arrays are created.

        Returns:
            A tuple ``(text, att_weights)``. ``text`` is the generated tokens
            joined by single spaces (empty when the first prediction is
            already 'END' or '__PAD__'). ``att_weights`` is the attention
            weights of every executed decoding step concatenated along axis 0,
            or ``None`` when ``self.attention`` is falsy.
        """
        # Add the START and END tokens around the tokenised sentence.
        input_str = [
            [
                'START',
            ] + mecab.morphs(input_str.strip()) + [
                'END',
            ],
        ]
        X = encoding_and_padding(input_str,
                                 ko_dict,
                                 max_seq=self.max_seq_length)
        # string to embed
        inputs = F.array(X, ctx=ctx)

        inputs = F.cast(inputs, dtype='float32')
        in_sent_last_idx = F.argmax(F.where(inputs == self.end_idx,
                                            F.ones_like(inputs),
                                            F.zeros_like(inputs)),
                                    axis=1)

        # encoder GRU
        embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')
        next_h = F.random.normal(0, 1, (1, self.n_hidden), ctx=ctx)
        # Collect the per-step tensors and concatenate once after the loop
        # instead of re-copying the accumulated tensor at every step.
        enc_step_outputs = []
        enc_step_states = []
        for j in range(self.in_seq_len):
            p_outputs = F.slice_axis(embeddinged_in,
                                     axis=1,
                                     begin=j,
                                     end=j + 1)
            p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
            enout, (next_h, ) = self.encoder(p_outputs, [
                next_h,
            ])
            enc_step_outputs.append(enout)
            enc_step_states.append(next_h)
        enouts = F.concat(*enc_step_outputs, dim=1)
        next_hs = F.concat(*enc_step_states, dim=1)

        # masking with 0 using length
        enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
        enouts = F.transpose(enouts, (1, 0, 2))
        enouts = F.SequenceMask(enouts,
                                sequence_length=in_sent_last_idx + 1,
                                use_sequence_length=True)
        enouts = F.transpose(enouts, (1, 0, 2))

        next_hs = F.reshape(next_hs, (-1, self.n_hidden))
        # take only supports indexing along dim 0, so the states are flattened:
        # N, 30, 300 -> N * 30, 300 , N = (0,1,2,3,4,5...)
        next_hs = next_hs.take(in_sent_last_idx)

        # Embed 'START' to use it as the decoder's initial input.
        Y_init = F.array([
            [
                en_dict['START'],
            ],
        ], ctx=ctx)
        Y_init = F.cast(self.embedding(Y_init), dtype='float32')
        deout = Y_init[:, 0, :]

        # Both results are initialised up front so the return statement is
        # valid even when decoding stops on the first step or attention is
        # disabled (previously either case raised UnboundLocalError).
        ret_seq = []
        att_steps = []

        # Iterate for at most the output sequence length.
        for i in range(self.out_seq_len):
            if self.attention:
                deout, att_weight = self.apply_attention(
                    F=F, inputs=deout, hidden=next_hs, encoder_outputs=enouts)
                att_steps.append(att_weight)
            deout, (next_hs, ) = self.decoder(deout, [
                next_hs,
            ])
            # Add a dimension so batchnorm can be applied, then restore it.
            deout = F.expand_dims(deout, axis=1)
            deout = self.batchnorm(deout)
            # reduce dim
            deout = deout[:, 0, :]
            # Derive the scores of the next token in the sequence.
            deout_sm = self.dense(deout)
            # NOTE(review): one_hot followed by argmax looks like a round trip
            # back to the same index; kept as-is to preserve behaviour.
            deout = F.one_hot(F.argmax(F.softmax(deout_sm, axis=1), axis=1),
                              depth=self.vocab_size)
            # Convert the prediction into a form the decoder accepts
            # (apply the embedding and match the dimensions).
            deout = F.argmax(deout, axis=1)
            deout = F.expand_dims(deout, axis=0)
            deout = F.cast(self.embedding(deout)[:, 0, :], dtype='float32')
            gen_char = en_rev_dict[F.argmax(deout_sm,
                                            axis=1).asnumpy()[0].astype('int')]
            if gen_char == '__PAD__' or gen_char == 'END':
                break
            ret_seq.append(gen_char)

        att_weights = F.concat(*att_steps, dim=0) if att_steps else None
        return (" ".join(ret_seq), att_weights)
Beispiel #28
0
def train(net, train_data, val_data, classes, args):
    """Run the training loop, validating and saving after every epoch.

    Uninitialised parameters are initialised, all parameters are moved to the
    module-level ``ctx``, and an SGD trainer is built from ``args``. Each batch
    is split across ``ctx``; classification and box losses are normalised by
    the number of positive targets in the whole batch before back-propagation.

    Args:
        net: Network whose call returns ``(cls_preds, box_preds, anchors)`` and
            which provides ``target_generator``.
        train_data: Iterable of batches; ``batch[0]`` is the data and
            ``batch[1]`` the labels, whose last axis holds the box in columns
            0-3 and the class id in column 4.
        val_data: Validation data forwarded to ``validate``.
        classes: Class list forwarded to ``validate``.
        args: Options namespace; reads ``lr``, ``wd``, ``momentum``,
            ``lr_decay``, ``lr_decay_epoch``, ``save_prefix``, ``start_epoch``,
            ``epochs``, ``log_interval`` and ``save_interval``.
    """
    # Only parameters that hold no data yet are initialised.
    for p in net.collect_params().values():
        if p._data is None:
            p.initialize()
    net.collect_params().reset_ctx(ctx)
    trainer = gluon.Trainer(
        net.collect_params(),
        'sgd',
        {'learning_rate': args.lr, 'wd': args.wd, 'momentum': args.momentum},
    )

    # lr decay policy
    lr_decay = float(args.lr_decay)
    lr_steps = sorted(
        float(step) for step in args.lr_decay_epoch.split(',') if step.strip())

    cls_loss = gluon.loss.SoftmaxCrossEntropyLoss()
    box_loss = gluon.loss.HuberLoss()
    acc_metric = Accuracy(axis=-1, ignore_labels=[-1])
    ce_metric = mx.metric.Loss('CrossEntropy')
    smoothl1_metric = mx.metric.Loss('SmoothL1')

    # Logger: root logger at INFO level, mirrored into "<save_prefix>_train.log".
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = args.save_prefix + '_train.log'
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    file_handler = logging.FileHandler(log_file_path)
    logger.addHandler(file_handler)
    logger.info(args)
    logger.info('Start training from [Epoch %d]' % args.start_epoch)

    best_map = [0]
    for epoch in range(args.start_epoch, args.epochs):
        # Apply every decay step that has been reached by this epoch.
        while lr_steps and epoch >= lr_steps[0]:
            new_lr = trainer.learning_rate * lr_decay
            lr_steps.pop(0)
            trainer.set_learning_rate(new_lr)
            logger.info("[Epoch {}] Set learning rate to {}".format(epoch, new_lr))
        for metric in (acc_metric, ce_metric, smoothl1_metric):
            metric.reset()
        tic = time.time()
        btic = time.time()
        net.hybridize()
        for i, batch in enumerate(train_data):
            batch_size = batch[0].shape[0]
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            outputs, labels = [], []
            raw_cls_losses = []  # per-device cls loss before normalisation
            raw_box_losses = []  # per-device box loss before normalisation
            num_positive = []
            with autograd.record():
                for x, y in zip(data, label):
                    cls_preds, box_preds, anchors = net(x)
                    with autograd.pause():
                        # Targets are generated with recording paused: no
                        # gradient has to flow back into the labels, which
                        # reduces the auto differentiation overhead.
                        gt_boxes = nd.slice_axis(y, axis=-1, begin=0, end=4)
                        gt_ids = nd.slice_axis(y, axis=-1, begin=4, end=5)
                        cls_targets, box_targets, box_masks = net.target_generator(
                            anchors, cls_preds, gt_boxes, gt_ids)
                        # Number of positive samples, used later to normalise
                        # the loss.
                        num_positive.append(nd.sum(cls_targets > 0).asscalar())

                    # Multi-class cross entropy; ignored labels (negative
                    # targets) are masked out through the sample weight.
                    keep_mask = (cls_targets >= 0).expand_dims(axis=-1)
                    cls_l = cls_loss(cls_preds, cls_targets, keep_mask)
                    raw_cls_losses.append(
                        cls_l * cls_targets.size / cls_targets.shape[0])
                    # Huber loss (the smooth-L1 loss of the paper) on the boxes.
                    box_l = box_loss(box_preds * box_masks, box_targets)
                    raw_box_losses.append(
                        box_l * box_targets.size / box_targets.shape[0])
                    # Kept for the accuracy metric.
                    outputs.append(cls_preds)
                    labels.append(cls_targets)
                # Positive samples over the entire batch, never below 1.
                n_pos = max(1, sum(num_positive))
                # Losses normalised by n_pos; these are back-propagated.
                total_losses = [
                    c / n_pos + b / n_pos
                    for c, b in zip(raw_cls_losses, raw_box_losses)
                ]
                # Rescaled by the batch size, used for the loss metrics only.
                ce_losses = [c / n_pos * batch_size for c in raw_cls_losses]
                box_losses = [b / n_pos * batch_size for b in raw_box_losses]
                autograd.backward(total_losses)
            # The loss is already normalised, so no further division by the
            # batch size is wanted here.
            trainer.step(1)
            ce_metric.update(0, ce_losses)
            smoothl1_metric.update(0, box_losses)
            acc_metric.update(labels, outputs)
            if args.log_interval and (i + 1) % args.log_interval == 0:
                name1, loss1 = ce_metric.get()
                name2, loss2 = smoothl1_metric.get()
                name3, loss3 = acc_metric.get()
                logger.info('[Epoch %d][Batch %d], Speed: %f samples/sec, %s=%f, %s=%f, %s=%f'%(
                    epoch, i, batch_size/(time.time()-btic), name1, loss1, name2, loss2, name3, loss3))
            btic = time.time()

        name1, loss1 = ce_metric.get()
        name2, loss2 = smoothl1_metric.get()
        name3, loss3 = acc_metric.get()
        logger.info('[Epoch %d] Training cost: %f, %s=%f, %s=%f, %s=%f'%(
            epoch, (time.time()-tic), name1, loss1, name2, loss2, name3, loss3))
        map_name, mean_ap = validate(net, val_data, ctx, classes)
        val_msg = '\n'.join('%s=%f' % pair for pair in zip(map_name, mean_ap))
        logger.info('[Epoch %d] Validation: \n%s'%(epoch, val_msg))
        save_params(net, best_map, mean_ap[-1], epoch, args.save_interval, args.save_prefix)
Beispiel #29
0
    return xywh_pred


NUM_EPOCHS = 40

for epoch in range(NUM_EPOCHS):
    train_iter.reset()
    for i, batch in enumerate(train_iter):
        x = batch.data[0].as_in_context(ctx)
        y = batch.label[0].as_in_context(ctx)
        with autograd.record():
            feature = net(x)
            #with autograd.pause():
            xywh_pred = feature_forward(feature)

            box_lb = nd.slice_axis(y, begin=1, end=5, axis=-1)
            xywh_lb = transform_center(box_lb)
            loss = l1_loss(xywh_pred, xywh_lb)

        loss.backward()
        trainer.step(BATCH_SIZE)

        if i % 50 == 0:
            try:
                #pdb.set_trace()
                print(xywh_pred.asnumpy()[0], xywh_lb.asnumpy()[0])
                loss_record = nd.mean(loss).asscalar()
                print('Epoch: {0}, iter: {1}, loss: {2}'.format(
                    epoch, i, loss_record))
            except:
                #pdb.set_trace()