Example 1
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            concat_rois = output['concat_rois_output'].asnumpy()[:, 1:]
        else:
            concat_rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        rois, ref_rois = np.split(concat_rois, 2)
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        ref_scores = output['cls_prob_reshape_output'].asnumpy()[1]
        ref_bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[1]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        pred_boxes /= scale

        ref_pred_boxes = bbox_pred(ref_rois, ref_bbox_deltas)
        ref_pred_boxes = clip_boxes(ref_pred_boxes, im_shape[-2:])
        ref_pred_boxes /= scale

        pred_boxes_all.append(pred_boxes)
        scores_all.append(scores)
        ref_pred_boxes_all.append(ref_pred_boxes)
        ref_scores_all.append(ref_scores)
    return scores_all, pred_boxes_all, ref_scores_all, ref_pred_boxes_all, data_dict_all
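
All of these examples lean on two helpers, bbox_pred and clip_boxes, that the snippets never define. For orientation, here is a minimal NumPy sketch of the standard Faster R-CNN versions; the repositories' own implementations may differ in details such as the "+ 1" box-width convention.

import numpy as np

def bbox_pred(boxes, box_deltas):
    """Decode (dx, dy, dw, dh) deltas against rois/anchors (one 4-tuple per class)."""
    if boxes.shape[0] == 0:
        return np.zeros((0, box_deltas.shape[1]))
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)

    dx = box_deltas[:, 0::4]
    dy = box_deltas[:, 1::4]
    dw = box_deltas[:, 2::4]
    dh = box_deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(box_deltas.shape)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)  # y2
    return pred_boxes

def clip_boxes(boxes, im_shape):
    """Clip boxes to the image; im_shape is (height, width)."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes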
Example 2
def check_movements(ims, bef_ims, aft_ims, processed_roidb, delta_bef_roi,
                    delta_aft_roi):
    save_name = '/home/wangshiyao/Documents/testdata/' + processed_roidb[0][
        'image'].split('/')[-1]
    print('saving images to ' + save_name)
    boxes = processed_roidb[0]['boxes']
    # assign the transposed results back (the originals discarded them);
    # uint8 keeps pixel values in [0, 255] for image I/O
    ims = ims.squeeze().transpose(1, 2, 0).astype(np.uint8)
    bef_ims = bef_ims.squeeze().transpose(1, 2, 0).astype(np.uint8)
    aft_ims = aft_ims.squeeze().transpose(1, 2, 0).astype(np.uint8)
    delta_bef_roi = np.array(delta_bef_roi).transpose(1, 0, 2)
    delta_aft_roi = np.array(delta_aft_roi).transpose(1, 0, 2)
    for i in range(boxes.shape[0]):
        cv2.rectangle(ims, (int(boxes[i][0]), int(boxes[i][1])),
                      (int(boxes[i][2]), int(boxes[i][3])), (55, 255, 155), 5)
        bef_box = bbox_pred(boxes[i].reshape(1, -1), delta_bef_roi[i])
        cv2.rectangle(bef_ims, (int(bef_box[0][0]), int(bef_box[0][1])),
                      (int(bef_box[0][2]), int(bef_box[0][3])), (55, 255, 155),
                      5)
        aft_box = bbox_pred(boxes[i].reshape(1, -1), delta_aft_roi[i])
        cv2.rectangle(aft_ims, (int(aft_box[0][0]), int(aft_box[0][1])),
                      (int(aft_box[0][2]), int(aft_box[0][3])), (55, 255, 155),
                      5)

    imageio.imsave(save_name, ims)
    imageio.imsave(save_name.split('.')[-2] + '_bef' + '.JPEG', bef_ims)
    imageio.imsave(save_name.split('.')[-2] + '_aft' + '.JPEG', aft_ims)
Example 3
    def detect(self, batch, scales):
        data = dict(zip(self.data_names, batch.data))
        outputs = self.forward(batch)
        scores, preds = [], []
        im_shapes = np.array([im.shape[-2:] for im in data['data']]).reshape(-1, self.batch_size, 2)
        im_ids = np.array([], dtype=int)

        for i, (gpu_out, gpu_scales, gpu_shapes) in enumerate(zip(outputs, scales, im_shapes)):
            gpu_rois = gpu_out[self.rpn_output_names['rois']].asnumpy()
            # Fixed number of rois per image on this GPU
            nper_gpu = gpu_rois.shape[0] // self.batch_size
            gpu_scores = gpu_out[self.rcnn_output_names['cls']].asnumpy()
            gpu_deltas = gpu_out[self.rcnn_output_names['bbox']].asnumpy()
            im_ids = np.hstack((im_ids, gpu_out[self.rcnn_output_names['im_ids']].asnumpy().astype(int)))
            for idx in range(self.batch_size):
                cids = np.where(gpu_rois[:, 0] == idx)[0]
                assert len(cids) == nper_gpu, 'The number of rois per GPU should be fixed!'
                crois = gpu_rois[cids, 1:]
                cscores = gpu_scores[idx]
                cdeltas = gpu_deltas[idx]

                # Apply deltas and clip predictions
                cboxes = bbox_pred(crois, cdeltas)
                cboxes = clip_boxes(cboxes, gpu_shapes[idx])

                # Re-scale boxes
                cboxes = cboxes / gpu_scales[idx]

                # Store predictions
                scores.append(cscores)
                preds.append(cboxes)
        return scores, preds, data, im_ids
Example 4
    def forward(self, is_train, req, in_data, out_data, aux):
        '''
        :param is_train:
        :param req:
        :param in_data: in_data[0] rois: (128, 5); first column is the batch index (all zeros), the rest are true coordinates.
                        in_data[1] bbox_deltas: (128, 8)
                        in_data[2] im_info: im.shape = (im_info[0], im_info[1])
        :param out_data:
        :param aux:
        :return:
        '''
        rois = in_data[0].asnumpy()[:, 1:]  # (128, 4) drop the batch-index column
        bbox_deltas = in_data[1].asnumpy()
        im_info = in_data[2].asnumpy()[0, :]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        # 1. Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(rois, bbox_deltas)
        # 2. clip predicted boxes to image
        proposals = clip_boxes(
            proposals, im_info[:2]
        )  # (128, 8) First 4 cols: background, last 4 cols: object
        proposals = proposals[:, 4:]  # (128, 4)
        zeros = np.zeros((proposals.shape[0], 1), dtype=proposals.dtype)
        proposals = np.hstack((zeros, proposals))
        self.assign(out_data[0], req[0], proposals)

        if DEBUG:
            print(proposals)
Example 5
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
Example 6
    def forward(self, is_train, req, in_data, out_data, aux):

        rois = in_data[0].asnumpy()[:, 1:]
        cls_prob = in_data[1].asnumpy()
        assert self._cfg.CLASS_AGNOSTIC, 'Currently only support class agnostic'
        if self._cfg.CLASS_AGNOSTIC:
            bbox_deltas = in_data[2].asnumpy()[:, 4:8]
        else:
            fg_cls_prob = cls_prob[:, 1:]
            fg_cls_idx = np.argmax(fg_cls_prob, axis=1).astype(int)
            batch_idx_array = np.arange(fg_cls_idx.shape[0], dtype=int)
            in_data2 = in_data[2].asnumpy()
            bbox_deltas = np.hstack(
                (in_data2[batch_idx_array, fg_cls_idx * 4].reshape(-1, 1),
                 in_data2[batch_idx_array, fg_cls_idx * 4 + 1].reshape(-1, 1),
                 in_data2[batch_idx_array, fg_cls_idx * 4 + 2].reshape(-1, 1),
                 in_data2[batch_idx_array, fg_cls_idx * 4 + 3].reshape(-1, 1)))
        im_info = in_data[3].asnumpy()[0, :]

        # post processing
        # if self._is_train:
        #     if self._cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
        #         bbox_deltas = bbox_deltas * np.array(self._cfg.TRAIN.BBOX_STDS) + np.array(self._cfg.TRAIN.BBOX_MEANS)

        proposals = bbox_pred(rois, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))

        self.assign(out_data[0], req[0], blob)
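
The four single-column gathers above can be collapsed into one fancy-indexing step. A minimal sketch, assuming deltas of shape (N, 4 * num_classes) as in the snippet; gather_class_deltas is a hypothetical helper, not part of the repository:

import numpy as np

def gather_class_deltas(deltas, cls_idx):
    # deltas: (N, 4 * num_classes); cls_idx: (N,) chosen class per roi
    per_class = deltas.reshape(deltas.shape[0], -1, 4)  # (N, num_classes, 4)
    return per_class[np.arange(deltas.shape[0]), cls_idx]

# e.g. bbox_deltas = gather_class_deltas(in_data2, fg_cls_idx)
# reproduces the hstack result above in one step.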
Example 7
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN or cfg.network.ROIDispatch:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        if cfg.TEST.LEARN_NMS:
            pred_boxes = output['learn_nms_sorted_bbox'].asnumpy()
            # raw_scores = output['sorted_score_output'].asnumpy()
            scores = output['nms_final_score_output'].asnumpy()
        else:
            scores = output['cls_prob_reshape_output'].asnumpy()[0]
            bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

            # post processing
            pred_boxes = bbox_pred(rois, bbox_deltas)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
Example 8
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [
        dict(zip(data_names, data_batch.data[i]))
        for i in range(len(data_batch.data))
    ]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)

    if 'feat_conv_3x3_relu_output' in output_all[0]:
        feat = output_all[0]['feat_conv_3x3_relu_output']
    else:
        feat = None
    return scores_all, pred_boxes_all, data_dict_all, feat
Example 9
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)
    print('output length: {}'.format(len(output_all)))
    print('data batch length: {}'.format(len(data_batch.data)))
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
Example 10
def im_batch_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [
        dict(zip(data_names, data_batch.data[i]))
        for i in range(len(data_batch.data))
    ]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        im_infos = data_dict['im_info'].asnumpy()
        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        rois = output['rois_output'].asnumpy()
        for im_idx in range(im_infos.shape[0]):
            bb_idxs = np.where(rois[:, 0] == im_idx)[0]
            im_shape = im_infos[im_idx, :2].astype(int)

            # post processing
            pred_boxes = bbox_pred(rois[bb_idxs, 1:], bbox_deltas[bb_idxs, :])
            pred_boxes = clip_boxes(pred_boxes, im_shape)

            # we used scaled image & roi to train, so it is necessary to transform them back
            pred_boxes = pred_boxes / scale[im_idx]

            scores_all.append(scores[bb_idxs, :])
            pred_boxes_all.append(pred_boxes)

    return scores_all, pred_boxes_all, data_dict_all
Example 11
def det(mod, fn):
    
    raw_img = cv2.imread(fn)

    if raw_img.shape[0] < raw_img.shape[1]:
        # pad the bottom so the image becomes square
        raw_img = cv2.copyMakeBorder(raw_img, 0, raw_img.shape[1] - raw_img.shape[0],
                                     0, 0, cv2.BORDER_CONSTANT)

    im_shape = [IMG_H, IMG_W]  # (height, width)
    img = cv2.resize(raw_img, (IMG_W, IMG_H))  # cv2.resize takes (width, height)
    raw_h = img.shape[0]
    raw_w = img.shape[1]

    im_tensor = image.transform(img, [124, 117, 104], 0.0167)

    im_info = np.array([[IMG_H, IMG_W, 4.18300658e-01]])

    batch = mx.io.DataBatch([mx.nd.array(im_tensor), mx.nd.array(im_info)])

    start = time.time()
    mod.forward(batch)

    output_names = mod.output_names
    output_tensor = mod.get_outputs()
    mod.get_outputs()[0].wait_to_read()
    print("time", time.time() - start, "secs.")

    output = dict(zip(output_names, output_tensor))

    rois = output['rois_output'].asnumpy()[:, 1:]
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    num_classes = 2

    all_cls_dets = [[] for _ in range(num_classes)]

    for j in range(1, num_classes):
        indexes = np.where(scores[:, j] > 0.1)[0]
        cls_scores = scores[indexes, j, np.newaxis]
        cls_boxes = pred_boxes[indexes, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores)).copy()
        all_cls_dets[j] = cls_dets

    for idx_class in range(1, num_classes):
        nms = py_nms_wrapper(0.3)
        keep = nms(all_cls_dets[idx_class])
        all_cls_dets[idx_class] = all_cls_dets[idx_class][keep, :]

    for i in range(all_cls_dets[1].shape[0]):
        cv2.rectangle(img, (int(all_cls_dets[1][i][0]), int(all_cls_dets[1][i][1])),
                      (int(all_cls_dets[1][i][2]), int(all_cls_dets[1][i][3])),
                      (0, 0, 255), 1)

    
    cv2.imshow("w", img)
    cv2.waitKey()
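
py_nms_wrapper (and the gpu_nms_wrapper used by later examples) returns a callable that takes detection rows of [x1, y1, x2, y2, score] and yields the indices of the surviving boxes. A minimal pure-NumPy sketch of that greedy NMS, not the repository's exact implementation:

import numpy as np

def py_nms(dets, thresh):
    """Greedy non-maximum suppression; returns indices of kept boxes."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top-scoring box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        ovr = w * h / (areas[i] + areas[order[1:]] - w * h)
        order = order[1:][ovr <= thresh]  # drop boxes overlapping too much
    return keep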
Example 12
def double_im_detect(predictor, data_batch, data_names, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [
        dict(zip(data_names, data_batch.data[i]))
        for i in range(len(data_batch.data))
    ]
    scores_all = []
    pred_boxes_all = []
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict in zip(output_all, data_dict_all):

        scale = data_dict['im_info'].asnumpy()[0, 2]

        rois = output['rois_output'].asnumpy()[:, 1:]
        ref_rois = output['ref_rois_output'].asnumpy()[:, 1:]
        im_shape = data_dict['data'].shape
        ref_im_shape = data_dict['ref_data'].shape
        non_ref_dim = rois.shape[0]

        # save output
        scores = output['cls_prob_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_output'].asnumpy()[0]
        ref_scores = output['cls_prob_output'].asnumpy()[1]
        ref_bbox_deltas = output['bbox_pred_output'].asnumpy()[1]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        ref_pred_boxes = bbox_pred(ref_rois, ref_bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        ref_pred_boxes = clip_boxes(ref_pred_boxes, ref_im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale
        ref_pred_boxes = ref_pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)

        ref_scores_all.append(ref_scores)
        ref_pred_boxes_all.append(ref_pred_boxes)

    return scores_all, pred_boxes_all, ref_scores_all, ref_pred_boxes_all, data_dict_all
Example 13
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        tmp_w = rois[:, 2] - rois[:, 0] + 1
        tmp_h = rois[:, 3] - rois[:, 1] + 1
        tmp_area = np.sqrt(tmp_w * tmp_h)

        range1 = np.where(tmp_area <= 90)[0]
        range2 = np.where((tmp_area >= 30) & (tmp_area <= 160))[0]
        range3 = np.where(tmp_area >= 90)[0]
        range2_add = range2 + rois.shape[0]
        range3_add = range3 + rois.shape[0] * 2

        keep = np.hstack((range1, range2, range3))
        keep_add = np.hstack((range1, range2_add, range3_add))
        rois = rois[keep, :]
        scores = scores[keep_add, :]
        bbox_deltas = bbox_deltas[keep_add, :]

        #tot = rois.shape[0]

        #idxs1 = np.zeros((tot, 1), dtype=np.int32)
        #idxs2 = np.zeros((tot, 1), dtype=np.int32)
        #idxs3 = np.zeros((tot, 1), dtype=np.int32)
        #idxs1[range1, :] = 1
        #idxs2[range2, :] = 1
        #idxs3[range3, :] = 1

        #scores = (scores[:tot, :] * idxs1 + scores[tot:tot*2, :] * idxs2 + scores[tot*2:tot*3, :] * idxs3) / (idxs1 + idxs2 + idxs3)
        #bbox_deltas = (bbox_deltas[:tot, :] * idxs1 + bbox_deltas[tot:tot*2, :] * idxs2 + bbox_deltas[tot*2:tot*3, :] * idxs3) / (idxs1 + idxs2 + idxs3)

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
Example 14
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, data_batch.data))]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        rois = output['rois_output'].asnumpy()[:, 1:]

        im_shape = data_dict['data'].shape
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS), cfg.dataset.NUM_CLASSES)
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0] * stds
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
        if DEBUG:
          print("im shape: ",im_shape)
          print(pred_boxes.shape)
          print(scores.shape)
          max_scores = scores.argmax(axis = 1)
          max_scores_val = scores[np.arange(pred_boxes.shape[0]),max_scores]
          keep = np.where(max_scores>0)[0]
          max_scores = max_scores[keep]
          print(pred_boxes)
          bboxes = pred_boxes.copy()[keep]*scale
          max_scores_val = max_scores_val[keep]
          img = data_dict['data'].asnumpy().transpose((0,2,3,1))[0]
          img = (img * np.array([[[0.229, 0.224, 0.225]]]) +np.array([[[0.485, 0.456, 0.406]]])) * 255
          img = np.clip(img,0,255)
          img = img.astype(np.uint8)
          print(type(img))
          image = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
          print(img.shape)
          print(max_scores_val)
          maxid = max_scores_val.argsort()[-30:]
          for i, boxxes in enumerate(bboxes):
            if i not in maxid: continue
            #print("ith box:")
            #print(boxxes)
            #print(max_scores[i])
            box = boxxes[max_scores[i]*4:(max_scores[i]+1)*4]
            box = box.astype(np.int64)
            print(box)
            cv2.rectangle(image,tuple(box[:2]),tuple(box[2:]),(255,0,0),1)
            cv2.putText(image,names[max_scores[i]]+" "+str(max_scores_val[i]),tuple(box[:2]),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),1)
          cv2.imwrite("./det_images/det_img_{:3f}.png".format(np.random.randn()),image)
          #pdb.set_trace()
          
    return scores_all, pred_boxes_all, data_dict_all
Example 15
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    pred_kps_all = []
    for output, data_dict in zip(output_all, data_dict_all):
        if cfg.TEST.HAS_RPN:
            batch_rois = output['rois_output'].asnumpy()
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        batch_scores = output['cls_prob_reshape_output'].asnumpy()
        batch_bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()
        batch_im_info = data_dict['im_info'].asnumpy()

        for i in range(cfg.TEST.BATCH_IMAGES):
            scale = batch_im_info[i, 2]
            if scale < 1e-6:
                break
            indices = np.where(batch_rois[:, 0] == i)[0]
            rois = batch_rois[indices, 1:]
            scores = batch_scores[i]
            bbox_deltas = batch_bbox_deltas[i]

            # post processing
            pred_boxes = bbox_pred(rois, bbox_deltas)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

            # we used scaled image & roi to train, so it is necessary to transform them back
            pred_boxes = pred_boxes / scale

            scores_all.append(scores)
            pred_boxes_all.append(pred_boxes)

        if cfg.network.PREDICT_KEYPOINTS:
            assert cfg.TEST.BATCH_IMAGES == 1, "only support batch_size=1"
            kps_deltas = output['kps_pos_pred_reshape_output'].asnumpy()  # [N, 2*K, G, G]
            kps_probs = output['kps_prob_output'].asnumpy()  # [N*K, G*G]
            pred_kps = predict_keypoints(rois,
                                         kps_probs,
                                         kps_deltas,
                                         scale=scale)
            pred_kps_all.append(pred_kps)

    if cfg.network.PREDICT_KEYPOINTS:
        return scores_all, pred_boxes_all, pred_kps_all, data_dict_all
    return scores_all, pred_boxes_all, data_dict_all
Example 16
def im_detect(predictor,
              data_batch,
              data_names,
              scales,
              cfg,
              aggr_feats=False):
    output_all = predictor.predict(data_batch)
    data_dict_all = [
        dict(zip(data_names, data_batch.data[i]))
        for i in range(len(data_batch.data))
    ]
    scores_all = []
    pred_boxes_all = []
    aggr_feats_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if 'blockgrad0_output' in output:
            for i, key in enumerate([
                    '_', 'rois_output', 'cls_prob_reshape_output',
                    'bbox_pred_reshape_output',
                    '_plus{}_output'.format(cfg.TEST.KEY_FRAME_INTERVAL * 2 -
                                            1)
            ]):
                output[key] = output['blockgrad{}_output'.format(i)]
        if aggr_feats:
            aggr_feats_all.append(
                output['_plus{}_output'.format(cfg.TEST.KEY_FRAME_INTERVAL *
                                               2 - 1)])
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    if aggr_feats:
        return zip(scores_all, pred_boxes_all, data_dict_all), aggr_feats_all
    return zip(scores_all, pred_boxes_all, data_dict_all)
Example 17
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, data_batch.data))]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        rois = output['rois_output'].asnumpy()[:, 1:]

        im_shape = data_dict['data'].shape
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        pred_boxes = pred_boxes / scale
        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
Example 18
def im_detect_feats_stats(predictor,
                          data_batch,
                          data_names,
                          scales,
                          cfg,
                          stat_type,
                          scores_field='cls_prob_reshape'):
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    rois_feats_all = []
    stats_all = []

    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output[scores_field + '_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        rois_feats = output['psp_final_embed_output'].asnumpy()  # shape: [#rois, Embed_dim]
        rois_feats_all.append(rois_feats)

        if stat_type == 'ratio_val':
            stats = output['fc_val_reg_2_output'].asnumpy()
            stats_all.append(stats)
        if stat_type == 'feat_pred':
            stats = output['fc_score_hist_3_output'].asnumpy()
            stats_all.append(stats)

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all, rois_feats_all, stats_all
Example 19
    def detect(self, batch, scales):
        data = dict(zip(self.data_names, batch.data))
        outputs = self.forward(batch)
        scores, preds, maps = [], [], []
        im_ids = np.array([], dtype=int)
        chip_ids = np.array([], dtype=int)
        has_focus_maps = self.rcnn_output_names['scale_map'] in outputs[0]
        for i, (gpu_out, gpu_scales) in enumerate(zip(outputs, scales)):
            gpu_rois = gpu_out[self.rpn_output_names['rois']].asnumpy()
            # Fixed number of rois per image on this GPU
            nper_gpu = gpu_rois.shape[0] // self.batch_size
            if has_focus_maps:
                scale_prob = gpu_out[
                    self.rcnn_output_names['scale_map']].asnumpy()
            gpu_scores = gpu_out[self.rcnn_output_names['cls']].asnumpy()
            gpu_deltas = gpu_out[self.rcnn_output_names['bbox']].asnumpy()
            gpu_infos = gpu_out[self.rcnn_output_names['im_info']].asnumpy()
            gpu_shapes = gpu_infos[:, :2]
            im_ids = np.hstack((im_ids, gpu_out[
                self.rcnn_output_names['im_ids']].asnumpy().astype(int)))
            chip_ids = np.hstack((chip_ids, gpu_out[
                self.rcnn_output_names['chip_ids']].asnumpy().astype(int)))
            for idx in range(self.batch_size):
                cids = np.where(gpu_rois[:, 0] == idx)[0]
                assert len(cids) == nper_gpu, 'The number of rois per GPU should be fixed!'
                crois = gpu_rois[cids, 1:]
                cscores = gpu_scores[idx]
                cdeltas = gpu_deltas[idx]

                # Apply deltas and clip predictions
                cboxes = bbox_pred(crois, cdeltas)
                cboxes = clip_boxes(cboxes, gpu_shapes[idx])

                # Re-scale boxes
                cboxes = cboxes / gpu_scales[idx]

                # Store predictions
                scores.append(cscores)
                preds.append(cboxes)
                if has_focus_maps:
                    maps.append(scale_prob[idx])

        return scores, preds, data, im_ids, maps, chip_ids
Example 20
    def forward(self, is_train, req, in_data, out_data, aux):

        bottom_rois = in_data[0].asnumpy()
        bbox_delta = in_data[1].asnumpy()
        cls_prob = in_data[2].asnumpy()
        im_info = in_data[3].asnumpy()

        num_rois = bottom_rois.shape[0]
        # 1. judge if bbox class-agnostic
        # 2. if not, calculate bbox_class_idx
        if self._bbox_class_agnostic:
            # every roi uses the single foreground delta set (columns 4:8)
            bbox_class_idx = np.ones(num_rois)
        else:
            bbox_class_idx = np.argmax(cls_prob[:, 1:], axis=1) + 1
        bbox_class_idx = bbox_class_idx[:, np.newaxis] * 4
        bbox_class_idx = np.hstack((bbox_class_idx, bbox_class_idx + 1,
                                    bbox_class_idx + 2, bbox_class_idx + 3))

        # 3. get bbox_pred given bbox_class_idx
        rows = np.arange(num_rois, dtype=np.intp)
        bbox_delta = bbox_delta[rows[:, np.newaxis],
                                bbox_class_idx.astype(np.intp)]

        # 4. calculate bbox_delta by bbox_pred[i] * std[i] + mean[i]
        means = np.array(self._bbox_means)
        stds = np.array(self._bbox_stds)
        vx = bbox_delta[:, 0] * stds[0] + means[0]
        vy = bbox_delta[:, 1] * stds[1] + means[1]
        vw = bbox_delta[:, 2] * stds[2] + means[2]
        vh = bbox_delta[:, 3] * stds[3] + means[3]
        bbox_delta = np.hstack((vx[:, np.newaxis], vy[:, np.newaxis],
                                vw[:, np.newaxis], vh[:, np.newaxis]))

        # 5. decode the rois with bbox_pred
        proposal = bbox_pred(bottom_rois[:, 1:], bbox_delta)

        # 6. clip boxes
        if self._b_clip_boxes:
            proposal = clip_boxes(proposal, im_info[0, :2])

        output = bottom_rois
        output[:, 1:] = proposal

        self.assign(out_data[0], req[0], output)
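
Step 4 undoes the normalization applied to the regression targets during training, where targets were stored as (t - mean) / std. A toy illustration; the constants below are common defaults, not necessarily this repository's values:

import numpy as np

means = np.zeros(4)                     # illustrative only
stds = np.array([0.1, 0.1, 0.2, 0.2])   # illustrative only

raw = np.array([1.0, -0.5, 0.2, 0.0])   # raw network output for one roi
deltas = raw * stds + means             # -> [0.1, -0.05, 0.04, 0.0]
# bbox_pred then shifts the box center by deltas[0] * roi_width, and so on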
Example 21
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [
        dict(list(zip(data_names, idata))) for idata in data_batch.data
    ]
    scores_all = []
    pred_boxes_all = []
    roi_score_all = []
    rois_all = []
    roi_feat_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            assert np.all(output['rois_output'].asnumpy()[:, 0] == 0.)
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        roi_score = output['rois_score'].asnumpy()
        indice = roi_score.flatten().argsort()[::-1]
        roi_score = roi_score[indice]
        roi_feat = output['roi_feat_output_output'].asnumpy()[indice]
        rois = rois[indice]
        # keep scores and deltas aligned with the re-sorted rois
        scores = scores[indice]
        bbox_deltas = bbox_deltas[indice]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale
        rois = rois / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
        roi_score_all.append(roi_score)
        rois_all.append(rois)
        roi_feat_all.append(roi_feat)
    return scores_all, pred_boxes_all, roi_score_all, rois_all, roi_feat_all, data_dict_all
Example 22
    def forward(self, is_train, req, in_data, out_data, aux):

        rois = in_data[0].asnumpy()[:, 1:]
        bbox_deltas = in_data[1].asnumpy()[:, 4:8]
        im_info = in_data[2].asnumpy()[0, :]
        cls_prob = in_data[3].asnumpy()[:, 1:]  # ignore bg

        num_keep_index = int(rois.shape[0] * self._top)
        # sort scores
        max_scores = np.amax(cls_prob, axis=1)
        # keep top scores
        keep_index = np.argsort(-max_scores)[:num_keep_index]

        proposals = bbox_pred(rois, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

        self.assign(out_data[0], req[0], blob[keep_index, :])
        self.assign(out_data[1], req[1], keep_index)
Example 23
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        if cfg.DCR.top == 1:
            dcr_scores = output['dcr_prob_reshape_output'].asnumpy()[0]
            scores = scores * dcr_scores
        elif cfg.DCR.top > 0:
            dcr_scores = output['dcr_prob_reshape_output'].asnumpy()[0]
            keep_index = output['keep_index_reshape_output'].asnumpy().astype(int)[0]
            dcr_final_scores = np.ones_like(scores)
            dcr_final_scores[keep_index, :] = dcr_scores
            scores = scores * dcr_final_scores
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
Example 24
    def forward(self, is_train, req, in_data, out_data, aux):
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        # the first set of anchors are background probabilities
        # keep the second part
        scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = in_data[1].asnumpy()
        im_info = in_data[2].asnumpy()[0, :]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = int(im_info[0] / self._feat_stride), int(
            im_info[1] / self._feat_stride)

        if DEBUG:
            print('score map size: {}'.format(scores.shape))
            print("residual: {}".format(
                (scores.shape[2] - height, scores.shape[3] - width)))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
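
The layer above calls two helpers it does not define: _clip_pad, which crops the possibly padded score/delta maps back to the real feature-map size, and _filter_boxes, the standard py-faster-rcnn size filter. A sketch of the latter (shown as a plain function; in the repository it is a method on the operator):

import numpy as np

def _filter_boxes(boxes, min_size):
    # keep indices of boxes whose width and height are both >= min_size
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep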
Example 25
    def forward(self, is_train, req, in_data, out_data, aux):
        before_pyramid_proposal = datetime.now()
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        LAYER_NUM = len(in_data) // 2
        LAYER_NUM = 11  # hard-coded override: use only the single 'stride64' head below
        if LAYER_NUM == 7:
            cls_prob_dict = {
                'stride64': in_data[6],
                'stride32': in_data[5],
                'stride16': in_data[4],
                'stride8': in_data[3],
                'stride4': in_data[2],
                'stride2': in_data[1],
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[13],
                'stride32': in_data[12],
                'stride16': in_data[11],
                'stride8': in_data[10],
                'stride4': in_data[9],
                'stride2': in_data[8],
                'stride1': in_data[7],
            }

        elif LAYER_NUM == 6:
            cls_prob_dict = {
                'stride64': in_data[5],
                'stride32': in_data[4],
                'stride16': in_data[3],
                'stride8': in_data[2],
                'stride4': in_data[1],
                'stride2': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[11],
                'stride32': in_data[10],
                'stride16': in_data[9],
                'stride8': in_data[8],
                'stride4': in_data[7],
                'stride2': in_data[6],
            }

        elif LAYER_NUM == 5:
            cls_prob_dict = {
                'stride64': in_data[4],
                'stride32': in_data[3],
                'stride16': in_data[2],
                'stride8': in_data[1],
                'stride4': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[9],
                'stride32': in_data[8],
                'stride16': in_data[7],
                'stride8': in_data[6],
                'stride4': in_data[5],
            }
        elif LAYER_NUM == 2:
            cls_prob_dict = {
                'stride64': in_data[4],
                'stride32': in_data[3],
            }
            bbox_pred_dict = {
                'stride64': in_data[9],
                'stride32': in_data[8],
            }
        elif LAYER_NUM == 11:
            cls_prob_dict = {
                'stride64': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[1],
            }
        elif LAYER_NUM == 1:
            cls_prob_dict = {
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride1': in_data[1],
            }
        elif LAYER_NUM == 3:
            cls_prob_dict = {
                'stride64': in_data[2],
                'stride32': in_data[1],
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[5],
                'stride32': in_data[4],
                'stride1': in_data[3],
            }
        '''
        cls_prob_dict = {
            'stride8': in_data[3],
            'stride4': in_data[2],
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride8': in_data[7],
            'stride4': in_data[6],
            'stride2': in_data[5],
            'stride1': in_data[4],
        }
        '''
        '''
        cls_prob_dict = {
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride2': in_data[3],
            'stride1': in_data[2],
        }        
        '''
        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        proposal_list = []
        score_list = []

        channel_list = []

        before_feat = datetime.now()

        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=self._scales,
                                           ratios=self._ratios)
            #print "cls_prob_dict['stride' + str(s)].shape:"+str(cls_prob_dict['stride' + str(s)].shape)
            scores = cls_prob_dict['stride' +
                                   str(s)].asnumpy()[:,
                                                     self._num_anchors:, :, :]

            if DEBUG:
                scores1 = cls_prob_dict['stride' + str(s)].asnumpy()
                print("scores.shape:" + str(scores.shape))
                print("scores1.shape:" + str(scores1.shape))

            #print "scores.shape:"+str(scores.shape)
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            #print "bbox_deltas.shape:"+str(bbox_deltas.shape)
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts

            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            before_enume = datetime.now()
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))
            after_enume = datetime.now()
            #print "enume time:"+str((after_enume-before_enume).seconds)
            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))

            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
            if DEBUG:
                print("scores[:50]:" + str(scores[:50]))
            channels = np.ones((scores.shape)) * stride

            # Convert anchors into proposals via bbox transformations
            before_pred = datetime.now()
            proposals = bbox_pred(anchors, bbox_deltas)
            after_pred = datetime.now()
            #print "pred_time:"
            #print (after_pred-before_pred).seconds
            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])
            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            if DEBUG:
                print(str(min_size))
                print(str(im_info[2]))
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            if DEBUG:
                print("proposals3:" + str(proposals[0:10]))
            scores = scores[keep]

            channels = channels[keep]

            proposal_list.append(proposals)
            score_list.append(scores)
            channel_list.append(channels)
        after_feat = datetime.now()
        #print "feat time:"
        #print (after_feat-before_feat).seconds

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)
        channels = np.vstack(channel_list)
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        before_sort = datetime.now()
        order = scores.ravel().argsort()[::-1]
        after_sort = datetime.now()
        #print "sort time:"
        #print (after_sort-before_sort).seconds
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        channels = channels[order]
        if DEBUG:
            print '-------1-------'
            print channels.shape
            for s in self._feat_stride:
                print "stride:" + str(s)
                print len(np.where(channels == float(s))[0])
            print "proposals:" + str(proposals[0:20])
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)

        keep = nms(det)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]
        channels = channels[keep]
        if DEBUG:
            print('-------2-------')
            print(channels.shape)
            for s in self._feat_stride:
                print("stride:" + str(s))
                print(len(np.where(channels == float(s))[0]))
            print("proposals:" + str(proposals[0:20]))
            print("scores:" + str(scores[0:20]))
        f_chan = open('channels.txt', 'w')
        for ii in range(channels.shape[0]):
            f_chan.write(str(channels[ii][0]) + ' ')
        f_chan.close()

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        #print "out_data[0].shape"+str(out_data[0].shape)
        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
        after_pyramid_proposal = datetime.now()
Example 26
def im_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)
    data_dict_all = [
        dict(zip(data_names, data_batch.data[i]))
        for i in xrange(len(data_batch.data))
    ]
    scores_all = []
    pred_boxes_all = []
    # the ref_* lists are filled only on the cfg.TEST.LEARN_NMS path below;
    # without this initialization that branch raises a NameError
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            concat_rois = output['concat_rois_output'].asnumpy()[:, 1:]
            rois, ref_rois = np.split(concat_rois, 2)
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        if cfg.TEST.LEARN_NMS:
            concat_pred_boxes = output['concat_sorted_bbox_output'].asnumpy()
            # raw_scores = output['sorted_score_output'].asnumpy()
            concat_nms_scores = output['nms_final_score_output'].asnumpy()

            concat_pre_nms_scores = output['pre_nms_score_output'].asnumpy()
            # we used scaled image & roi to train, so it is necessary to transform them back
            concat_pred_boxes = concat_pred_boxes / scale

            concat_multi_scores = np.dstack(
                (concat_nms_scores, concat_pre_nms_scores))

            # concat_nms_scores /= concat_pre_nms_scores
            pred_boxes, ref_pred_boxes = np.split(concat_pred_boxes, 2)
            scores, ref_scores = np.split(concat_multi_scores, 2)

            pred_boxes_all.append(pred_boxes)
            ref_pred_boxes_all.append(ref_pred_boxes)
            scores_all.append(scores)
            ref_scores_all.append(ref_scores)

            nms_multi_target = output['custom0_nms_multi_target'].asnumpy()
            target, ref_target = np.split(nms_multi_target, 2)
            concat_target_boxes = concat_pred_boxes[np.where(nms_multi_target)[:2]]
            concat_target_scores = concat_nms_scores[np.where(nms_multi_target)[:2]]
            concat_pre_target_scores = concat_pre_nms_scores[np.where(nms_multi_target)[:2]]

            # concat_target_boxes = concat_target_boxes / scale

            # construct gt style nms_multi_target, 0:30 classes
            concat_target_boxes = np.hstack(
                (concat_target_boxes,
                 np.where(nms_multi_target)[1][:, np.newaxis]))
            concat_target_boxes = np.hstack(
                (concat_target_boxes, concat_target_scores[:, np.newaxis]))
            concat_target_boxes = np.hstack(
                (concat_target_boxes, concat_pre_target_scores[:, np.newaxis]))

            target_boxes, ref_target_boxes = np.split(concat_target_boxes, 2)

            data_dict['nms_multi_target'] = target_boxes
            data_dict['ref_nms_multi_target'] = ref_target_boxes

        else:
            # save output
            scores = output['cls_prob_reshape_output'].asnumpy()[0]
            bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
            # post processing
            pred_boxes = bbox_pred(rois, bbox_deltas)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

            # we used scaled image & roi to train, so it is necessary to transform them back
            pred_boxes = pred_boxes / scale

            scores_all.append(scores)
            pred_boxes_all.append(pred_boxes)

    return scores_all, pred_boxes_all, data_dict_all
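
These snippets stack the current frame's outputs on top of a reference frame's along axis 0 and rely on np.split(..., 2) to separate the halves again. A small sketch of that convention (array contents are made up):

import numpy as np

# 8 RoIs: first half from the current frame, second half from the
# reference frame, concatenated along axis 0 as in im_detect above
concat_rois = np.arange(8 * 4, dtype=np.float32).reshape(8, 4)

rois, ref_rois = np.split(concat_rois, 2)   # two (4, 4) halves
assert rois.shape == ref_rois.shape == (4, 4)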
Esempio n. 29
0
    def forward(self, is_train, req, in_data, out_data, aux):


        bbox_pred_dict = {
            'stride128': in_data[3],
            'stride64': in_data[2],
            'stride32': in_data[1],
            'stride16': in_data[0],
        }
        cls_prob_dict = {
            'stride128': in_data[7],
            'stride64': in_data[6],
            'stride32': in_data[5],
            'stride16': in_data[4],
        }
        im_info = in_data[8].asnumpy()[0, :]
        im = in_data[9].asnumpy()

        proposal_list = []
        score_list = []

        destore_rois_list = []
        destore_cls_list = []

        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=self._scales,
                                           ratios=self._ratios)
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            # im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # (unlike the im_info-based variant below, this layer walks the
            # padded feature-map grid taken from bbox_deltas itself)

            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))
            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
            # Same story for the scores:
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            # Convert anchors into proposals via bbox transformations
            proposals = bbox_pred(anchors, bbox_deltas)

            proposals = clip_boxes(proposals, im_info[:2])

            scores = cls_prob_dict['stride' + str(s)].asnumpy()
            s_list = []
            start = 0

            for i in range(self._num_classes):
                # use a name other than the outer stride variable s
                cls_s = scores[:, start:start + self._num_anchors, :, :]
                start = start + self._num_anchors
                cls_s = self._clip_pad(cls_s, (height, width))
                cls_s = cls_s.transpose((0, 2, 3, 1)).reshape((-1, 1))
                s_list.append(cls_s)
            scores = np.concatenate(s_list, axis=1)

            destore_rois_list.append(proposals)
            destore_cls_list.append(scores)

        destore_rois = np.concatenate(destore_rois_list, axis=0)
        destore_cls = np.concatenate(destore_cls_list, axis=0)

        # rank boxes by their best class score and keep the top _keep_num
        s = np.max(destore_cls, axis=1)

        order = s.ravel().argsort()[::-1]
        order = order[:self._keep_num]
        destore_cls = destore_cls[order, :]
        destore_rois = destore_rois[order, :]

        vis = False
        if vis:
            vis_all_detection(im, destore_rois[:, :])

        self.assign(out_data[0], req[0], mx.nd.array(destore_rois))

        self.assign(out_data[1], req[1], mx.nd.array(destore_cls))
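
Both proposal layers in this file enumerate shifted anchors the same way: A base anchors are broadcast against K per-cell shifts of the feature-map grid. A compact standalone sketch (toy sizes, with a single hand-written base anchor standing in for generate_anchors):

import numpy as np

stride = 16
height, width = 2, 3                            # toy feature-map size
base_anchors = np.array([[-8., -8., 8., 8.]])   # A = 1 illustrative anchor
A = base_anchors.shape[0]

shift_x = np.arange(0, width) * stride
shift_y = np.arange(0, height) * stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
K = shifts.shape[0]                             # one shift per cell

# (1, A, 4) + (K, 1, 4) -> (K, A, 4) -> (K * A, 4)
anchors = base_anchors.reshape((1, A, 4)) + \
    shifts.reshape((1, K, 4)).transpose((1, 0, 2))
anchors = anchors.reshape((K * A, 4))
assert anchors.shape == (K * A, 4)

Esempio n. 30
0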
    def forward(self, is_train, req, in_data, out_data, aux):
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError("Sorry, multiple images per device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        cls_prob_dict = {
            'stride64': in_data[4],
            'stride32': in_data[3],
            'stride16': in_data[2],
            'stride8': in_data[1],
            'stride4': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[9],
            'stride32': in_data[8],
            'stride16': in_data[7],
            'stride8': in_data[6],
            'stride4': in_data[5],
        }

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
            scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_pred(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        if self._output_score:
            self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
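
The transpose-then-reshape in both layers is what lines the flattened deltas up with the anchor order, rows sorted by (h, w, a). A toy shape check of that reordering (sizes invented):

import numpy as np

A, H, W = 2, 3, 4
bbox_deltas = np.arange(4 * A * H * W).reshape(1, 4 * A, H, W)

# (1, 4A, H, W) -> (1, H, W, 4A) -> (H * W * A, 4)
flat = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
assert flat.shape == (H * W * A, 4)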
Esempio n. 31
0
    def forward(self, is_train, req, in_data, out_data, aux):
        assert self._batch_rois == -1 or self._batch_rois % self._batch_images == 0, \
            'batch_images {} must divide batch_rois {}'.format(self._batch_images, self._batch_rois)
        all_rois = in_data[0].asnumpy()
        gt_boxes = in_data[1].asnumpy()
        im = in_data[2].asnumpy()

        if self._batch_rois == -1:
            rois_per_image = all_rois.shape[0] + gt_boxes.shape[0]
            fg_rois_per_image = rois_per_image
        else:
            rois_per_image = self._batch_rois / self._batch_images
            fg_rois_per_image = np.round(self._fg_fraction *
                                         rois_per_image).astype(int)

        # Include ground-truth boxes in the set of candidate rois
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1]))))
        # Sanity check: single batch only
        assert np.all(
            all_rois[:, 0] == 0), 'Only single item batches are supported'

        rois, labels, bbox_targets, bbox_weights, layer_indexs = \
            sample_rois(all_rois, fg_rois_per_image, rois_per_image,
                        self._num_classes, self._cfg, gt_boxes=gt_boxes,
                        sample_type='fpn', k0=4)

        vis = False
        if vis:
            ind = np.where(labels != 0)[0]
            im_shape = im.shape
            pred_boxes = bbox_pred(rois[:, 1:], bbox_targets)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
            l = labels[ind]
            ro = rois[ind, 1:]
            b = bbox_targets[ind, :]
            p = pred_boxes[ind, :] * bbox_weights[ind, :]
            r = []
            for i in range(p.shape[0]):
                r.append(p[i, l[i] * 4:l[i] * 4 + 4])
            r_ = np.vstack(r)

            vis_all_detection(im, r_, l, 1)
        rois_all = np.zeros((self._batch_rois * 4, 5), dtype=rois.dtype)
        labels_all = np.ones((self._batch_rois * 4, ), dtype=labels.dtype) * -1
        bbox_targets_all = np.zeros(
            (self._batch_rois * 4, self._num_classes * 4),
            dtype=bbox_targets.dtype)
        bbox_weights_all = np.zeros(
            (self._batch_rois * 4, self._num_classes * 4),
            dtype=bbox_weights.dtype)
        for i in range(4):
            index = (layer_indexs == (i + 2))

            num_index = sum(index)
            start = self._batch_rois * i
            end = start + num_index
            index_range = range(start, end)
            rois_all[index_range, :] = rois[index, :]
            labels_all[index_range] = labels[index]
            bbox_targets_all[index_range, :] = bbox_targets[index, :]
            bbox_weights_all[index_range, :] = bbox_weights[index, :]

        if DEBUG:
            print "labels=", labels
            print 'num fg: {}'.format((labels > 0).sum())
            print 'num bg: {}'.format((labels == 0).sum())
            self._count += 1
            self._fg_num += (labels > 0).sum()
            self._bg_num += (labels == 0).sum()
            print "self._count=", self._count
            print 'num fg avg: {}'.format(self._fg_num / self._count)
            print 'num bg avg: {}'.format(self._bg_num / self._count)
            print 'ratio: {:.3f}'.format(
                float(self._fg_num) / float(self._bg_num))
        for ind, val in enumerate(
            [rois_all, labels_all, bbox_targets_all, bbox_weights_all]):
            # use the matching write request for each output, not req[0]
            self.assign(out_data[ind], req[ind], val)
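
The loop above scatters the sampled RoIs into one fixed-size block of batch_rois slots per FPN level, leaving unused slots zeroed (labels at -1). A reduced sketch of that layout (toy RoIs and level assignments):

import numpy as np

batch_rois, num_levels = 4, 4
rois = np.arange(10 * 5, dtype=np.float32).reshape(10, 5)   # toy sampled RoIs
layer_indexs = np.array([2, 3, 2, 4, 5, 2, 3, 4, 5, 2])     # level per RoI

rois_all = np.zeros((batch_rois * num_levels, 5), dtype=rois.dtype)
for i in range(num_levels):
    index = (layer_indexs == (i + 2))     # levels are numbered 2..5
    num_index = int(index.sum())          # assumed <= batch_rois, as above
    start = batch_rois * i
    rois_all[start:start + num_index, :] = rois[index, :]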
Esempio n. 32
0
def im_detect(predictor, data_batch, data_names, scales, cfg, count):
    global im_shape
    output_all = predictor.predict(data_batch)

    data_dict_all = [
        dict(zip(data_names, data_batch.data[i]))
        for i in xrange(len(data_batch.data))
    ]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        # im_shape = data_dict['data'].shape
        im_shape_tmp = data_dict['data'].shape
        if len(im_shape_tmp) == 4:
            im_shape = im_shape_tmp

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)

    if output_all[0].has_key('feat_conv_3x3_relu_output'):
        feat = output_all[0]['feat_conv_3x3_relu_output']
        feat_after_motion = None
    else:
        feat = None
        feat_after_motion = output_all[0]['warping_feat_output']

    # For debugging
    # rpn_cls_score_output = output_all[0]['rpn_cls_score_output']
    # rpn_bbox_pred_output = output_all[0]['rpn_bbox_pred_output']
    # rpn_cls_prob_output = output_all[0]['rpn_cls_prob_output']
    # rois_output = output_all[0]['rois_output']
    # rfcn_cls_output = output_all[0]['rfcn_cls_output']
    # rfcn_bbox_output = output_all[0]['rfcn_bbox_output']
    # cls_score_output = output_all[0]['ave_cls_scors_rois_output']
    bbox_pred1_output = output_all[0]['ave_bbox_pred_rois_output']
    bbox_pred2_output = output_all[0]['bbox_pred_reshape2_output']
    bbox_pred_output = output_all[0]['bbox_pred_reshape_output']

    return scores_all, pred_boxes_all, data_dict_all, feat, bbox_pred1_output, bbox_pred2_output, bbox_pred_output
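
The global im_shape above caches the most recent 4-D (N, C, H, W) data shape, so batches that carry cached features instead of raw image data can still clip boxes. A guess at that intent as a tiny standalone helper (the function name is hypothetical):

im_shape = None  # module-level cache, as with `global im_shape` above

def remember_im_shape(data_shape):
    # keep the last genuinely 4-D image shape; ignore feature-only batches
    global im_shape
    if len(data_shape) == 4:
        im_shape = data_shape
    return im_shape

Esempio n. 33
0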
def im_double_detect(predictor, data_batch, data_names, scales, cfg):
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN or cfg.network.ROIDispatch:
            concat_rois = output['concat_rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        if cfg.TEST.LEARN_NMS:
            concat_pred_boxes = output['concat_sorted_bbox_output'].asnumpy()
            # raw_scores = output['sorted_score_output'].asnumpy()
            concat_nms_scores = output['nms_final_score_output'].asnumpy()

            concat_pre_nms_scores = output['pre_nms_score_output'].asnumpy()
            # we used scaled image & roi to train, so it is necessary to transform them back
            concat_pred_boxes = concat_pred_boxes / scale

            concat_multi_scores = np.dstack(
                (concat_nms_scores, concat_pre_nms_scores))

            # concat_nms_scores /= concat_pre_nms_scores
            pred_boxes, ref_pred_boxes = np.split(concat_pred_boxes, 2)
            scores, ref_scores = np.split(concat_multi_scores, 2)

            pred_boxes_all.append(pred_boxes)
            ref_pred_boxes_all.append(ref_pred_boxes)
            scores_all.append(scores)
            ref_scores_all.append(ref_scores)

            nms_multi_target = output['stable_nms_multi_target'].asnumpy()
            target, ref_target = np.split(nms_multi_target, 2)
            concat_target_boxes = concat_pred_boxes[np.where(nms_multi_target)[:2]]
            concat_target_scores = concat_nms_scores[np.where(nms_multi_target)[:2]]
            concat_pre_target_scores = concat_pre_nms_scores[np.where(nms_multi_target)[:2]]

            # concat_target_boxes = concat_target_boxes / scale

            # construct gt style nms_multi_target, 0:30 classes
            concat_target_boxes = np.hstack(
                (concat_target_boxes,
                 np.where(nms_multi_target)[1][:, np.newaxis]))
            concat_target_boxes = np.hstack(
                (concat_target_boxes, concat_target_scores[:, np.newaxis]))
            concat_target_boxes = np.hstack(
                (concat_target_boxes, concat_pre_target_scores[:, np.newaxis]))

            target_boxes, ref_target_boxes = np.split(concat_target_boxes, 2)

            data_dict['nms_multi_target'] = target_boxes
            data_dict['ref_nms_multi_target'] = ref_target_boxes

            concat_nms_feats = output[
                'concat_nms_embedding_feat_output'].asnumpy()
            concat_target_feats = concat_nms_feats[np.where(nms_multi_target)[:2]]
            nms_feats, ref_nms_feats = np.split(concat_nms_feats, 2)
            target_nms_feats, ref_target_nms_feats = np.split(
                concat_target_feats, 2)
            high_score_feats = nms_feats[np.where(scores[:, :, 0] > 0.2)[:2]]
            ref_high_score_feats = ref_nms_feats[np.where(
                ref_scores[:, :, 0] > 0.2)[:2]]
            from scipy.spatial.distance import cosine
            dist_mat = np.zeros(
                (target_nms_feats.shape[0], high_score_feats.shape[0]),
                dtype=np.float)
            ref_dist_mat = np.zeros(
                (ref_target_nms_feats.shape[0], ref_high_score_feats.shape[0]),
                dtype=np.float)
            for i, nms_feat in enumerate(target_nms_feats):
                for j, high_score_feat in enumerate(high_score_feats):
                    dist_mat[i, j] = cosine(nms_feat, high_score_feat)
            for i, nms_feat in enumerate(ref_target_nms_feats):
                for j, high_score_feat in enumerate(ref_high_score_feats):
                    ref_dist_mat[i, j] = cosine(nms_feat, high_score_feat)
            # pdb.set_trace()  # leftover debugger breakpoint, disabled so inference can run
            print(dist_mat)
            print(ref_dist_mat)

        else:
            rois, ref_rois = np.split(concat_rois, 2)
            scores = output['cls_prob_reshape_output'].asnumpy()[0]
            bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
            ref_scores = output['cls_prob_reshape_output'].asnumpy()[1]
            ref_bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[1]

            # post processing
            pred_boxes = bbox_pred(rois, bbox_deltas)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
            pred_boxes /= scale

            ref_pred_boxes = bbox_pred(ref_rois, ref_bbox_deltas)
            ref_pred_boxes = clip_boxes(ref_pred_boxes, im_shape[-2:])
            ref_pred_boxes /= scale

            pred_boxes_all.append(pred_boxes)
            scores_all.append(scores)
            ref_pred_boxes_all.append(ref_pred_boxes)
            ref_scores_all.append(ref_scores)

    return scores_all, pred_boxes_all, ref_scores_all, ref_pred_boxes_all, data_dict_all
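
The nested loops above build an all-pairs cosine distance matrix; scipy ships the same computation as a single vectorized call. A sketch with random stand-in features (shapes invented):

import numpy as np
from scipy.spatial.distance import cdist

target_nms_feats = np.random.rand(3, 128)
high_score_feats = np.random.rand(7, 128)

# equivalent to looping scipy.spatial.distance.cosine over all pairs
dist_mat = cdist(target_nms_feats, high_score_feats, metric='cosine')
assert dist_mat.shape == (3, 7)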
Esempio n. 34
0
def im_detect_bbox_aug(net,
                       nms_wrapper,
                       img_path,
                       scales,
                       pixel_means,
                       bbox_stds,
                       ctx,
                       threshold=1e-3,
                       viz=False):
    all_bboxes = []
    all_scores = []
    img_ori = cv2.imread(img_path.encode("utf-8"))
    for scale_min, scale_max in scales:
        fscale = 1.0 * scale_min / min(img_ori.shape[:2])
        img_resized = cv2.resize(img_ori, (0, 0), fx=fscale, fy=fscale)
        h, w, c = img_resized.shape
        h_padded = h if h % 32 == 0 else h + 32 - h % 32
        w_padded = w if w % 32 == 0 else w + 32 - w % 32
        img_padded = np.zeros(shape=(h_padded, w_padded, c),
                              dtype=img_resized.dtype)
        img_padded[:h, :w, :] = img_resized
        img = transform(img_padded, pixel_means=pixel_means)
        im_info = nd.array([[h_padded, w_padded, 1.0]], ctx=ctx[0])
        data = nd.array(img, ctx=ctx[0])

        rois, scores, bbox_deltas = net(data, im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)
        bbox = clip_boxes(bbox, data.shape[2:4])
        bbox /= fscale
        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

        # hflip
        rois, scores, bbox_deltas = net(data[:, :, :, ::-1], im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)
        bbox = clip_boxes(bbox, data.shape[2:4])

        tmp = bbox[:, 0::4].copy()
        bbox[:, 0::4] = data.shape[3] - bbox[:, 2::4] - 1  # new x0 = W - old x1 - 1
        bbox[:, 2::4] = data.shape[3] - tmp - 1  # new x1 = W - old x0 - 1
        bbox /= fscale

        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

        # vflip
        rois, scores, bbox_deltas = net(data[:, :, ::-1], im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)

        bbox = clip_boxes(bbox, data.shape[2:4])

        tmp = bbox[:, 1::4].copy()
        bbox[:, 1::4] = data.shape[2] - bbox[:, 3::4] - 1  # new y0 = H - old y1 - 1
        bbox[:, 3::4] = data.shape[2] - tmp - 1  # new y1 = H - old y0 - 1
        bbox /= fscale

        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

        # vhflip
        rois, scores, bbox_deltas = net(data[:, :, ::-1, ::-1], im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)

        bbox = clip_boxes(bbox, data.shape[2:4])

        tmp = bbox[:, 1::4].copy()
        bbox[:, 1::4] = data.shape[2] - bbox[:, 3::4] - 1  # new y0 = H - old y1 - 1
        bbox[:, 3::4] = data.shape[2] - tmp - 1  # new y1 = H - old y0 - 1

        tmp = bbox[:, 0::4].copy()
        bbox[:, 0::4] = data.shape[3] - bbox[:, 2::4] - 1  # new x0 = W - old x1 - 1
        bbox[:, 2::4] = data.shape[3] - tmp - 1  # new x1 = W - old x0 - 1

        bbox /= fscale

        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

    all_bboxes = np.concatenate(all_bboxes, axis=0)
    all_scores = np.concatenate(all_scores, axis=0)
    pred_bboxes = []
    pred_scores = []
    pred_clsid = []
    for j in range(1, all_scores.shape[1]):
        cls_scores = all_scores[:, j, np.newaxis]
        cls_boxes = all_bboxes[:, 4:8] if config.CLASS_AGNOSTIC \
            else all_bboxes[:, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores))
        keep = nms_wrapper(cls_dets.astype('f'))
        cls_dets = cls_dets[keep, :]
        cls_dets = cls_dets[cls_dets[:, -1] > threshold, :]
        pred_bboxes.append(cls_dets[:, :4])
        pred_scores.append(cls_dets[:, 4])
        pred_clsid.append(j * np.ones(shape=(cls_dets.shape[0],), dtype=np.int))
    pred_bboxes = np.concatenate(pred_bboxes, axis=0)
    pred_scores = np.concatenate(pred_scores, axis=0)
    pred_clsid = np.concatenate(pred_clsid, axis=0)
    if viz:
        import gluoncv
        import matplotlib.pyplot as plt
        gluoncv.utils.viz.plot_bbox(img_ori[:, :, ::-1],
                                    bboxes=pred_bboxes,
                                    scores=pred_scores,
                                    labels=pred_clsid,
                                    thresh=.5)
        plt.show()
    return pred_bboxes, pred_scores, pred_clsid
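
The flip arithmetic above maps boxes detected on a mirrored image back to the original frame: after a horizontal flip of width W, the old right edge becomes the new left edge. A standalone check with made-up numbers:

import numpy as np

W = 100                                       # padded width (data.shape[3])
bbox = np.array([[10., 20., 30., 40.]])       # x0, y0, x1, y1 on flipped img

tmp = bbox[:, 0::4].copy()
bbox[:, 0::4] = W - bbox[:, 2::4] - 1         # new x0 = W - old x1 - 1 = 69
bbox[:, 2::4] = W - tmp - 1                   # new x1 = W - old x0 - 1 = 89
assert np.allclose(bbox, [[69., 20., 89., 40.]])  # box width is preserved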