Python clip_boxes Beispiele

Programmiersprache: Python

Namespace / Paketname: lib.bbox.bbox_transform

Methode / Funktion: clip_boxes

Beispiele auf hotexamples.com: 10

Python clip_boxes - 10 Beispiele gefunden. Dies sind die am besten bewerteten Python Beispiele für die lib.bbox.bbox_transform.clip_boxes, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Beispiel #1

Datei anzeigen

Datei: box_parser.py Projekt: danxifuer/InstanceSegment

    def forward(self, is_train, req, in_data, out_data, aux):
        """Refine the input RoIs with the predicted box regression deltas.

        Reads four inputs from ``in_data`` (RoIs, box deltas, class
        probabilities, image info), selects the four delta columns belonging
        to each RoI's class, de-normalizes them with ``self._bbox_stds`` and
        ``self._bbox_means``, applies them to the RoIs through ``bbox_pred``
        and, when ``self._b_clip_boxes`` is set, clips the result with
        ``clip_boxes``. The refined RoIs are written to ``out_data[0]``.

        ``is_train`` and ``aux`` are not used by this method.
        """
        rois = in_data[0].asnumpy()
        deltas = in_data[1].asnumpy()
        cls_prob = in_data[2].asnumpy()
        im_info = in_data[3].asnumpy()
        roi_count = rois.shape[0]

        # Class whose deltas are used for each RoI: always class 1 in
        # class-agnostic mode, otherwise the best scoring class among
        # columns 1.. of cls_prob (column 0 is skipped, hence the +1).
        if self._bbox_class_agnostic:
            cls_idx = np.ones((roi_count))
        else:
            cls_idx = np.argmax(cls_prob[:, 1:], axis=1) + 1

        # Every class owns four consecutive delta columns starting at
        # 4 * class; build the (num_rois, 4) column index table.
        first_col = cls_idx[:, np.newaxis] * 4
        delta_cols = np.hstack([first_col + k for k in range(4)])

        # Pick, for every row, the four columns selected above.
        row_idx = np.arange(roi_count, dtype=np.intp)[:, np.newaxis]
        deltas = deltas[row_idx, delta_cols.astype(np.intp)]

        # Undo the target normalization: delta[i] * std[i] + mean[i].
        means = np.array(self._bbox_means)
        stds = np.array(self._bbox_stds)
        deltas = np.hstack([
            (deltas[:, k] * stds[k] + means[k])[:, np.newaxis]
            for k in range(4)
        ])

        # Apply the deltas to the box columns of the RoIs (column 0 is
        # passed through unchanged).
        refined = bbox_pred(rois[:, 1:], deltas)
        if self._b_clip_boxes:
            refined = clip_boxes(refined, im_info[0, :2])

        rois[:, 1:] = refined
        self.assign(out_data[0], req[0], rois)

Beispiel #2

Datei anzeigen

def coco_results_one_category_kernel(data_pack):
    """Build the COCO-style result entries of one category.

    :param data_pack: dict with the keys
        ``cat_id``        -- category id written into every entry,
        ``ann_type``      -- ``'bbox'`` or ``'segm'``,
        ``binary_thresh`` -- forwarded to ``mask_voc2coco`` (``'segm'`` only),
        ``all_im_info``   -- one dict per image with ``index`` and, for
                             ``'segm'``, ``width`` and ``height``,
        ``boxes``         -- per-image detection arrays; columns 0..3 are
                             read as x1, y1, x2, y2 and the last column as
                             the score,
        ``masks``         -- per-image masks (``'segm'`` only).
    :return: list of dicts holding ``image_id``, ``category_id``, ``score``
        and either ``bbox`` ([x, y, w, h]) or ``segmentation``. An
        unsupported ``ann_type`` is reported on stdout and yields an empty
        list.
    """
    cat_id = data_pack['cat_id']
    ann_type = data_pack['ann_type']
    binary_thresh = data_pack['binary_thresh']
    all_im_info = data_pack['all_im_info']
    boxes = data_pack['boxes']
    if ann_type == 'bbox':
        masks = []
    elif ann_type == 'segm':
        masks = data_pack['masks']
    else:
        print('unimplemented ann_type: ' + ann_type)
        # Nothing can be produced for an unknown annotation type; falling
        # through used to fail on the undefined ``result`` below.
        return []

    cat_results = []
    for im_ind, im_info in enumerate(all_im_info):
        index = im_info['index']
        # ``np.float`` was only an alias of the builtin ``float`` and no
        # longer exists in current NumPy releases.
        dets = boxes[im_ind].astype(float)
        if len(dets) == 0:
            continue
        scores = dets[:, -1]
        if ann_type == 'bbox':
            # Convert corner boxes to [x, y, width, height]; the +1 treats
            # both corners as inclusive pixel coordinates.
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'bbox': [xs[k], ys[k], ws[k], hs[k]],
                'score': scores[k]
            } for k in range(dets.shape[0])]
        else:  # ann_type == 'segm'
            width = im_info['width']
            height = im_info['height']
            dets[:, :4] = clip_boxes(dets[:, :4], [height, width])
            mask_encode = mask_voc2coco(masks[im_ind], dets[:, :4], height,
                                        width, binary_thresh)
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'segmentation': mask_encode[k],
                'score': scores[k]
            } for k in range(len(mask_encode))]
        cat_results.extend(result)
    return cat_results

Beispiel #3

Datei anzeigen

Datei: image.py Projekt: danxifuer/InstanceSegment

def get_image(roidb, config):
    """
    Load and preprocess every image referenced by a roidb.

    Each image is read from disk, mirrored horizontally when the record is
    marked as flipped, resized to a randomly chosen entry of config.SCALES
    and converted with transform().

    :param roidb: a list of roidb records; each record must provide
        'image' (file path), 'flipped' and 'boxes'
    :param config: configuration exposing SCALES, network.IMAGE_STRIDE and
        network.PIXEL_MEANS
    :return: tuple (processed_ims, processed_roidb); processed_ims is the
        list of image tensors, processed_roidb holds a shallow copy of every
        record whose 'boxes' are scaled, rounded and clipped and which gets
        a new item 'im_info' =
        [im_tensor.shape[2], im_tensor.shape[3], im_scale]
    :raises AssertionError: if an image path does not exist
    :raises IOError: if an existing image file cannot be decoded

    coordinate convention:
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        # str.format only fills '{}' fields; a '%s' placeholder would be
        # printed literally and hide the offending path.
        assert os.path.exists(roi_rec['image']), '{} does not exist'.format(
            roi_rec['image'])
        im = cv2.imread(roi_rec['image'],
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if im is None:
            # cv2.imread signals an unreadable file by returning None
            # instead of raising.
            raise IOError('failed to read image {}'.format(roi_rec['image']))
        if roi_rec['flipped']:
            # reverse the second axis (x / width)
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)
        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        # Boxes follow the image: scale them, then clip to the tensor size.
        new_rec['boxes'] = clip_boxes(
            np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb

Beispiel #4

Datei anzeigen

Datei: nuclei_demo.py Projekt: danxifuer/InstanceSegment

def main():
    """Run the nuclei instance-segmentation demo on a fixed image folder.

    Builds the network symbol named by ``config.symbol``, loads the trained
    parameters with ``load_param``, runs ``im_detect`` on every ``*.png``
    found in the image folder, post-processes the raw output (per-class NMS,
    or mask voting when ``config.TEST.USE_MASK_MERGE`` is set) and displays
    the detections scoring above 0.7 through ``show_masks``.
    """
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    print(ctx_id)
    pprint.pprint(config)
    # NOTE(review): eval() executes whatever string config.symbol holds;
    # this is only acceptable while the configuration is trusted.
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # A configuration without a CLASS_AGNOSTIC entry means per-class boxes.
    # Only the lookup itself is guarded, so unrelated errors are no longer
    # swallowed by a bare ``except``.
    try:
        class_agnostic = bool(config.CLASS_AGNOSTIC)
    except (AttributeError, KeyError):
        class_agnostic = False

    # set up class names
    num_classes = 2
    classes = ['nuclei']
    score_thresh = 0.7

    # load demo data
    image_root = '/home/daiab/machine_disk/work/kaggle_nuclei/data/LikeVOC/img'
    image_names = glob.glob(os.path.join(image_root, '*.png'))
    data = []
    for im_name in image_names:
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names]
            for item in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(
        '/home/daiab/machine_disk/projects/FCIS/'
        'output/fcis/nuclei/nuclei_end2end/SDS_train/e2e',
        2,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up: two throw-away forward passes on the first image
    for _ in range(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        # im_info[0, 2] carries the resize scale of each batch element
        scales = [
            batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data
        ]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data
        ]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, scales, config)
        im_shapes = [
            batch_item[0].shape[2:4] for batch_item in data_batch.data
        ]

        if not config.TEST.USE_MASK_MERGE:
            # per-class thresholding followed by NMS
            all_boxes = [[] for _ in range(num_classes)]
            all_masks = [[] for _ in range(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > score_thresh)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            # mask voting on boxes clipped to the un-scaled image size
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width, im_height,
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print('testing {} {:.4f}s'.format(im_name, toc()))
        # visualize: keep only confident detections (last column = score)
        for i in range(len(dets)):
            keep = np.where(dets[i][:, -1] > score_thresh)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config)

    print('done')

Beispiel #5

Datei anzeigen

def main():
    """Run the COCO instance-segmentation demo on four bundled test images.

    Builds the network symbol named by ``config.symbol``, loads the
    ``fcis_coco`` parameters with ``load_param``, runs ``im_detect`` on each
    demo image, post-processes the raw output (per-class NMS, or mask voting
    when ``config.TEST.USE_MASK_MERGE`` is set) and displays the detections
    scoring above 0.7 through ``show_masks``.
    """
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    print(ctx_id)
    pprint.pprint(config)
    # NOTE(review): eval() executes whatever string config.symbol holds;
    # this is only acceptable while the configuration is trusted.
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # A configuration without a CLASS_AGNOSTIC entry means per-class boxes.
    # Only the lookup itself is guarded, so unrelated errors are no longer
    # swallowed by a bare ``except``.
    try:
        class_agnostic = bool(config.CLASS_AGNOSTIC)
    except (AttributeError, KeyError):
        class_agnostic = False

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
    score_thresh = 0.7

    # load demo data
    image_names = ['COCO_test2015_000000000275.jpg', 'COCO_test2015_000000001412.jpg', 'COCO_test2015_000000073428.jpg',
                    'COCO_test2015_000000393281.jpg']
    data = []
    for im_name in image_names:
        # str.format only fills '{}' fields; a '%s' placeholder would be
        # printed literally and hide the offending path.
        assert os.path.exists(cur_path + '/../demo/' + im_name), ('{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)] for item in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(cur_path + '/../model/fcis_coco', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up: two throw-away forward passes on the first image
    for _ in range(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        # im_info[0, 2] carries the resize scale of each batch element
        scales = [batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [batch_item[1].asnumpy()[0, 2] for batch_item in data_batch.data]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        im_shapes = [batch_item[0].shape[2:4] for batch_item in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            # per-class thresholding followed by NMS
            all_boxes = [[] for _ in range(num_classes)]
            all_masks = [[] for _ in range(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > score_thresh)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            # mask voting on boxes clipped to the un-scaled image size
            print('=============')
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(masks, boxes, scores[0], num_classes,
                                                        100, im_width, im_height,
                                                        config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                                                        config.BINARY_THRESH, ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
        print('testing {} {:.4f}s'.format(im_name, toc()))
        # visualize: keep only confident detections (last column = score)
        for i in range(len(dets)):
            keep = np.where(dets[i][:, -1] > score_thresh)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config)

    print('done')

Beispiel #6

Datei anzeigen

    def forward(self, is_train, req, in_data, out_data, aux):
        """Turn RPN scores and box deltas into a fixed-size set of RoIs.

        ``in_data[0]`` holds the class probabilities, ``in_data[1]`` the box
        deltas and ``in_data[2]`` the image info. Anchors are shifted over
        the feature grid, transformed by the deltas, clipped to the image,
        filtered by size, ranked by score, reduced by NMS and finally padded
        (by randomly repeating kept indices) up to
        ``self._rpn_post_nms_top_n`` entries. The RoIs go to ``out_data[0]``
        and, if ``self._output_score`` is set, their scores to
        ``out_data[1]``.

        :raises ValueError: if the batch holds more than one image.
        """
        nms = gpu_nms_wrapper(self._nms_thresh, in_data[0].context.device_id)

        if in_data[0].shape[0] > 1:
            raise ValueError('Sorry, multiple images each device is not implemented')

        pre_nms_top_n = self._rpn_pre_nms_top_n
        post_nms_top_n = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size
        stride = self._feat_stride

        # The leading self._num_anchors channels are the background
        # probabilities; only the remaining channels are kept.
        scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = in_data[1].asnumpy()
        im_info = in_data[2].asnumpy()[0, :]

        # Size of the feature grid implied by the image size.
        height = int(im_info[0] / stride)
        width = int(im_info[1] / stride)

        # One (x, y, x, y) offset per grid cell, in row-major cell order.
        grid_x, grid_y = np.meshgrid(np.arange(0, width) * stride,
                                     np.arange(0, height) * stride)
        flat_x = grid_x.ravel()
        flat_y = grid_y.ravel()
        shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

        # Broadcast A anchors (1, A, 4) against K shifts (K, 1, 4) to get
        # (K, A, 4), then flatten to (K * A, 4).
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + shifts[:, np.newaxis, :]
        anchors = anchors.reshape((K * A, 4))

        # Bring deltas and scores into the anchor ordering: channels last,
        # then one row per (h, w, a).
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Anchors + deltas -> proposals, clipped to the image.
        proposals = clip_boxes(bbox_pred(anchors, bbox_deltas), im_info[:2])

        # Drop boxes whose height or width is below the threshold.
        # NOTE(review): min_size is passed without being multiplied by the
        # image scale in im_info[2] -- confirm this is intended.
        keep = self._filter_boxes(proposals, min_size)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Rank by score (descending) and keep at most pre_nms_top_n.
        order = scores.ravel().argsort()[::-1]
        if pre_nms_top_n > 0:
            order = order[:pre_nms_top_n]
        proposals = proposals[order, :]
        scores = scores[order]

        # NMS, then keep at most post_nms_top_n survivors.
        keep = nms(np.hstack((proposals, scores)).astype(np.float32))
        if post_nms_top_n > 0:
            keep = keep[:post_nms_top_n]
        # Pad with randomly repeated survivors so the output size is fixed.
        shortfall = post_nms_top_n - len(keep)
        if shortfall > 0:
            keep = np.hstack((keep, np.random.choice(keep, size=shortfall)))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Only a single image is supported, so every batch index is 0.
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))

Beispiel #7

Datei anzeigen

    def forward(self, is_train, req, in_data, out_data, aux):
        """Generate RoIs from RPN outputs computed at several strides.

        ``in_data[0]``..``in_data[4]`` are read as the class probabilities
        for strides 4, 8, 16, 32 and 64, ``in_data[5]``..``in_data[9]`` as
        the matching box deltas and ``in_data[-1]`` as the image info. For
        every stride in ``self._feat_stride`` the anchors are shifted over
        the feature grid, transformed, clipped and size-filtered; the
        proposals of all strides are then ranked together, reduced by NMS
        and padded (by randomly repeating kept indices) up to
        ``self._rpn_post_nms_top_n`` entries. The RoIs go to ``out_data[0]``
        and, if ``self._output_score`` is set, their scores to
        ``out_data[1]``.

        :raises ValueError: if the batch holds more than one image.
        """
        batch_size = in_data[0].shape[0]

        if batch_size > 1:
            raise ValueError('sorry, only support single image')

        cls_prob_dict = {
            'stride64': in_data[4],
            'stride32': in_data[3],
            'stride16': in_data[2],
            'stride8': in_data[1],
            'stride4': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[9],
            'stride32': in_data[8],
            'stride16': in_data[7],
            'stride8': in_data[6],
            'stride4': in_data[5],
        }
        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        # The image info is the same for every stride: convert it to NumPy
        # once instead of once per pyramid level.
        im_info = in_data[-1].asnumpy()[0, :]
        # Minimum box size expressed in the scale of the resized input image.
        scaled_min_size = min_size * im_info[2]

        proposal_list = []
        score_list = []
        for idx, s in enumerate(self._feat_stride):
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=[self._scales[idx]],
                                           ratios=self._ratios)
            # The leading self._num_anchors channels are skipped.
            scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()

            # step 1: shift the anchors over the feature grid and apply
            # the predicted deltas
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)
            shift_x = np.arange(width) * stride
            shift_y = np.arange(height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            # shift: (K, 4), one (x, y, x, y) offset per grid cell
            shift = np.vstack([shift_x.ravel(), shift_y.ravel(),
                               shift_x.ravel(), shift_y.ravel()]).transpose()
            anchors = (sub_anchors[None, :, :] + shift[:, None, :]).reshape((-1, 4))

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            scores = self._clip_pad(scores, (height, width))
            # one score per row, in the same ordering as the delta rows
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            proposals = bbox_pred(anchors, bbox_deltas)

            # step 2: clip the proposals to the image
            proposals = clip_boxes(proposals, im_info[:2])

            # step 3: drop proposals that are too small
            keep = self._filter_boxes(proposals, scaled_min_size)
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)
        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # step 4: rank all proposals by score, highest first
        order = scores.ravel().argsort()[::-1]

        # step 5: keep at most pre_nms_topN of them
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]

        proposals = proposals[order, :]
        scores = scores[order]

        # step 6: non-maximum suppression
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det, self._threshold, in_data[0].context.device_id)
        # step 7: keep at most post_nms_topN survivors
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # step 8: pad with randomly repeated survivors so the output size
        # stays fixed
        if len(keep) < post_nms_topN:
            pad = np.random.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Only a single image is supported, so every batch index is 0.
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))

Beispiel #8

Datei anzeigen

Datei: proposal_layer0.py Projekt: jdd803/OCR1

def _proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, cfg_key,
                       _feat_stride, anchor_scales):
    """Convert RPN class probabilities and box deltas into RoIs.

    Steps:
      1. generate the base anchors and shift them to every (H, W) location
      2. apply the predicted deltas to the shifted anchors
      3. clip the resulting boxes to the image (``im_dims``)
      4. drop boxes whose height or width is below the configured minimum
      5. sort by score and keep the top ``RPN_PRE_NMS_TOP_N``
      6. run NMS and keep the top ``RPN_POST_NMS_TOP_N``

    Thresholds come from ``cfg.TRAIN`` when ``cfg_key == 'TRAIN'`` and from
    ``cfg.TEST`` for any other value. Both inputs are given channel-last and
    must hold a single image.

    Returns an array with one row per kept proposal: a zero batch index
    followed by the four box coordinates, as float32.
    """
    base_anchors = generate_anchors(scales=np.array(anchor_scales))
    num_anchors = base_anchors.shape[0]

    # Move the channel axis from last to second position.
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    # Only minibatch of 1 supported
    assert rpn_bbox_cls_prob.shape[0] == 1, \
        'Only single item batches are supported'

    # Anything other than 'TRAIN' falls back to the TEST settings.
    phase_cfg = cfg.TRAIN if cfg_key == 'TRAIN' else cfg.TEST
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    # The first num_anchors channels are background probabilities, the
    # remaining ones are the foreground probabilities that are wanted here.
    scores = rpn_bbox_cls_prob[:, num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred

    # Feature-map size, taken from the last two axes of the scores.
    height, width = scores.shape[-2:]

    # One (x, y, x, y) offset per feature-map cell, in row-major order.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * _feat_stride,
                                 np.arange(0, height) * _feat_stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast A anchors (1, A, 4) against K shifts (K, 1, 4) to get
    # (K, A, 4), then flatten to (K * A, 4).
    A = num_anchors
    K = shifts.shape[0]
    anchors = base_anchors.reshape((1, A, 4)) + shifts[:, np.newaxis, :]
    anchors = anchors.reshape((K * A, 4))

    # Back to channel-last so rows are ordered by (h, w, a), matching the
    # anchors: four delta values per row, one score per row.
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Anchors + deltas -> proposals, clipped to the image.
    proposals = clip_boxes(bbox_transform_inv(anchors, bbox_deltas), im_dims)

    # Remove boxes with either height or width below the threshold.
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Highest scores first, truncated to pre_nms_topN when that is positive.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # NMS, truncated to post_nms_topN when that is positive.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only a single input image is supported, so all batch indices are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    return np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

Beispiel #9

Datei anzeigen

Datei: tester.py Projekt: danxifuer/InstanceSegment

def pred_eval(predictor,
              test_data,
              imdb,
              cfg,
              vis=True,
              thresh=1e-3,
              logger=None,
              ignore_cache=False):
    """Run box/mask inference over ``test_data`` and evaluate the results.

    Results are cached under ``imdb.result_path`` as
    ``<imdb.name>_detections.pkl`` and ``<imdb.name>_masks.pkl``. When both
    files exist and ``ignore_cache`` is false they are loaded and inference is
    skipped; otherwise inference runs and both files are (re)written.

    Args:
        predictor: forwarded unchanged to ``im_detect``.
        test_data: iterable of data batches; wrapped in ``PrefetchingIter``
            when it is not one already. Must not be shuffled unless ``vis``
            is true (asserted below).
        imdb: dataset object providing ``result_path``, ``name``,
            ``num_images``, ``num_classes``, ``classes`` and
            ``evaluate_sds``.
        cfg: configuration; reads ``cfg.TEST.*``, ``cfg.BINARY_THRESH`` and,
            if present, ``cfg.CLASS_AGNOSTIC``.
        vis: when true, every processed image is passed to ``vis_all_mask``.
        thresh: minimum class score for a detection to be kept on the
            non-mask-merge path.
        logger: optional logger; receives the progress and evaluation lines.
        ignore_cache: when true, cached result files are ignored.

    Returns:
        None. The evaluation summary from ``imdb.evaluate_sds`` is only
        logged (when ``logger`` is given).
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    seg_file = os.path.join(imdb.result_path, imdb.name + '_masks.pkl')

    if os.path.exists(det_file) and os.path.exists(
            seg_file) and not ignore_cache:
        # NOTE(security): unpickling can execute arbitrary code. These files
        # are expected to be caches written by this function; never point
        # imdb.result_path at untrusted data.
        with open(det_file, 'rb') as f:
            all_boxes = cPickle.load(f)
        with open(seg_file, 'rb') as f:
            all_masks = cPickle.load(f)
    else:
        assert vis or not test_data.shuffle
        data_names = [k[0] for k in test_data.provide_data[0]]

        if not isinstance(test_data, PrefetchingIter):
            test_data = PrefetchingIter(test_data)

        # function pointers
        nms = py_nms_wrapper(cfg.TEST.NMS)
        mask_voting = gpu_mask_voting if cfg.TEST.USE_GPU_MASK_MERGE else cpu_mask_voting

        # CLASS_AGNOSTIC is optional in the config; a missing entry means the
        # box regressor is class specific (4 columns per class).
        try:
            class_agnostic = bool(cfg.CLASS_AGNOSTIC)
        except (AttributeError, KeyError):
            class_agnostic = False

        max_per_image = 100 if cfg.TEST.USE_MASK_MERGE else -1
        num_images = imdb.num_images
        # all_boxes[cls][image] / all_masks[cls][image]; class 0 stays empty.
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]
        all_masks = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]

        idx = 0
        t = time.time()
        for data_batch in test_data:
            t1 = time.time() - t  # data loading time
            t = time.time()

            scales = [data[1].asnumpy()[0, 2] for data in data_batch.data]
            scores_all, boxes_all, masks_all, data_dict_all = im_detect(
                predictor, data_batch, data_names, scales, cfg)
            im_shapes = [data[0].shape[2:4] for data in data_batch.data]

            t2 = time.time() - t  # network time
            t = time.time()

            # post processing
            for delta, (scores, boxes, masks, data_dict) in enumerate(
                    zip(scores_all, boxes_all, masks_all, data_dict_all)):
                image_idx = idx + delta

                if not cfg.TEST.USE_MASK_MERGE:
                    for j in range(1, imdb.num_classes):
                        indexes = np.where(scores[:, j] > thresh)[0]
                        cls_scores = scores[indexes, j, np.newaxis]
                        cls_masks = masks[indexes, 1, :, :]
                        if class_agnostic:
                            cls_boxes = boxes[indexes, :]
                        else:
                            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]

                        cls_dets = np.hstack((cls_boxes, cls_scores))
                        keep = nms(cls_dets)
                        all_boxes[j][image_idx] = cls_dets[keep, :]
                        all_masks[j][image_idx] = cls_masks[keep, :]
                else:
                    masks = masks[:, 1:, :, :]
                    # Image size at the original (unscaled) resolution.
                    im_height = np.round(im_shapes[delta][0] /
                                         scales[delta]).astype('int')
                    im_width = np.round(im_shapes[delta][1] /
                                        scales[delta]).astype('int')
                    boxes = clip_boxes(boxes, (im_height, im_width))
                    result_mask, result_box = mask_voting(
                        masks, boxes, scores, imdb.num_classes, max_per_image,
                        im_width, im_height, cfg.TEST.NMS,
                        cfg.TEST.MASK_MERGE_THRESH, cfg.BINARY_THRESH)
                    for j in range(1, imdb.num_classes):
                        all_boxes[j][image_idx] = result_box[j]
                        all_masks[j][image_idx] = result_mask[j][:, 0, :, :]

                if vis:
                    # Index 0 is a placeholder for the background class.
                    boxes_this_image = [[]] + [
                        all_boxes[j][image_idx]
                        for j in range(1, imdb.num_classes)
                    ]
                    masks_this_image = [[]] + [
                        all_masks[j][image_idx]
                        for j in range(1, imdb.num_classes)
                    ]
                    vis_all_mask(data_dict['data'].asnumpy(), boxes_this_image,
                                 masks_this_image, imdb.classes, scales[delta],
                                 cfg)

            idx += test_data.batch_size
            t3 = time.time() - t  # post-processing time
            t = time.time()

            progress = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                idx, imdb.num_images, t1, t2, t3)
            print(progress)
            if logger:
                logger.info(progress)

        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)
        with open(seg_file, 'wb') as f:
            cPickle.dump(all_masks, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_sds(all_boxes, all_masks)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))

Beispiel #10

Datei anzeigen

    def forward(self, is_train, req, in_data, out_data, aux):
        """Decode per-stride box predictions and emit the top-scoring ones.

        Inputs, as indexed below: ``in_data[0..3]`` are box deltas and
        ``in_data[4..7]`` class scores for strides 16, 32, 64 and 128;
        ``in_data[8]`` is the image info (its first row is used) and
        ``in_data[9]`` the image itself (only used by the disabled debug
        visualisation).

        For every stride in ``self._feat_stride`` the anchors are shifted
        over the feature map, decoded with ``bbox_pred`` and clipped to
        ``im_info[:2]``. All levels are concatenated, ranked by their best
        class score, and the first ``self._keep_num`` are written to
        ``out_data[0]`` (boxes) and ``out_data[1]`` (per-class scores).
        """
        level_names = ('stride16', 'stride32', 'stride64', 'stride128')
        bbox_pred_dict = {
            name: in_data[pos] for pos, name in enumerate(level_names)
        }
        cls_prob_dict = {
            name: in_data[pos + 4] for pos, name in enumerate(level_names)
        }
        im_info = in_data[8].asnumpy()[0, :]
        im = in_data[9].asnumpy()

        rois_per_level = []
        cls_per_level = []

        for level in self._feat_stride:
            level_key = 'stride' + str(level)
            stride = int(level)
            base_anchors = generate_anchors(base_size=stride,
                                            scales=self._scales,
                                            ratios=self._ratios)
            bbox_deltas = bbox_pred_dict[level_key].asnumpy()

            # Spatial size of this level's prediction map.
            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

            # One (x, y, x, y) offset per feature-map cell, row-major.
            grid_x, grid_y = np.meshgrid(
                np.arange(0, width) * stride,
                np.arange(0, height) * stride)
            flat_x = grid_x.ravel()
            flat_y = grid_y.ravel()
            shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

            # Broadcast (1, A, 4) base anchors against (K, 1, 4) shifts to
            # get (K, A, 4), then flatten to (K * A, 4).
            num_anchors = self._num_anchors
            num_cells = shifts.shape[0]
            anchors = (base_anchors.reshape((1, num_anchors, 4)) +
                       shifts[:, np.newaxis, :])
            anchors = anchors.reshape((num_cells * num_anchors, 4))

            # Bring the deltas into the same (h, w, a) row order as the
            # anchors: (1, 4 * A, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4).
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Convert anchors into proposals and clip them to the image.
            proposals = bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, im_info[:2])

            # Scores hold A consecutive channels per class; each class
            # becomes one column ordered like the proposals.
            raw_scores = cls_prob_dict[level_key].asnumpy()
            class_columns = []
            for cls_idx in range(self._num_classes):
                first = cls_idx * num_anchors
                chunk = raw_scores[:, first:first + num_anchors, :, :]
                chunk = self._clip_pad(chunk, (height, width))
                class_columns.append(
                    chunk.transpose((0, 2, 3, 1)).reshape((-1, 1)))
            level_scores = np.concatenate(class_columns, axis=1)

            rois_per_level.append(proposals)
            cls_per_level.append(level_scores)

        all_rois = np.concatenate(rois_per_level, axis=0)
        all_cls = np.concatenate(cls_per_level, axis=0)

        # Rank every proposal by its best class score, highest first.
        best_score = np.max(all_cls, axis=1)
        ranking = best_score.ravel().argsort()[::-1]
        ranking = ranking[:self._keep_num]
        all_cls = all_cls[ranking, :]
        all_rois = all_rois[ranking, :]

        # Debug switch: flip to True to draw the kept boxes on the image.
        vis = False
        if vis:
            vis_all_detection(im, all_rois[:, :])

        self.assign(out_data[0], req[0], mx.nd.array(all_rois))

        self.assign(out_data[1], req[1], mx.nd.array(all_cls))