Beispiel #1
0
def get_image(roidb, config):
    """
    Load every image referenced by roidb, optionally flip it, rescale it to a
    randomly chosen entry of config.SCALES and convert it with transform().

    :param roidb: list of roi records; each record is read for the keys
        'image' (file path), 'flipped' and 'boxes'
    :param config: configuration object; SCALES, network.IMAGE_STRIDE and
        network.PIXEL_MEANS are read from it
    :return: (processed_ims, processed_roidb) -- the list of transformed image
        tensors and a list of shallow-copied records whose 'boxes' are scaled,
        rounded and clipped, and which carry a new 'im_info' entry
    :raises AssertionError: when a record's image path does not exist

    Coordinate convention:
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        image_path = roi_rec['image']
        # The original message used '%s' together with str.format(), so the
        # path was never substituted into the text.
        assert os.path.exists(image_path), '{} does not exist'.format(image_path)
        im = cv2.imread(image_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            # Reverse the second (width) axis, i.e. mirror horizontally.
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()

        # Pick one (target_size, max_size) pair at random for this image.
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)

        # NOTE(review): assumes transform() returns a 4-D tensor whose last two
        # dims are (height, width) -- confirm against transform().
        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        new_rec['boxes'] = clip_boxes(np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
Beispiel #2
0
def im_detect(predictor, data_batch, data_names, scale):
    """
    Run one forward pass, decode the predicted boxes and report timings.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data entries line up with data_names
    :param data_names: names used as keys for the entries of data_batch.data
    :param scale: factor the decoded boxes are divided by
    :return: (scores, pred_boxes, data_dict)
    """
    started = time.time()
    output = predictor.predict(data_batch)
    print('predict{:.4f}s'.format(time.time() - started))

    data_dict = dict(zip(data_names, data_batch.data))
    # Column 0 of every roi row is dropped; the remaining columns are the box.
    rois = (output['rois_output'].asnumpy()[:, 1:]
            if config.TEST.HAS_RPN
            else data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:])
    im_shape = data_dict['data'].shape

    # First element along the leading axis of the classification / regression outputs.
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # Decode deltas against the rois, clip to the last two dims of the input
    # shape, then undo the scaling applied to the input image.
    decoded = bbox_pred(rois, bbox_deltas)
    clipped = clip_boxes(decoded, im_shape[-2:])
    pred_boxes = clipped / scale

    print('im_detect{:.4f}s'.format(time.time() - started))
    return scores, pred_boxes, data_dict
Beispiel #3
0
def im_detect(predictor, data_batch, data_names, scale):
    """
    Run the predictor on one batch and decode its boxes; when config.HAS_PART
    is set, additionally decode head boxes and joints.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data entries line up with data_names
    :param data_names: names used as keys for the entries of data_batch.data
    :param scale: factor the decoded boxes / heads / joints are divided by
    :return: (scores, pred_boxes, data_dict) when config.HAS_PART is false,
        otherwise (scores, pred_boxes, head_boxes, joints, data_dict)
    """
    output = predictor.predict(data_batch)

    data_dict = dict(zip(data_names, data_batch.data))
    if config.TEST.HAS_RPN:
        rois = output['rois_output'].asnumpy()[:, 1:]
    else:
        rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
    im_shape = data_dict['data'].shape

    # save output
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    # NOTE: a leftover `import ipdb; ipdb.set_trace()` breakpoint used to sit
    # here and stopped every call in the debugger; it has been removed.

    # post processing
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    # we used scaled image & roi to train, so it is necessary to transform them back
    pred_boxes = pred_boxes / scale

    if not config.HAS_PART:
        return scores, pred_boxes, data_dict

    # --- head boxes -------------------------------------------------------
    head_scores = output['head_prob_reshape_output'].asnumpy()[0]
    head_gids = np.argmax(head_scores, axis=1)
    head_deltas = output['head_pred_reshape_output'].asnumpy()[0]
    # Only the configured stds are applied to the deltas here (no means).
    stds = np.reshape(np.array(config.TRAIN.BBOX_STDS), (-1, 4))
    head_deltas *= np.tile(stds, (1, head_scores.shape[1]))

    head_boxes = pred_head(rois, head_deltas, head_gids,
                           config.PART_GRID_HW)
    head_boxes /= scale

    # --- joints: four prob/pred output pairs, indexed 0..3 ------------------
    joints_scores = [
        output['joint_prob{}_reshape_output'.format(i)].asnumpy()[0]
        for i in range(4)
    ]
    joints_gids = [np.argmax(j, axis=1) for j in joints_scores]
    joints_deltas = [
        output['joint_pred{}_reshape_output'.format(i)].asnumpy()[0]
        for i in range(4)
    ]
    # Joint deltas use only the first two std columns, tiled once per column
    # of head_scores.
    joints_deltas = [
        j * np.tile(stds[:, :2], (1, head_scores.shape[1]))
        for j in joints_deltas
    ]

    joints = [pred_joint(rois, jd, jid, config.PART_GRID_HW)
              for (jd, jid) in zip(joints_deltas, joints_gids)]
    joints = np.hstack(joints)
    joints /= scale

    return scores, pred_boxes, head_boxes, joints, data_dict
Beispiel #4
0
    def im_detect(self, im_array, im_info=None, roi_array=None):
        """
        Detect on a single prepared input; arguments must follow the
        minibatch.get_testbatch format.

        :param im_array: numpy.ndarray [b c h w]
        :param im_info: numpy.ndarray [b 3], used when config.TEST.HAS_RPN is set
        :param roi_array: numpy.ndarray [roi_num 5], used otherwise
        :return: scores, pred_boxes
        """
        use_rpn = config.TEST.HAS_RPN

        # Load the inputs into the argument dict; the second input depends on
        # whether the network generates its own rois.
        self.arg_params['data'] = mx.nd.array(im_array, self.ctx)
        if use_rpn:
            self.arg_params['im_info'] = mx.nd.array(im_info, self.ctx)
            shape_kwargs = {
                'data': self.arg_params['data'].shape,
                'im_info': self.arg_params['im_info'].shape,
            }
        else:
            self.arg_params['rois'] = mx.nd.array(roi_array, self.ctx)
            shape_kwargs = {
                'data': self.arg_params['data'].shape,
                'rois': self.arg_params['rois'].shape,
            }
        arg_shapes, _, _ = self.symbol.infer_shape(**shape_kwargs)

        # The label argument still needs a buffer, so fill it with zeros of the
        # inferred shape.
        shape_by_name = dict(zip(self.symbol.list_arguments(), arg_shapes))
        self.arg_params['cls_prob_label'] = mx.nd.zeros(
            shape_by_name['cls_prob_label'], self.ctx)

        # Bind without gradients and run the forward pass.
        self.executor = self.symbol.bind(self.ctx,
                                         self.arg_params,
                                         args_grad=None,
                                         grad_req='null',
                                         aux_states=self.aux_params)
        outputs_by_name = dict(zip(self.symbol.list_outputs(),
                                   self.executor.outputs))
        self.executor.forward(is_train=False)

        # Collect the outputs.
        scores = outputs_by_name['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = outputs_by_name['bbox_pred_reshape_output'].asnumpy()[0]
        rois = (outputs_by_name['rois_output'].asnumpy()[:, 1:]
                if use_rpn else roi_array[:, 1:])

        # Decode the deltas and clip to the spatial dims of the first image.
        pred_boxes = clip_boxes(bbox_pred(rois, bbox_deltas),
                                im_array[0].shape[-2:])

        return scores, pred_boxes
Beispiel #5
0
def coco_results_one_category_kernel(data_pack):
    """
    Build the COCO-style result entries of one category.

    :param data_pack: dict with the keys 'cat_id', 'ann_type' ('bbox' or
        'segm'), 'binary_thresh', 'all_im_info' (per-image dicts providing
        'index', and for 'segm' also 'width' and 'height'), 'boxes'
        (per-image detections whose last column is the score) and, for
        'segm', 'masks'
    :return: list of result dicts; each has 'image_id', 'category_id',
        'score' and either 'bbox' ([x, y, w, h]) or 'segmentation'.
        An unsupported ann_type is reported on stdout and yields an empty list.
    """
    cat_id = data_pack['cat_id']
    ann_type = data_pack['ann_type']
    binary_thresh = data_pack['binary_thresh']
    all_im_info = data_pack['all_im_info']
    boxes = data_pack['boxes']
    if ann_type == 'bbox':
        masks = []
    elif ann_type == 'segm':
        masks = data_pack['masks']
    else:
        # Previously execution continued and failed later on the unbound
        # `result`; nothing can be produced for an unknown type, so stop here.
        print('unimplemented ann_type: ' + ann_type)
        return []

    cat_results = []
    for im_ind, im_info in enumerate(all_im_info):
        index = im_info['index']
        raw_dets = boxes[im_ind]
        try:
            # `float` is what the removed `np.float` alias referred to.
            dets = raw_dets.astype(float)
        except (AttributeError, TypeError, ValueError):
            # e.g. a plain (empty) list that has no astype(); use it as is.
            dets = raw_dets
        if len(dets) == 0:
            continue
        scores = dets[:, -1]
        if ann_type == 'bbox':
            # Convert corner boxes (x1, y1, x2, y2) to (x, y, w, h) with the
            # inclusive-pixel "+ 1" convention.
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'bbox': [xs[k], ys[k], ws[k], hs[k]],
                'score': scores[k]
            } for k in range(dets.shape[0])]
        else:  # 'segm'
            width = im_info['width']
            height = im_info['height']
            dets[:, :4] = clip_boxes(dets[:, :4], [height, width])
            mask_encode = mask_voc2coco(masks[im_ind], dets[:, :4], height,
                                        width, binary_thresh)
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'segmentation': mask_encode[k],
                'score': scores[k]
            } for k in range(len(mask_encode))]
        cat_results.extend(result)
    return cat_results
Beispiel #6
0
def im_detect(predictor, data_batch, data_names):
    """
    Run one forward pass and decode the predicted boxes (no rescaling).

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data entries line up with data_names
    :param data_names: names used as keys for the entries of data_batch.data
    :return: (scores, pred_boxes, data_dict)
    """
    output = predictor.predict(data_batch)
    data_dict = dict(zip(data_names, data_batch.data))

    # Rois come from the network when it has an RPN, otherwise from the
    # input batch; column 0 of each row is dropped either way.
    roi_source = output['rois_output'] if config.TEST.HAS_RPN else data_dict['rois']
    rois = roi_source.asnumpy()[:, 1:]
    im_shape = data_dict['data'].shape

    # First element along the leading axis of both outputs.
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # Decode and clip to the last two dims of the input shape.
    pred_boxes = clip_boxes(bbox_pred(rois, bbox_deltas), im_shape[-2:])

    return scores, pred_boxes, data_dict
Beispiel #7
0
def im_detect(predictor, data_batch, data_names, scale):
    """
    Run one forward pass and return class scores with boxes divided by scale.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data entries line up with data_names
    :param data_names: names used as keys for the entries of data_batch.data
    :param scale: factor the decoded boxes are divided by
    :return: (scores, pred_boxes, data_dict)
    """
    output = predictor.predict(data_batch)
    data_dict = dict(zip(data_names, data_batch.data))

    # Column 0 of every roi row is dropped; the input rois are first
    # flattened to rows of five values.
    if config.TEST.HAS_RPN:
        roi_rows = output['rois_output'].asnumpy()
    else:
        roi_rows = data_dict['rois'].asnumpy().reshape((-1, 5))
    rois = roi_rows[:, 1:]
    im_shape = data_dict['data'].shape

    # Network outputs, first element along the leading axis.
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # Decode, clip to the input's last two dims, then undo the input scaling.
    decoded = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(decoded, im_shape[-2:]) / scale

    return scores, pred_boxes, data_dict
Beispiel #8
0
def im_rpn_detect(predictor, data_batch, data_names, scale):
    """
    Return the roi scores and the (clipped, rescaled) rois themselves; no
    box regression is applied here.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data entries line up with data_names
    :param data_names: names used as keys for the entries of data_batch.data
    :param scale: factor the clipped rois are divided by
    :return: (scores, pred_boxes, data_dict)
    """
    output = predictor.predict(data_batch)
    print(output.keys())
    data_dict = dict(zip(data_names, data_batch.data))

    # Column 0 of every roi row is dropped.
    if config.TEST.HAS_RPN:
        roi_rows = output['rois_output'].asnumpy()
    else:
        roi_rows = data_dict['rois'].asnumpy().reshape((-1, 5))
    rois = roi_rows[:, 1:]
    im_shape = data_dict['data'].shape

    scores = output['rois_score'].asnumpy()

    # The rois are used directly as boxes: clip to the input's last two dims,
    # then undo the scaling applied to the input image.
    pred_boxes = clip_boxes(rois, im_shape[-2:]) / scale

    return scores, pred_boxes, data_dict
Beispiel #9
0
def im_detect(predictor, data_batch, data_names, scales):
    """
    Run the predictor once and decode every element of its output list.

    :param predictor: object whose predict(data_batch) returns one output
        mapping per entry of data_batch.data
    :param data_batch: batch whose .data is a sequence of per-item data lists
    :param data_names: names used as keys for each per-item data list
    :param scales: one scale per item; rois and boxes are divided by it
    :return: (scores_all, rois_all, pred_boxes_all, pred_masks_all,
        data_dict_all), each a list with one entry per item
    :raises NotImplementedError: when config.TEST.HAS_RPN is false
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, item)) for item in data_batch.data]

    scores_all, rois_all, pred_boxes_all, pred_masks_all = [], [], [], []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if not config.TEST.HAS_RPN:
            raise NotImplementedError
        # Column 0 of every roi row is dropped.
        rois = output['rois_output'].asnumpy()[:, 1:]
        im_shape = data_dict['data'].shape

        # Network outputs for this item.
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        pred_masks = output['mask_pred_output'].asnumpy()

        # Decode against the unscaled rois and clip to the input's last two dims.
        pred_boxes = clip_boxes(bbox_pred(rois, bbox_deltas), im_shape[-2:])

        # Both rois and boxes are stored divided by the item's scale.
        scores_all.append(scores)
        rois_all.append(rois / scale)
        pred_boxes_all.append(pred_boxes / scale)
        pred_masks_all.append(pred_masks)

    return scores_all, rois_all, pred_boxes_all, pred_masks_all, data_dict_all
Beispiel #10
0
def im_detect_mask(predictor, data_batch, data_names, scale=1):
    """
    Run one forward pass and return scores, rescaled boxes and the mask output.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data entries line up with data_names
    :param data_names: names used as keys for the entries of data_batch.data
    :param scale: factor the decoded boxes are divided by (default 1)
    :return: (scores, pred_boxes, data_dict, mask_output)
    :raises NotImplementedError: when config.TEST.HAS_RPN is false
    """
    output = predictor.predict(data_batch)
    data_dict = dict(zip(data_names, data_batch.data))

    # Only the RPN path is supported.
    if not config.TEST.HAS_RPN:
        raise NotImplementedError

    # Column 0 of every roi row is dropped.
    rois = output['rois_output'].asnumpy()[:, 1:]
    im_shape = data_dict['data'].shape

    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    mask_output = output['mask_prob_output'].asnumpy()

    # Decode, clip to the input's last two dims, then undo the input scaling.
    decoded = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(decoded, im_shape[-2:]) / scale

    return scores, pred_boxes, data_dict, mask_output
Beispiel #11
0
def im_detect_mask(predictor, data_batch, data_names, scale=1):
    """
    Predict on one batch and return class scores, boxes divided by scale, the
    input dict and the raw mask probabilities.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data entries line up with data_names
    :param data_names: names used as keys for the entries of data_batch.data
    :param scale: divisor applied to the decoded boxes (default 1)
    :return: (scores, pred_boxes, data_dict, mask_output)
    :raises NotImplementedError: when config.TEST.HAS_RPN is false
    """
    net_out = predictor.predict(data_batch)
    data_dict = dict(zip(data_names, data_batch.data))

    has_rpn = config.TEST.HAS_RPN
    if not has_rpn:
        # Externally supplied rois are not handled by this function.
        raise NotImplementedError

    # Column 0 of every roi row is dropped.
    rois = net_out['rois_output'].asnumpy()[:, 1:]
    im_shape = data_dict['data'].shape

    scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]
    mask_output = net_out['mask_prob_output'].asnumpy()

    # Decode the deltas against the rois and clip to the input's last two dims.
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    # Undo the scaling applied to the input image.
    pred_boxes = pred_boxes / scale

    return scores, pred_boxes, data_dict, mask_output
Beispiel #12
0
def demo_net(predictor, data, image_names, im_scales):
    """
    Demo driver: warm the predictor up on the first sample, then for each image
    run im_detect, build per-class mask and box results and visualise them.

    :param predictor: object handed through to im_detect
    :param data: sequence of per-image mappings, indexed by the names in DATA_NAMES
    :param image_names: image file names; each is re-read from '../data/demo/'
        for the mask visualisation
    :param im_scales: per-image scales passed to vis_all_detection
    :return: None; results are displayed via show_masks / vis_all_detection and
        timings are printed
    """
    # Convert every per-image entry to a list of NDArrays ordered like DATA_NAMES.
    data = [[mx.nd.array(data[i][name]) for name in DATA_NAMES] for i in xrange(len(data))]
    # warm up: two throw-away passes on the first sample
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(DATA_NAMES, data[0])]],
                                     provide_label=[None])
        # The scale is read from element [0, 2] of the second data entry
        # (presumably im_info -- verify against DATA_NAMES).
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        _, _, _, _, _= im_detect(predictor, data_batch, DATA_NAMES, scales)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(DATA_NAMES, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        # `boxes` feeds the mask branch below, `boxes2` the per-class box branch.
        scores, boxes, boxes2, masks, data_dict = im_detect(predictor, data_batch, DATA_NAMES, scales)
        # Spatial dims (axes 2 and 3) of the first data entry of each batch item.
        im_shapes = [data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data))]

        # mask output
        if not config.TEST.USE_MASK_MERGE:
            # Per-class thresholding (score > 0.7) followed by NMS; class 0 is skipped.
            all_boxes = [[] for _ in xrange(config.NUM_CLASSES)]
            all_masks = [[] for _ in xrange(config.NUM_CLASSES)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, config.NUM_CLASSES):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                # Use the boxes as-is when config.CLASS_AGNOSTIC is set; when it is
                # false or the lookup fails, fall back to the class-specific columns.
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, config.NUM_CLASSES)]
            masks = [all_masks[j] for j in range(1, config.NUM_CLASSES)]
        else:
            # Mask-merge path: drop mask channel 0 and let gpu_mask_voting produce
            # the per-class masks and detections.
            masks = masks[0][:, 1:, :, :]
            # Network-input size divided by the scale, rounded to int.
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print (im_height, im_width)
            boxes_ = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(masks, boxes_, scores[0], config.NUM_CLASSES,
                                                        100, im_width, im_height,
                                                        config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                                                        config.BINARY_THRESH, 0)

            dets = [result_dets[j] for j in range(1, config.NUM_CLASSES)]
            masks = [result_masks[j][:, 0, :, :] for j in range(1, config.NUM_CLASSES)]
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize: keep only detections whose score (last column) exceeds 0.7
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:,-1]>0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        # cv2.imread yields BGR; convert to RGB before display.
        im = cv2.imread('../data/demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, CLASSES)

        # debug (disabled: kept as an inert string literal)
        '''
        for ii in range(scores[0].shape[0]):
            for jj in range(1, scores[0].shape[1]):
                if scores[0][ii][jj]>0.7:
                    print ii, jj, scores[0][ii][jj]
        '''
        # bounding box output: per-class score threshold (CONF_THRESH) then NMS
        all_boxes = [[] for _ in CLASSES]
        nms = py_nms_wrapper(NMS_THRESH)
        for cls in CLASSES:
            # +1 because column/class index 0 is skipped (CLASSES starts at index 1).
            cls_ind = CLASSES.index(cls)+1
            cls_boxes = boxes2[0][:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[0][:, cls_ind, np.newaxis]
            keep = np.where(cls_scores >= CONF_THRESH)[0]
            #print cls, keep
            # NOTE: `dets` is reused here and no longer refers to the mask detections.
            dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
            keep = nms(dets)
            all_boxes[cls_ind-1] = dets[keep, :]

        boxes_this_image = [all_boxes[j] for j in range(len(CLASSES))]
        vis_all_detection(data_dict[0]['data'].asnumpy(), boxes_this_image, CLASSES, im_scales[idx])

    print 'done'
Beispiel #13
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Implements forward computation: turn score / bbox-delta maps into proposals.

        Reads scores from in_data[0], bbox deltas from in_data[1] and im_info
        from in_data[2]; writes the roi blob to out_data[0] and, when
        self._output_score is set, the matching scores to out_data[1].

        is_train : bool, whether forwarding for training or testing.
        req : list of {'null', 'write', 'inplace', 'add'}, how to assign to out_data. 'null' means skip assignment, etc.
        in_data : list of NDArray, input data.
        out_data : list of NDArray, pre-allocated output buffers.
        aux : list of NDArray, mutable auxiliary states. Usually not used.

        Raises ValueError when the batch holds more than one image.
        """
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        # Algorithm outline:
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS (threshold self._threshold) to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top), which are then
        # used for classification and regression

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        # the first set of anchors are background probabilities
        # keep the second part
        scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = in_data[1].asnumpy()
        im_info = in_data[2].asnumpy()[0, :]

        logger.debug('im_info: %s' % im_info)

        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = int(im_info[0] / self._feat_stride), int(
            im_info[1] / self._feat_stride)

        logger.debug('score map size: (%d, %d)' %
                     (scores.shape[2], scores.shape[3]))
        logger.debug('resudial: (%d, %d)' %
                     (scores.shape[2] - height, scores.shape[3] - width))

        # Enumerate all shifts
        # Idea: generate a grid of shifts, then add every shift to each of the
        # A anchors, yielding A boxes at every feature-map position.
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        # meshgrid expands the two vectors into 2-D grids: shift_x varies along
        # each row, shift_y varies along each column.
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        # Example with a stride of 16, after meshgrid:
        #   shift_x = [[0 16 32 ... 592] [0 16 32 ... 592] ... [0 16 32 ... 592]]
        #   shift_y = [[0 0 0 ... 0] [16 16 16 ... 16] ... [592 592 592 ... 592]]

        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        # (after the transpose above, each row of `shifts` is one shift)
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        # Add every anchor in self._anchors to every shift.
        anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        # K shifts with A boxes per shift.
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        # (i.e. correct the anchor positions with the regressed offsets)
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        # (cut away the parts of boxes that extend past the image border)
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)

        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        # (if there are too few, randomly re-pick kept indices to fill the gap)
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array (the RoIs handed on to the Fast R-CNN stage)
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        # Build 5-tuples (0, x1, y1, x2, y2).
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
Beispiel #14
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Turn score / bbox-delta maps into a fixed-size set of proposals.

        Reads scores from in_data[0], bbox deltas from in_data[1] and im_info
        from in_data[2]; writes the roi blob to out_data[0] and, when
        self._output_score is set, the matching scores to out_data[1].

        Pipeline: shift the base anchors over the feature grid, apply the
        predicted deltas, clip to the image, drop boxes that are too small,
        keep the best pre-NMS candidates, run NMS, keep the best post-NMS
        candidates and pad by resampling if too few remain.

        Raises ValueError when the batch holds more than one image.
        """
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        if in_data[0].shape[0] > 1:
            raise ValueError("Sorry, multiple images each device is not implemented")

        stride = self._feat_stride
        num_anchors = self._num_anchors
        pre_nms_limit = self._rpn_pre_nms_top_n
        post_nms_limit = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        # The first num_anchors channels are background probabilities; only
        # the second part is kept.
        scores = in_data[0].asnumpy()[:, num_anchors:, :, :]
        bbox_deltas = in_data[1].asnumpy()
        im_info = in_data[2].asnumpy()[0, :]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        # 1. Feature-grid size derived from the real image size rather than
        #    from the padded feature map.
        height = int(im_info[0] / stride)
        width = int(im_info[1] / stride)

        if DEBUG:
            print('score map size: {}'.format(scores.shape))
            print("resudial: {}".format((scores.shape[2] - height, scores.shape[3] - width)))

        # One (x, y, x, y) shift per grid cell.
        grid_x, grid_y = np.meshgrid(np.arange(0, width) * stride,
                                     np.arange(0, height) * stride)
        flat_x = grid_x.ravel()
        flat_y = grid_y.ravel()
        shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

        # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts -> (K, A, 4),
        # then flatten to (K * A, 4).
        num_shifts = shifts.shape[0]
        anchors = self._anchors.reshape((1, num_anchors, 4)) + shifts[:, np.newaxis, :]
        anchors = anchors.reshape((num_shifts * num_anchors, 4))

        # Bring deltas (1, 4 * A, H, W) and scores (1, A, H, W) into the same
        # (h, w, a) row order as the anchors.
        grid_hw = (height, width)
        bbox_deltas = self._clip_pad(bbox_deltas, grid_hw).transpose((0, 2, 3, 1)).reshape((-1, 4))
        scores = self._clip_pad(scores, grid_hw).transpose((0, 2, 3, 1)).reshape((-1, 1))

        # 2. Apply the deltas to the anchors and clip to the image.
        proposals = clip_boxes(bbox_pred(anchors, bbox_deltas), im_info[:2])

        # 3. Drop boxes below min_size, expressed at the input image scale
        #    stored in im_info[2].
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals, scores = proposals[keep, :], scores[keep]

        # 4./5. Highest score first; keep at most pre_nms_limit candidates.
        order = scores.ravel().argsort()[::-1]
        if pre_nms_limit > 0:
            order = order[:pre_nms_limit]
        proposals, scores = proposals[order, :], scores[order]

        # 6./7. NMS, then keep at most post_nms_limit survivors.
        keep = nms(np.hstack((proposals, scores)).astype(np.float32))
        if post_nms_limit > 0:
            keep = keep[:post_nms_limit]
        # Pad by resampling kept indices so the output size stays constant.
        shortfall = post_nms_limit - len(keep)
        if shortfall > 0:
            keep = np.hstack((keep, npr.choice(keep, size=shortfall)))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 8. Emit rois as (batch_index, box...) rows; only a single image is
        #    supported, so every batch index is 0.
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
    def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False):
        """Detect over one or more images with scale and flip augmentation.

        Each image is evaluated once per entry of ``scales`` and, when
        ``do_flip`` is set, once more mirrored along axis 1.  The outputs of
        ``self.model`` are decoded stride by stride (``self._feat_stride_fpn``)
        into boxes, scores and optional landmarks, mapped back to the
        coordinate frame of the unscaled, unflipped input, pooled over all
        passes (and over all images when a list is given) and sorted by
        descending score.

        :param img: one image or a list of images, each indexed as
            (row, column, channel) with three channels; the channel order is
            reversed while the network input is built (presumably BGR input
            as produced by cv2 -- confirm with callers)
        :param threshold: minimum score for a candidate to be kept
        :param scales: resize factors to evaluate; the list is only iterated,
            never mutated
        :param do_flip: also evaluate a horizontally mirrored copy
        :return: ``(det, landmarks)``.  ``det`` rows start with the four box
            coordinates and the score.  With ``self.nms_threshold > 0`` any
            extra columns returned by ``self.bbox_pred`` follow and the rows
            are filtered by ``self.nms`` (or merged by ``self.bbox_vote`` when
            ``self.vote`` is set); with ``self.nms_threshold < 0`` a stride
            column is appended and nothing is suppressed; with
            ``self.nms_threshold == 0`` the sorted candidates are returned
            as-is.  ``landmarks`` is ``None`` unless ``self.use_landmarks`` is
            set and ``self.vote`` is not (an empty ``(0, 5, 2)`` array is
            returned for an empty result whenever ``self.use_landmarks`` is
            set).
        """
        proposals_list = []
        scores_list = []
        landmarks_list = []
        strides_list = []
        timea = datetime.datetime.now()
        flips = [0, 1] if do_flip else [0]

        imgs = img if isinstance(img, list) else [img]
        for image in imgs:
            for im_scale in scales:
                for flip in flips:
                    if im_scale != 1.0:
                        im = cv2.resize(image,
                                        None,
                                        None,
                                        fx=im_scale,
                                        fy=im_scale,
                                        interpolation=cv2.INTER_LINEAR)
                    else:
                        im = image.copy()
                    if flip:
                        im = im[:, ::-1, :]
                    # Width of the actual image content.  The padding below
                    # only extends the canvas to the right, so mirrored
                    # coordinates must be un-flipped against this width and
                    # not against the padded one.
                    content_width = im.shape[1]
                    if self.nocrop:
                        # Zero-pad bottom/right up to the next multiple of 32.
                        h = (im.shape[0] + 31) // 32 * 32
                        w = (im.shape[1] + 31) // 32 * 32
                        _im = np.zeros((h, w, 3), dtype=np.float32)
                        _im[0:im.shape[0], 0:im.shape[1], :] = im
                        im = _im
                    else:
                        im = im.astype(np.float32)
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('X1 uses', diff.total_seconds(), 'seconds')

                    # Clipping bounds: size of the (possibly padded) input.
                    im_info = [im.shape[0], im.shape[1]]
                    # Channel-first tensor with reversed channel order,
                    # normalised with the configured scale / mean / std.
                    im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
                    for i in range(3):
                        im_tensor[0, i, :, :] = (
                            im[:, :, 2 - i] / self.pixel_scale -
                            self.pixel_means[2 - i]) / self.pixel_stds[2 - i]
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('X2 uses', diff.total_seconds(), 'seconds')
                    data = nd.array(im_tensor)
                    db = mx.io.DataBatch(data=(data, ),
                                         provide_data=[('data', data.shape)])
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('X3 uses', diff.total_seconds(), 'seconds')
                    self.model.forward(db, is_train=False)
                    net_out = self.model.get_outputs()

                    # Index of the current stride's first output in net_out.
                    # Advanced per stride by 2 (scores, boxes), 3 with
                    # landmarks, plus any cascade outputs consumed.
                    sym_idx = 0

                    for s in self._feat_stride_fpn:
                        key = 'stride%s' % s
                        stride = int(s)
                        is_cascade = bool(self.cascade)

                        scores = net_out[sym_idx].asnumpy()
                        if self.debug:
                            timeb = datetime.datetime.now()
                            diff = timeb - timea
                            print('A uses', diff.total_seconds(), 'seconds')
                        A = self._num_anchors[key]
                        # Only the channels from index A onwards are used as
                        # scores.
                        scores = scores[:, A:, :, :]

                        bbox_deltas = net_out[sym_idx + 1].asnumpy()
                        height, width = bbox_deltas.shape[
                            2], bbox_deltas.shape[3]

                        K = height * width
                        anchors_fpn = self._anchors_fpn[key]
                        anchors = anchors_plane(height, width, stride,
                                                anchors_fpn)
                        anchors = anchors.reshape((K * A, 4))

                        scores = scores.transpose((0, 2, 3, 1)).reshape(
                            (-1, 1))

                        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                        bbox_pred_len = bbox_deltas.shape[3] // A
                        bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
                        for k in range(4):
                            bbox_deltas[:, k::4] = (bbox_deltas[:, k::4] *
                                                    self.bbox_stds[k])
                        proposals = self.bbox_pred(anchors, bbox_deltas)

                        # Number of extra (cascade) outputs used at this
                        # stride; stays 0 when cascading is disabled.
                        cascade_sym_num = 0
                        if is_cascade:
                            cls_cascade = False
                            bbox_cascade = False
                            # Candidate offsets of the cascade outputs,
                            # located after the regular outputs of the stride.
                            if self.use_landmarks:
                                cascade_offsets = [3, 4]
                            else:
                                cascade_offsets = [2, 3]
                            for diff_idx in cascade_offsets:
                                if sym_idx + diff_idx >= len(net_out):
                                    break
                                body = net_out[sym_idx + diff_idx].asnumpy()
                                if body.shape[1] // A == 2:  # cls branch
                                    if cls_cascade or bbox_cascade:
                                        break
                                    cascade_scores = body[:, A:, :, :]
                                    cascade_scores = cascade_scores.transpose(
                                        (0, 2, 3, 1)).reshape((-1, 1))
                                    # The cascade scores replace (rather than
                                    # average with) the first-stage scores.
                                    scores = cascade_scores  #TODO?
                                    cascade_sym_num += 1
                                    cls_cascade = True
                                elif body.shape[1] // A == 4:  # bbox branch
                                    cascade_deltas = body.transpose(
                                        (0, 2, 3, 1)).reshape(
                                            (-1, bbox_pred_len))
                                    for k in range(4):
                                        cascade_deltas[:, k::4] = (
                                            cascade_deltas[:, k::4] *
                                            self.bbox_stds[k])
                                    # Refine the first-stage boxes.
                                    proposals = self.bbox_pred(
                                        proposals, cascade_deltas)
                                    cascade_sym_num += 1
                                    bbox_cascade = True

                        proposals = clip_boxes(proposals, im_info[:2])

                        # Down-weight stride-4 scores before thresholding.
                        if stride == 4 and self.decay4 < 1.0:
                            scores *= self.decay4

                        scores_ravel = scores.ravel()
                        order = np.where(scores_ravel >= threshold)[0]
                        proposals = proposals[order, :]
                        scores = scores[order]
                        if flip:
                            # Mirror x1/x2 back into the unflipped frame.
                            oldx1 = proposals[:, 0].copy()
                            oldx2 = proposals[:, 2].copy()
                            proposals[:, 0] = content_width - oldx2 - 1
                            proposals[:, 2] = content_width - oldx1 - 1

                        proposals[:, 0:4] /= im_scale

                        proposals_list.append(proposals)
                        scores_list.append(scores)
                        if self.nms_threshold < 0.0:
                            # Remember which stride produced each candidate.
                            strides_list.append(
                                np.full(scores.shape, stride,
                                        dtype=np.float32))

                        if not self.vote and self.use_landmarks:
                            landmark_deltas = net_out[sym_idx + 2].asnumpy()
                            landmark_pred_len = landmark_deltas.shape[1] // A
                            landmark_deltas = landmark_deltas.transpose(
                                (0, 2, 3, 1)).reshape(
                                    (-1, 5, landmark_pred_len // 5))
                            landmark_deltas *= self.landmark_std
                            landmarks = self.landmark_pred(
                                anchors, landmark_deltas)
                            landmarks = landmarks[order, :]

                            if flip:
                                landmarks[:, :, 0] = (content_width -
                                                      landmarks[:, :, 0] - 1)
                                # Mirroring exchanges points 0<->1 and 3<->4;
                                # point 2 keeps its slot.
                                flip_order = [1, 0, 2, 4, 3]
                                flandmarks = landmarks.copy()
                                for dst, src in enumerate(flip_order):
                                    flandmarks[:, dst, :] = landmarks[:,
                                                                      src, :]
                                landmarks = flandmarks
                            landmarks[:, :, 0:2] /= im_scale
                            landmarks_list.append(landmarks)

                        if self.use_landmarks:
                            sym_idx += 3
                        else:
                            sym_idx += 2
                        sym_idx += cascade_sym_num

        if self.debug:
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print('B uses', diff.total_seconds(), 'seconds')
        proposals = np.vstack(proposals_list)
        landmarks = None
        if proposals.shape[0] == 0:
            # Nothing passed the threshold: return correctly shaped empties.
            if self.use_landmarks:
                landmarks = np.zeros((0, 5, 2))
            if self.nms_threshold < 0.0:
                return np.zeros((0, 6)), landmarks
            else:
                return np.zeros((0, 5)), landmarks
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        # Sort all candidates by descending score.
        order = scores_ravel.argsort()[::-1]
        proposals = proposals[order, :]
        scores = scores[order]
        if self.nms_threshold < 0.0:
            strides = np.vstack(strides_list)
            strides = strides[order]
        if not self.vote and self.use_landmarks:
            landmarks = np.vstack(landmarks_list)
            landmarks = landmarks[order].astype(np.float32, copy=False)

        if self.nms_threshold > 0.0:
            pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                    copy=False)
            if not self.vote:
                # self.nms returns the row indices to keep.
                keep = self.nms(pre_det)
                det = np.hstack((pre_det, proposals[:, 4:]))
                det = det[keep, :]
                if self.use_landmarks:
                    landmarks = landmarks[keep]
            else:
                det = np.hstack((pre_det, proposals[:, 4:]))
                det = self.bbox_vote(det)
        elif self.nms_threshold < 0.0:
            det = np.hstack(
                (proposals[:, 0:4], scores, strides)).astype(np.float32,
                                                             copy=False)
        else:
            det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)

        if self.debug:
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print('C uses', diff.total_seconds(), 'seconds')
        return det, landmarks
    def forward(self, is_train, req, in_data, out_data, aux):
        """Turn per-stride class probabilities and box deltas into ROIs.

        ``in_data`` is read as: the first ``len(self.fpn_keys)`` entries are
        class-probability maps, the next ``len(self.fpn_keys)`` entries are
        box-delta maps (both keyed by ``self.fpn_keys``) and the last entry
        is ``im_info``, whose first row supplies the clipping bounds.

        For every stride the deltas are applied to the anchors, the boxes are
        clipped and the ``self._rpn_pre_nms_top_n`` best-scoring ones are
        kept.  The per-stride results are merged, sorted by score, optionally
        passed through NMS (when ``self._threshold < 1.0``) and truncated or
        padded to ``self._rpn_post_nms_top_n`` rows.

        Writes to ``out_data[0]`` one row per ROI: a zero batch index
        followed by the box coordinates.  When ``self._output_score`` is set,
        ``out_data[1]`` receives the matching scores; rows that only exist as
        padding carry a score of -1.

        :raises ValueError: if the batch holds more than one image
        """
        nms = gpu_nms_wrapper(self._threshold, in_data[0][0].context.device_id)

        num_levels = len(self.fpn_keys)
        cls_prob_dict = dict(zip(self.fpn_keys, in_data[0:num_levels]))
        bbox_pred_dict = dict(
            zip(self.fpn_keys, in_data[num_levels:2 * num_levels]))
        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n

        # im_info does not depend on the stride, so copy it out once.
        im_info = in_data[-1].asnumpy()[0, :]

        proposals_list = []
        scores_list = []
        for s in self._feat_stride_fpn:
            key = 'stride%s' % s
            stride = int(s)
            A = self._num_anchors[key]

            scores = cls_prob_dict[key].asnumpy()
            # Only the channels from index A onwards are used as scores.
            scores = scores[:, A:, :, :]
            bbox_deltas = bbox_pred_dict[key].asnumpy()

            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
            K = height * width

            anchors = anchors_plane(height, width, stride,
                                    self._anchors_fpn[key].astype(np.float32))
            anchors = anchors.reshape((K * A, 4))

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            proposals = self._bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, im_info[:2])

            # Keep the best pre_nms_topN candidates of this stride.
            scores_ravel = scores.ravel()
            order = scores_ravel.argsort()[::-1]
            if pre_nms_topN > 0:
                order = order[:pre_nms_topN]
            proposals = proposals[order, :]
            scores = scores[order]

            proposals_list.append(proposals)
            scores_list.append(scores)

        # Merge all strides and sort by descending score.
        proposals = np.vstack(proposals_list)
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        proposals = proposals[order, :]
        scores = scores[order]

        det = np.hstack((proposals, scores)).astype(np.float32)

        if self._threshold < 1.0:
            keep = nms(det)
        else:
            # A threshold of 1.0 or more disables suppression.
            keep = range(det.shape[0])
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        num_keep = len(keep)

        if len(keep) < post_nms_topN:
            # Pad with randomly repeated kept indices so the output always
            # has post_nms_topN rows.
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))

        proposals = proposals[keep, :]
        scores = scores[keep]
        # Mark the padded duplicates so consumers can tell them apart.
        scores[num_keep:, :] = -1.0

        # Column 0 is the batch index, which is always 0 because only a
        # single image per device is supported.
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
    def detect(self, img, threshold=0.5, im_scale=1.0):
        """Run one forward pass on a ready-made input and decode detections.

        :param img: network input that is fed to the model unchanged; its
            dimensions 2 and 3 are used as the clipping bounds for the boxes
        :param threshold: minimum score for a candidate to be kept
        :param im_scale: factor the box and landmark coordinates are divided
            by before they are returned
        :return: ``(det, landmarks)``.  ``det`` rows hold the four box
            coordinates, the score and any extra columns returned by
            ``self.bbox_pred``; they are filtered by ``self.nms`` or, when
            ``self.vote`` is set, processed by ``self.bbox_vote``.
            ``landmarks`` is ``None`` unless ``self.use_landmarks`` is set and
            ``self.vote`` is not (an empty ``(0, 5, 2)`` array is returned for
            an empty result whenever ``self.use_landmarks`` is set).
        """
        batch_input = nd.array(img)
        batch = mx.io.DataBatch(data=(batch_input, ),
                                provide_data=[('data', batch_input.shape)])
        self.model.forward(batch, is_train=False)
        outputs = self.model.get_outputs()

        box_chunks = []
        score_chunks = []
        landmark_chunks = []
        for level, s in enumerate(self._feat_stride_fpn):
            level_key = 'stride%s' % s
            stride = int(s)
            # Each stride owns 3 consecutive outputs with landmarks, else 2.
            first_out = level * 3 if self.use_landmarks else level * 2

            cls_map = outputs[first_out].asnumpy()
            num_anchors = self._num_anchors[level_key]
            # Only the channels from index num_anchors onwards are scores.
            cls_map = cls_map[:, num_anchors:, :, :]

            reg_map = outputs[first_out + 1].asnumpy()
            feat_h, feat_w = reg_map.shape[2], reg_map.shape[3]

            level_anchors = anchors_plane(feat_h, feat_w, stride,
                                          self._anchors_fpn[level_key])
            level_anchors = level_anchors.reshape(
                (feat_h * feat_w * num_anchors, 4))

            cls_map = self._clip_pad(cls_map, (feat_h, feat_w))
            level_scores = cls_map.transpose((0, 2, 3, 1)).reshape((-1, 1))

            reg_map = self._clip_pad(reg_map, (feat_h, feat_w))
            reg_map = reg_map.transpose((0, 2, 3, 1))
            deltas_per_anchor = reg_map.shape[3] // num_anchors
            reg_map = reg_map.reshape((-1, deltas_per_anchor))

            boxes = self.bbox_pred(level_anchors, reg_map)
            boxes = clip_boxes(boxes, (img.shape[2], img.shape[3]))

            # Threshold first, then optionally damp the stride-4 scores.
            selected = np.where(level_scores.ravel() >= threshold)[0]
            boxes = boxes[selected, :]
            level_scores = level_scores[selected]
            if stride == 4 and self.decay4 < 1.0:
                level_scores *= self.decay4

            boxes[:, 0:4] /= im_scale

            box_chunks.append(boxes)
            score_chunks.append(level_scores)

            if not self.vote and self.use_landmarks:
                lmk_map = outputs[first_out + 2].asnumpy()
                lmk_map = self._clip_pad(lmk_map, (feat_h, feat_w))
                lmk_len = lmk_map.shape[1] // num_anchors
                lmk_deltas = lmk_map.transpose((0, 2, 3, 1)).reshape(
                    (-1, 5, lmk_len // 5))
                level_landmarks = self.landmark_pred(level_anchors,
                                                     lmk_deltas)
                level_landmarks = level_landmarks[selected, :]
                level_landmarks[:, :, 0:2] /= im_scale
                landmark_chunks.append(level_landmarks)

        proposals = np.vstack(box_chunks)
        landmarks = None
        if proposals.shape[0] == 0:
            # Nothing passed the threshold: return correctly shaped empties.
            if self.use_landmarks:
                landmarks = np.zeros((0, 5, 2))
            return np.zeros((0, 5)), landmarks

        scores = np.vstack(score_chunks)
        # Rank every candidate by descending score.
        ranking = scores.ravel().argsort()[::-1]
        proposals = proposals[ranking, :]
        scores = scores[ranking]
        if not self.vote and self.use_landmarks:
            landmarks = np.vstack(landmark_chunks)
            landmarks = landmarks[ranking].astype(np.float32, copy=False)

        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        det = np.hstack((pre_det, proposals[:, 4:]))
        if self.vote:
            det = self.bbox_vote(det)
        else:
            # self.nms returns the row indices to keep.
            keep = self.nms(pre_det)
            det = det[keep, :]
            if self.use_landmarks:
                landmarks = landmarks[keep]
        return det, landmarks
Beispiel #18
0
    def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False):
        """Detect on one image with scale and flip augmentation.

        The image is evaluated once per entry of ``scales`` and, when
        ``do_flip`` is set, once more mirrored along axis 1.  The outputs of
        ``self.model`` are decoded stride by stride (``self._feat_stride_fpn``)
        into boxes, scores and optional landmarks, mapped back to the
        coordinate frame of the unscaled, unflipped input, pooled over all
        passes and sorted by descending score.

        :param img: image indexed as (row, column, channel) with three
            channels; the channel order is reversed while the network input
            is built (presumably BGR input as produced by cv2 -- confirm
            with callers)
        :param threshold: minimum score for a candidate to be kept
        :param scales: resize factors to evaluate; the list is only iterated,
            never mutated
        :param do_flip: also evaluate a horizontally mirrored copy
        :return: ``(det, landmarks)``.  ``det`` rows hold the four box
            coordinates, the score and any extra columns returned by
            ``self.bbox_pred``; they are filtered by ``self.nms`` or, when
            ``self.vote`` is set, processed by ``self.bbox_vote``.
            ``landmarks`` is ``None`` unless ``self.use_landmarks`` is set and
            ``self.vote`` is not (an empty ``(0, 5, 2)`` array is returned for
            an empty result whenever ``self.use_landmarks`` is set).
        """
        proposals_list = []
        scores_list = []
        landmarks_list = []
        timea = datetime.datetime.now()
        flips = [0, 1] if do_flip else [0]

        for im_scale in scales:
            for flip in flips:
                if im_scale != 1.0:
                    im = cv2.resize(img,
                                    None,
                                    None,
                                    fx=im_scale,
                                    fy=im_scale,
                                    interpolation=cv2.INTER_LINEAR)
                else:
                    im = img.copy()
                if flip:
                    im = im[:, ::-1, :]
                # Width of the actual image content.  The padding below only
                # extends the canvas to the right, so mirrored coordinates
                # must be un-flipped against this width and not against the
                # padded one.
                content_width = im.shape[1]
                if self.nocrop:
                    # Zero-pad bottom/right up to the next multiple of 32.
                    h = (im.shape[0] + 31) // 32 * 32
                    w = (im.shape[1] + 31) // 32 * 32
                    _im = np.zeros((h, w, 3), dtype=np.float32)
                    _im[0:im.shape[0], 0:im.shape[1], :] = im
                    im = _im
                else:
                    im = im.astype(np.float32)
                if self.debug:
                    timeb = datetime.datetime.now()
                    diff = timeb - timea
                    print('X1 uses', diff.total_seconds(), 'seconds')

                # Clipping bounds: size of the (possibly padded) input.
                im_info = [im.shape[0], im.shape[1]]
                # Channel-first tensor with reversed channel order,
                # normalised with the configured scale / mean / std.
                im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
                for i in range(3):
                    im_tensor[0, i, :, :] = (
                        im[:, :, 2 - i] / self.pixel_scale -
                        self.pixel_means[2 - i]) / self.pixel_stds[2 - i]
                if self.debug:
                    timeb = datetime.datetime.now()
                    diff = timeb - timea
                    print('X2 uses', diff.total_seconds(), 'seconds')
                data = nd.array(im_tensor)
                db = mx.io.DataBatch(data=(data, ),
                                     provide_data=[('data', data.shape)])
                if self.debug:
                    timeb = datetime.datetime.now()
                    diff = timeb - timea
                    print('X3 uses', diff.total_seconds(), 'seconds')
                self.model.forward(db, is_train=False)
                net_out = self.model.get_outputs()

                for level, s in enumerate(self._feat_stride_fpn):
                    key = 'stride%s' % s
                    stride = int(s)

                    # Each stride owns 3 consecutive outputs with landmarks
                    # (scores, boxes, landmarks), otherwise 2.
                    if self.use_landmarks:
                        base = level * 3
                    else:
                        base = level * 2

                    scores = net_out[base].asnumpy()
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('A uses', diff.total_seconds(), 'seconds')

                    A = self._num_anchors[key]
                    # Only the channels from index A onwards are used as
                    # scores.
                    scores = scores[:, A:, :, :]

                    bbox_deltas = net_out[base + 1].asnumpy()

                    height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

                    K = height * width
                    anchors_fpn = self._anchors_fpn[key]
                    anchors = anchors_plane(height, width, stride, anchors_fpn)
                    anchors = anchors.reshape((K * A, 4))

                    scores = self._clip_pad(scores, (height, width))
                    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                    bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                    bbox_pred_len = bbox_deltas.shape[3] // A
                    bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))

                    proposals = self.bbox_pred(anchors, bbox_deltas)
                    proposals = clip_boxes(proposals, im_info[:2])

                    scores_ravel = scores.ravel()
                    order = np.where(scores_ravel >= threshold)[0]

                    proposals = proposals[order, :]
                    scores = scores[order]
                    # Down-weight the surviving stride-4 scores.
                    if stride == 4 and self.decay4 < 1.0:
                        scores *= self.decay4
                    if flip:
                        # Mirror x1/x2 back into the unflipped frame.
                        oldx1 = proposals[:, 0].copy()
                        oldx2 = proposals[:, 2].copy()
                        proposals[:, 0] = content_width - oldx2 - 1
                        proposals[:, 2] = content_width - oldx1 - 1

                    proposals[:, 0:4] /= im_scale

                    proposals_list.append(proposals)
                    scores_list.append(scores)

                    if not self.vote and self.use_landmarks:
                        landmark_deltas = net_out[base + 2].asnumpy()
                        landmark_deltas = self._clip_pad(
                            landmark_deltas, (height, width))
                        landmark_pred_len = landmark_deltas.shape[1] // A
                        landmark_deltas = landmark_deltas.transpose(
                            (0, 2, 3, 1)).reshape(
                                (-1, 5, landmark_pred_len // 5))

                        landmarks = self.landmark_pred(anchors,
                                                       landmark_deltas)
                        landmarks = landmarks[order, :]

                        if flip:
                            landmarks[:, :, 0] = (content_width -
                                                  landmarks[:, :, 0] - 1)
                            # Mirroring exchanges points 0<->1 and 3<->4;
                            # point 2 keeps its slot.
                            flip_order = [1, 0, 2, 4, 3]
                            flandmarks = landmarks.copy()
                            for dst, src in enumerate(flip_order):
                                flandmarks[:, dst, :] = landmarks[:, src, :]
                            landmarks = flandmarks
                        landmarks[:, :, 0:2] /= im_scale
                        landmarks_list.append(landmarks)

        if self.debug:
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print('B uses', diff.total_seconds(), 'seconds')
        proposals = np.vstack(proposals_list)
        landmarks = None
        if proposals.shape[0] == 0:
            # Nothing passed the threshold: return correctly shaped empties.
            if self.use_landmarks:
                landmarks = np.zeros((0, 5, 2))
            return np.zeros((0, 5)), landmarks
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        # Sort all candidates by descending score.
        order = scores_ravel.argsort()[::-1]
        proposals = proposals[order, :]
        scores = scores[order]
        if not self.vote and self.use_landmarks:
            landmarks = np.vstack(landmarks_list)
            landmarks = landmarks[order].astype(np.float32, copy=False)

        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        if not self.vote:
            # self.nms returns the row indices to keep.
            keep = self.nms(pre_det)
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = det[keep, :]
            if self.use_landmarks:
                landmarks = landmarks[keep]
        else:
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = self.bbox_vote(det)

        if self.debug:
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print('C uses', diff.total_seconds(), 'seconds')
        return det, landmarks
    def detect(self, img, threshold=0.5):
        """
        Run single-scale detection on ``img`` using a fixed 640x640 input.

        A non-square image is padded on its right or bottom edge with the
        constant colour ``(255, 0, 0)`` until it is square, resized to
        640x640, normalised with ``pixel_scale`` / ``pixel_means`` /
        ``pixel_stds`` and passed through ``self.model``.  For every entry of
        ``self._feat_stride_fpn`` the score, box and landmark outputs are
        decoded against the anchors, candidates scoring below ``threshold``
        are dropped, and the survivors are divided by the resize factor
        before NMS.

        :param img: image array indexed as ``img[row, col, channel]``
            (presumably BGR as loaded by OpenCV -- TODO confirm)
        :param threshold: minimum score a candidate needs to be kept
        :return: ``(det, landmarks)``; ``det`` holds the four box columns,
            the score and any extra columns produced by ``bbox_pred`` for the
            rows kept by NMS, ``landmarks`` the matching landmark rows.
            Empty ``(0, 5)`` and ``(0, 5, 2)`` arrays are returned when no
            candidate reaches the threshold.
        """
        input_hw = [640, 640]
        per_level_scores = []
        per_level_boxes = []
        per_level_landmarks = []

        rows, cols = img.shape[0], img.shape[1]
        if rows != cols:
            pad_colour = (255, 0, 0)
            # Exactly one of these is non-zero: grow the shorter side.
            pad_bottom = max(cols - rows, 0)
            pad_right = max(rows - cols, 0)
            img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right,
                                     cv2.BORDER_CONSTANT, value=pad_colour)

        # Factor that maps padded-image coordinates to network coordinates.
        re_scale = float(input_hw[0]) / float(img.shape[0])
        img = cv2.resize(img, (input_hw[0], input_hw[1])).astype(np.float32)

        # Channel axis is reversed: tensor channel i comes from image
        # channel 2 - i.
        im_tensor = np.zeros((1, 3, img.shape[0], img.shape[1]))
        for dst, src in enumerate((2, 1, 0)):
            scaled = img[:, :, src] / self.pixel_scale
            im_tensor[0, dst, :, :] = (
                scaled - self.pixel_means[src]) / self.pixel_stds[src]

        data = nd.array(im_tensor)
        batch = mx.io.DataBatch(data=(data, ),
                                provide_data=[('data', data.shape)])
        self.model.forward(batch, is_train=False)
        net_out = self.model.get_outputs()

        for level, s in enumerate(self._feat_stride_fpn):
            stride = int(s)
            key = 'stride%s' % s
            # Each level owns three consecutive outputs:
            # scores, box deltas, landmark deltas.
            base = level * 3
            score_map = net_out[base].asnumpy()
            score_map = score_map[:, self._num_anchors[key]:, :, :]
            box_map = net_out[base + 1].asnumpy()
            lmk_map = net_out[base + 2].asnumpy()

            height, width = box_map.shape[2], box_map.shape[3]
            A = self._num_anchors[key]
            K = height * width
            anchors = anchors_plane(height, width, stride,
                                    self._anchors_fpn[key])
            anchors = anchors.reshape((K * A, 4))

            score_map = self._clip_pad(score_map, (height, width))
            scores = score_map.transpose((0, 2, 3, 1)).reshape((-1, 1))

            box_map = self._clip_pad(box_map, (height, width))
            box_map = box_map.transpose((0, 2, 3, 1))
            bbox_pred_len = box_map.shape[3] // A
            box_deltas = box_map.reshape((-1, bbox_pred_len))
            proposals = clip_boxes(self.bbox_pred(anchors, box_deltas),
                                   input_hw[:2])

            lmk_map = self._clip_pad(lmk_map, (height, width))
            landmark_pred_len = lmk_map.shape[1] // A
            lmk_deltas = lmk_map.transpose((0, 2, 3, 1)).reshape(
                (-1, 5, landmark_pred_len // 5))
            landmarks = self.landmark_pred(anchors, lmk_deltas)

            # Keep only the candidates that reach the score threshold.
            selected = np.where(scores.ravel() >= threshold)[0]
            scores = scores[selected]
            proposals = proposals[selected, :]
            landmarks = landmarks[selected, :]

            if stride == 4 and self.decay4 < 1.0:
                scores *= self.decay4

            # Undo the resize so coordinates refer to the padded image.
            proposals[:, 0:4] /= re_scale
            landmarks[:, :, 0:2] /= re_scale

            per_level_scores.append(scores)
            per_level_boxes.append(proposals)
            per_level_landmarks.append(landmarks)

        scores = np.vstack(per_level_scores)
        proposals = np.vstack(per_level_boxes)
        landmarks = np.vstack(per_level_landmarks)
        if proposals.shape[0] == 0:
            return np.zeros((0, 5)), np.zeros((0, 5, 2))

        # Sort everything by descending score before NMS.
        ranking = scores.ravel().argsort()[::-1]
        scores = scores[ranking]
        proposals = proposals[ranking, :]
        landmarks = landmarks[ranking].astype(np.float32, copy=False)

        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        keep = self.nms(pre_det)

        det = np.hstack((pre_det, proposals[:, 4:]))[keep, :]
        return det, landmarks[keep]
Beispiel #20
0
	def detect(self, img, thresh, scales=[1.0], do_flip=False):
		"""
		Detect boxes and 5-point landmarks on ``img``, optionally at several
		scales and on a horizontally flipped copy.

		Every (scale, flip) pass is built from the untouched input image, run
		through ``self.model`` and decoded per entry of ``self.fpn_keys``.
		Candidates scoring below ``thresh`` are dropped, coordinates of
		flipped passes are mirrored back, and everything is divided by the
		scale before all passes are merged, sorted by score and filtered by
		``self.nms``.

		:param img: image array indexed as ``img[row, col, channel]``
		:param thresh: minimum score a candidate needs to be kept
		:param scales: resize factors to run the network at (never mutated)
		:param do_flip: also run each scale on the horizontally flipped image
		:return: ``(det, landmarks)``; ``det`` holds the four box columns,
			the score and any extra columns produced by ``bbox_pred`` for the
			rows kept by NMS, ``landmarks`` the matching landmark rows.
			Empty ``(0, 5)`` and ``(0, 5, 2)`` arrays are returned when no
			candidate reaches the threshold.
		"""
		proposal_list = []
		scores_list = []
		landmarks_list = []
		flips = [0,1] if do_flip else [0]
		# Landmark permutation applied after mirroring: swaps points 0<->1
		# and 3<->4, keeps point 2.
		flip_order = np.int32([1,0,2,4,3])
		for im_scale in scales:
			for flip in flips:
				# Always start from the original ``img``: the previous code
				# overwrote ``img`` with the resized/flipped/tensor version,
				# so every pass after the first operated on a torch tensor.
				if im_scale!=1.0:
					im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
				else:
					im = img.copy()
				if flip:
					im = im[:,::-1,:]

				imgshape = [im.shape[0], im.shape[1]]
				net_in = torch.from_numpy(self.preprocess(im))

				if self.use_gpu:
					net_in = net_in.cuda()

				net_out = self.model(net_in)

				for _idx,s in enumerate(self.fpn_keys):
					# Each level owns three consecutive outputs:
					# scores, box deltas, landmark deltas.
					idx = _idx * 3
					scores = net_out[idx].detach().cpu().numpy()
					scores = scores[:, self._num_anchors[s]:]

					idx += 1
					bbox_deltas = net_out[idx].detach().cpu().numpy()

					h, w = bbox_deltas.shape[2], bbox_deltas.shape[3]

					A = self._num_anchors[s]
					K = h*w
					anchors_fpn = np.float32(self._anchors_fpn[s])
					anchors = anchors_plane(h, w, s, anchors_fpn)
					anchors = anchors.reshape((K*A, 4))

					scores = self._clip_pad(scores, (h, w))
					scores = scores.transpose([0,2,3,1]).reshape([-1,1])

					bbox_deltas = self._clip_pad(bbox_deltas, (h,w))
					bbox_deltas = bbox_deltas.transpose([0,2,3,1])
					bbox_pred_len = bbox_deltas.shape[3]//A
					bbox_deltas = bbox_deltas.reshape([-1, bbox_pred_len])

					proposals = self.bbox_pred(anchors, bbox_deltas)
					proposals = clip_boxes(proposals, imgshape)

					# Keep only the candidates that reach the threshold.
					order = np.where(scores.ravel()>=thresh)[0]

					proposals = proposals[order]
					scores = scores[order]

					if flip:
						# Mirror x1/x2 back into unflipped coordinates.
						# (Previously referenced an undefined name ``im``.)
						oldx1 = proposals[:, 0].copy()
						oldx2 = proposals[:, 2].copy()
						proposals[:, 0] = imgshape[1] - oldx2 - 1
						proposals[:, 2] = imgshape[1] - oldx1 - 1

					proposals[:,:4] /= im_scale

					proposal_list.append(proposals)
					scores_list.append(scores)

					# landmarks
					idx += 1
					landmark_deltas = net_out[idx].detach().cpu().numpy()
					landmark_deltas = self._clip_pad(landmark_deltas, (h,w))
					landmark_pred_len = landmark_deltas.shape[1]//A
					landmark_deltas = landmark_deltas.transpose((0,2,3,1)).reshape([-1,5,landmark_pred_len//5])
					landmarks = self.landmark_pred(anchors, landmark_deltas)
					landmarks = landmarks[order, :]

					if flip:
						landmarks[:,:,0] = imgshape[1] - landmarks[:,:,0] - 1
						# The reordered copy used to be computed and then
						# discarded; it must replace ``landmarks``.
						landmarks = landmarks[:,flip_order]

					landmarks[:,:,:2] /= im_scale
					landmarks_list.append(landmarks)

		proposals = np.vstack(proposal_list)
		if proposals.shape[0]==0:
			return np.zeros([0,5]), np.zeros([0,5,2])

		# Merge all passes and sort by descending score before NMS.
		scores = np.vstack(scores_list)
		order = scores.ravel().argsort()[::-1]
		proposals = proposals[order]
		scores = scores[order]
		landmarks = np.vstack(landmarks_list)
		landmarks = np.float32(landmarks[order])

		pre_det = np.hstack([proposals[:, 0:4], scores])
		pre_det = np.float32(pre_det)

		keep = self.nms(pre_det)
		det = np.hstack([pre_det, proposals[:,4:]])
		det = det[keep]
		landmarks = landmarks[keep]

		return det, landmarks
Beispiel #21
0
  def detect(self, img, threshold=0.05, scales=[1.0]):
    """
    Detect boxes on ``img`` at every factor in ``scales``.

    For each scale the image is resized (when the factor is not 1.0),
    converted to a mean-subtracted tensor with the channel axis reversed and
    passed through ``self.model``.  The score and box outputs of each entry
    of ``self._feat_stride_fpn`` are decoded against the anchors, the
    ``self._rpn_pre_nms_top_n`` best candidates are kept and mapped back to
    the input image by dividing by the scale.  Candidates of all scales are
    then merged, sorted by score, passed through NMS (when
    ``self.nms_threshold < 1.0``) and filtered by ``threshold``.

    :param img: image array indexed as ``img[row, col, channel]``
    :param threshold: minimum score of a returned detection; values
        ``<= 0`` disable the filter
    :param scales: resize factors to run the network at (never mutated)
    :return: float32 array whose rows are the box columns followed by the
        score
    """
    box_chunks = []
    score_chunks = []

    for im_scale in scales:
      if im_scale == 1.0:
        im = img
      else:
        im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
      im = im.astype(np.float32)
      im_info = [im.shape[0], im.shape[1], im_scale]

      # Tensor channel i is image channel 2 - i minus that channel's mean.
      im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
      for dst, src in enumerate((2, 1, 0)):
        im_tensor[0, dst, :, :] = im[:, :, src] - self.pixel_means[src]

      data = nd.array(im_tensor)
      db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
      self.model.forward(db, is_train=False)
      net_out = self.model.get_outputs()
      pre_nms_topN = self._rpn_pre_nms_top_n

      for s in self._feat_stride_fpn:
        # With several scales, stride 32 is skipped on the last scale.
        if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
          continue
        key = 'stride%s' % s
        stride = int(s)

        # Position of this stride's score output; the box deltas follow it.
        if s == 16:
          idx = 2
        elif s == 8:
          idx = 4
        else:
          idx = 0
        print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)

        A = self._num_anchors[key]
        scores = net_out[idx].asnumpy()[:, A:, :, :]
        bbox_deltas = net_out[idx + 1].asnumpy()

        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
        K = height * width

        anchors = anchors_plane(height, width, stride,
                                self._anchors_fpn[key].astype(np.float32))
        anchors = anchors.reshape((K * A, 4))

        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        proposals = clip_boxes(self._bbox_pred(anchors, bbox_deltas),
                               im_info[:2])

        # Highest scores first, optionally truncated to the top N.
        ranking = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
          ranking = ranking[:pre_nms_topN]
        proposals = proposals[ranking, :]
        scores = scores[ranking]

        # Back to the coordinates of the unscaled input image.
        proposals /= im_scale

        box_chunks.append(proposals)
        score_chunks.append(scores)

    proposals = np.vstack(box_chunks)
    scores = np.vstack(score_chunks)
    ranking = scores.ravel().argsort()[::-1]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    det = np.hstack((proposals, scores)).astype(np.float32)

    if self.nms_threshold < 1.0:
      det = det[self.nms(det), :]
    if threshold > 0.0:
      det = det[np.where(det[:, 4] >= threshold)[0], :]
    return det
Beispiel #22
0
    def detect(self, img, threshold=0.5, scales=[1.0]):
        """
        Detect boxes together with keypoints on ``img`` at every factor in
        ``scales``.

        For each scale the image is resized (when the factor is not 1.0),
        converted to a mean-subtracted tensor with the channel axis reversed
        and passed through ``self.model``; the forward time is printed.  For
        each entry of ``self.feat_strides`` the score, box and keypoint
        outputs are decoded against the anchors, the
        ``self._rpn_pre_nms_top_n`` best candidates are kept and divided by
        the scale.  Candidates of all scales are merged, sorted by score,
        passed through NMS (when ``self.nms_threshold < 1.0``) and filtered
        by ``threshold``.

        :param img: image array indexed as ``img[row, col, channel]``
        :param threshold: minimum score of a returned detection; values
            ``<= 0`` disable the filter
        :param scales: resize factors to run the network at (never mutated)
        :return: float32 array whose rows are the box columns, the score and
            then the keypoint columns
        """
        box_chunks = []
        kp_chunks = []
        score_chunks = []

        for im_scale in scales:
            if im_scale == 1.0:
                im = img
            else:
                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
            im = im.astype(np.float32)
            im_info = [im.shape[0], im.shape[1], im_scale]

            # Tensor channel i is image channel 2 - i minus its mean
            # (original note: bgr2rgb -- mxnet is rgb, opencv is bgr).
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            for dst, src in enumerate((2, 1, 0)):
                im_tensor[0, dst, :, :] = im[:, :, src] - self.pixel_means[src]
            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])

            started = datetime.datetime.now()
            self.model.forward(db, is_train=False)
            elapsed = datetime.datetime.now() - started
            print('forward uses', elapsed.total_seconds(), 'seconds')

            # Per the original author's note: the output is a list of 9
            # arrays, three per stride (score, bbox, keypoint).
            net_out = self.model.get_outputs()
            pre_nms_topN = self._rpn_pre_nms_top_n

            for s in self.feat_strides:
                key = 'stride%s' % s
                stride = int(s)
                # Offset of this stride's score output inside net_out.
                if s == self.feat_strides[0]:
                    idx = 0
                if s == self.feat_strides[1]:
                    idx = 3
                elif s == self.feat_strides[2]:
                    idx = 6

                A = self._num_anchors[key]
                # Classification scores for this stride; the first A
                # channels are dropped.
                scores = net_out[idx].asnumpy()[:, A:, :, :]
                idx += 1
                bbox_deltas = net_out[idx].asnumpy()
                idx += 1
                kpoint_deltas = net_out[idx].asnumpy()

                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                K = height * width

                # Anchors laid out over the feature map for this stride.
                anchors = anchors_plane(height, width, stride,
                                        self._anchors_fpn[key].astype(np.float32))
                anchors = anchors.reshape((K * A, 4))

                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

                kpoint_deltas = self._clip_pad(kpoint_deltas, (height, width))
                kpoint_deltas = kpoint_deltas.transpose((0, 2, 3, 1)).reshape((-1, 10))

                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                # Decode boxes and keypoints, then clip them to the image.
                proposals = clip_boxes(self._bbox_pred(anchors, bbox_deltas),
                                       im_info[:2])
                proposals_kp = clip_points(kpoint_pred(anchors, kpoint_deltas),
                                           im_info[:2])

                # Take the top N candidates by score.
                ranking = scores.ravel().argsort()[::-1]
                if pre_nms_topN > 0:
                    ranking = ranking[:pre_nms_topN]
                proposals = proposals[ranking, :]
                proposals_kp = proposals_kp[ranking, :]
                scores = scores[ranking]

                # Back to the coordinates of the unscaled input image.
                proposals /= im_scale
                proposals_kp /= im_scale

                box_chunks.append(proposals)
                kp_chunks.append(proposals_kp)
                score_chunks.append(scores)

        proposals = np.vstack(box_chunks)
        proposals_kp = np.vstack(kp_chunks)
        scores = np.vstack(score_chunks)
        ranking = scores.ravel().argsort()[::-1]
        proposals = proposals[ranking, :]
        proposals_kp = proposals_kp[ranking, :]
        scores = scores[ranking]

        det = np.hstack((proposals, scores, proposals_kp)).astype(np.float32)

        if self.nms_threshold < 1.0:
            det = det[self.nms(det), :]
        if threshold > 0.0:
            det = det[np.where(det[:, 4] >= threshold)[0], :]
        return det
    def detect(self, img, scales_index=0):
        """
        Detect boxes on ``img`` after surrounding it with a constant border.

        The image is padded on all four sides by ``config.TEST.CONSTANT``
        pixels, the scale factors are obtained from
        ``self.get_boxes(padded_img, scales_index)`` and the network is run
        once per scale.  Candidates of all scales are merged, sorted by
        score, passed through NMS (when ``self.nms_threshold < 1.0``) and
        filtered by ``config.TEST.SCORE_THRESH``.  Finally the border offset
        is subtracted from the box coordinates, which are clamped so that
        x1/y1 are not negative and x2/y2 do not exceed the width/height of
        the unpadded image.

        :param img: image array indexed as ``img[row, col, channel]``
        :param scales_index: forwarded unchanged to ``self.get_boxes``
        :return: float32 array whose rows are the box columns followed by
            the score, in coordinates of the unpadded image
        """
        proposals_list = []
        scores_list = []

        # Only the size of the unpadded image is needed later (for clamping),
        # so remember it instead of copying the whole image.
        src_height, src_width = img.shape[0], img.shape[1]

        CONSTANT = config.TEST.CONSTANT
        BLACK = [0, 0, 0]
        img = cv2.copyMakeBorder(img, CONSTANT, CONSTANT, CONSTANT, CONSTANT, cv2.BORDER_CONSTANT, value=BLACK)

        scales = self.get_boxes(img, scales_index)

        for im_scale in scales:
            if im_scale != 1.0:
                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
            else:
                im = img
            im = im.astype(np.float32)
            im_info = [im.shape[0], im.shape[1], im_scale]

            # Tensor channel i is image channel 2 - i minus that channel's mean.
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            for i in range(3):
                im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]
            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
            self.model.forward(db, is_train=False)
            net_out = self.model.get_outputs()
            pre_nms_topN = self._rpn_pre_nms_top_n

            for s in self._feat_stride_fpn:
                # With several scales, stride 32 is skipped on the last scale.
                if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
                    continue
                key = 'stride%s' % s
                stride = int(s)

                # Position of this stride's score output; box deltas follow.
                idx = 0
                if s == 16:
                    idx = 2
                elif s == 8:
                    idx = 4

                A = self._num_anchors[key]
                scores = net_out[idx].asnumpy()
                scores = scores[:, A:, :, :]
                bbox_deltas = net_out[idx + 1].asnumpy()

                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                K = height * width

                anchors = anchors_plane(height, width, stride, self._anchors_fpn[key].astype(np.float32))
                anchors = anchors.reshape((K * A, 4))

                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                proposals = self._bbox_pred(anchors, bbox_deltas)
                proposals = clip_boxes(proposals, im_info[:2])

                # Highest scores first, optionally truncated to the top N.
                order = scores.ravel().argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]

                # Back to the coordinates of the (padded) unscaled image.
                proposals /= im_scale

                proposals_list.append(proposals)
                scores_list.append(scores)

        proposals = np.vstack(proposals_list)
        scores = np.vstack(scores_list)
        order = scores.ravel().argsort()[::-1]

        proposals = proposals[order, :]
        scores = scores[order]

        det = np.hstack((proposals, scores)).astype(np.float32)

        if self.nms_threshold < 1.0:
            keep = self.nms(det)
            det = det[keep, :]
        threshold = config.TEST.SCORE_THRESH
        if threshold > 0.0:
            keep = np.where(det[:, 4] >= threshold)[0]
            det = det[keep, :]

        # Remove the border offset and clamp to the unpadded image, for all
        # rows at once (a no-op when ``det`` has no rows).
        det[:, 0:4] -= CONSTANT
        det[:, 0] = np.maximum(det[:, 0], 0)
        det[:, 1] = np.maximum(det[:, 1], 0)
        det[:, 2] = np.minimum(det[:, 2], src_width)
        det[:, 3] = np.minimum(det[:, 3], src_height)
        return det
Beispiel #24
0
def _append_quad(submit_path, quad):
    """Append one ``x0,y0,x1,y1,x2,y2,x3,y3`` line for a 4-point box.

    :param submit_path: text file to append to (created if missing)
    :param quad: indexable of four ``(first, second)`` coordinate pairs
    """
    coords = []
    for corner in range(4):
        coords.append(str(quad[corner][0]))
        coords.append(str(quad[corner][1]))
    # Context manager so the handle is released even if a write fails.
    with open(submit_path, 'a') as result_txt:
        result_txt.write(','.join(coords))
        result_txt.write('\r\n')


def _rasterize_instance(det_row, mask, scale, canvas_shape):
    """Binarize one instance mask and paste it onto an empty image-sized canvas.

    :param det_row: detection row whose first four values are x1, y1, x2, y2
    :param mask: 2-D mask probabilities for this detection
    :param scale: factor applied to the box before truncating to integers
    :param canvas_shape: (height, width) of the canvas to paste into
    :return: ``(bbox, binary_mask, canvas)`` or ``None`` when the integer box
        has no area (``cv2.resize`` rejects an empty target size)
    """
    bbox = [int(v) for v in det_row[:4] * scale]
    box_w = bbox[2] - bbox[0]
    box_h = bbox[3] - bbox[1]
    if box_w <= 0 or box_h <= 0:
        return None
    mask = cv2.resize(mask, (box_w, box_h), interpolation=cv2.INTER_LINEAR)
    # Hard threshold at 0.3: foreground becomes 1, everything else 0.
    mask[mask > 0.3] = 1
    mask[mask <= 0.3] = 0
    canvas = np.zeros(canvas_shape)
    canvas[bbox[1]:bbox[3],
           bbox[0]:bbox[2]] = canvas[bbox[1]:bbox[3],
                                     bbox[0]:bbox[2]] + mask
    return bbox, mask, canvas


def demo_maskrcnn(network,
                  ctx,
                  prefix,
                  epoch,
                  vis=True,
                  has_rpn=True,
                  thresh=0.001):
    """Run the Mask R-CNN text model over the test list and score the output.

    The number of lines in ``<dataset_path>/imglists/test.lst`` decides how
    many images are processed; image ``k`` (1-based) is read from
    ``<dataset_path>/ch4_test_images/img_<k>.jpg``.  For every image the
    kept detections are written as quadrilaterals to
    ``<dataset_path>/submit/res_img_<k>.txt`` and a visualisation is saved to
    ``<dataset_path>/test_result_img/result_<k>.jpg``.  Finally the submit
    directory is zipped and the evaluation script under ``script_test_ch4``
    is launched.

    :param network: name spliced into ``get_<network>_mask_test`` to look up
        the symbol factory
    :param ctx: context forwarded to ``load_param`` and ``MutableModule``
    :param prefix: checkpoint prefix forwarded to ``load_param``
    :param epoch: checkpoint epoch forwarded to ``load_param``
    :param vis: accepted for interface compatibility; not read here
    :param has_rpn: must be truthy, anything else trips the assertion
    :param thresh: minimum class score for a detection to be considered
    :return: None
    """
    import copy
    import random

    assert has_rpn, "Only has_rpn==True has been supported."
    # NOTE(review): eval() executes whatever `network` spells out; only pass
    # trusted, hard-coded network names here.
    sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES,
                                                num_anchors=config.NUM_ANCHORS)
    arg_params, aux_params = load_param(prefix,
                                        epoch,
                                        convert=True,
                                        ctx=ctx,
                                        process=True)
    # Hard-wired switch: while False the box-drawing "split" branch below is
    # never taken.
    split = False
    max_image_shape = (1, 3, 1024, 1024)
    max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym,
                        data_names=["data", "im_info"],
                        label_names=None,
                        max_data_shapes=max_data_shapes,
                        context=ctx)
    mod.bind(data_shapes=max_data_shapes,
             label_shapes=None,
             for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    class OneDataBatch():
        """Minimal single-image batch exposing the attributes the module reads."""

        def __init__(self, img):
            im_info = mx.nd.array([[img.shape[0], img.shape[1], 1.0]])
            # HWC -> CHW, then reverse the channel order while adding the
            # leading batch axis.
            img = np.transpose(img, (2, 0, 1))
            img = img[np.newaxis, (2, 1, 0)]
            self.data = [mx.nd.array(img), im_info]
            self.label = None
            self.provide_label = None
            self.provide_data = [("data", (1, 3, img.shape[2], img.shape[3])),
                                 ("im_info", (1, 3))]

    imglist_file = os.path.join(default.dataset_path, 'imglists', 'test.lst')
    assert os.path.exists(imglist_file), 'Path does not exist: {}'.format(
        imglist_file)
    imgfiles_list = []
    with open(imglist_file, 'r') as f:
        for line in f:
            label = line.strip().split('\t')
            imgfiles_list.append({'img_path': label[1]})

    submit_dir = os.path.join(default.dataset_path, 'submit')
    if not os.path.exists(submit_dir):
        os.makedirs(submit_dir)
    img_dir = os.path.join(default.dataset_path, 'test_result_img')
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)

    # Loop invariants.
    CLASSES = ('__background__', 'text')
    class_names = CLASSES
    color_white = (255, 255, 255)
    scale = 1.0
    nms = py_nms_wrapper(config.TEST.NMS)

    # Only the length of the list is used: images are addressed by 1-based
    # index, not by the path stored in the list file.
    for index in range(1, len(imgfiles_list) + 1):
        img_path = os.path.join(default.dataset_path, 'ch4_test_images',
                                'img_' + str(index) + '.jpg')
        submit_path = os.path.join(submit_dir, 'res_img_{}.txt'.format(index))
        img_ori = cv2.imread(img_path)
        batch = OneDataBatch(img_ori)
        mod.forward(batch, False)
        results = mod.get_outputs()
        output = dict(zip(mod.output_names, results))
        rois = output['rois_output'].asnumpy()[:, 1:]
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        mask_output = output['mask_prob_output'].asnumpy()
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes,
                                [img_ori.shape[0], img_ori.shape[1]])

        all_boxes = [None] * len(CLASSES)
        all_masks = [None] * len(CLASSES)
        label = np.argmax(scores, axis=1)
        label = label[:, np.newaxis]
        for cls_ind in range(len(CLASSES)):
            cls_boxes = pred_boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_masks = mask_output[:, cls_ind, :, :]
            cls_scores = scores[:, cls_ind, np.newaxis]
            # Keep rows that clear the score threshold AND whose arg-max
            # class is this one.
            keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
            cls_masks = cls_masks[keep, :, :]
            dets = np.hstack(
                (cls_boxes, cls_scores)).astype(np.float32)[keep, :]
            keep_la = nms(dets)
            print('------------------------keep_la', keep_la)
            all_boxes[cls_ind] = dets[keep_la, :]
            all_masks[cls_ind] = cls_masks[keep_la, :, :]
        boxes_this_image = [[]] + all_boxes[1:]
        masks_this_image = [[]] + all_masks[1:]

        im = copy.copy(img_ori)

        # `dets` still holds the last class's thresholded (pre-NMS) rows.
        # When there are none, emit a single all-zero quadrilateral so the
        # result file exists for this image.
        if len(dets) == 0:
            _append_quad(submit_path, np.int32(np.zeros((4, 2))))

        for k, name in enumerate(class_names):
            if name == '__background__':
                continue
            # randint is inclusive on both ends, so 255 is the largest valid
            # 8-bit channel value.
            color = (random.randint(0, 255), random.randint(0, 255),
                     random.randint(0, 255))
            dets = boxes_this_image[k]
            masks = masks_this_image[k]
            print('------------------------len(dets)', len(dets))
            for i in range(len(dets)):
                inst_i = _rasterize_instance(dets[i], masks[i], scale,
                                             im.shape[:2])
                if inst_i is None:
                    continue
                bbox_i, mask_i, im_binary_i = inst_i
                score_i = dets[i, -1]
                ni = np.sum(im_binary_i == 1)

                # Mask overlap of detection i against every other detection.
                overlap = []
                overlap_other = []
                for j in range(len(dets)):
                    if i == j:
                        continue
                    inst_j = _rasterize_instance(dets[j], masks[j], scale,
                                                 im.shape[:2])
                    if inst_j is None:
                        continue
                    bbox_j, _, im_binary_j = inst_j
                    im_binary = im_binary_i + im_binary_j
                    nj = np.sum(im_binary_j == 1)
                    nij = np.sum(im_binary == 2)
                    # Intersection over union of the two binary masks.
                    overlap.append(float(nij) / (ni + nj - nij))
                    # Fraction of mask i that is covered by mask j.
                    overlap_other.append(float(nij) / ni)

                # No comparable neighbour: treat as zero overlap so the
                # np.max calls below never see an empty list.
                if not overlap:
                    overlap.append(0)
                    overlap_other.append(0)

                if np.max(overlap) < 0.6 and not split and np.max(
                        overlap_other) < 0.9:
                    cv2.putText(im,
                                '%s %.3f' % (class_names[k], score_i),
                                (bbox_i[0], bbox_i[1] + 10),
                                color=color_white,
                                fontFace=cv2.FONT_HERSHEY_COMPLEX,
                                fontScale=0.5)
                    px = np.where(mask_i == 1)
                    if px[0].size == 0:
                        # Nothing survived the 0.3 threshold; there is no
                        # region to outline.
                        continue
                    x_min = np.min(px[1])
                    y_min = np.min(px[0])
                    x_max = np.max(px[1])
                    y_max = np.max(px[0])
                    if x_max - x_min <= 1 or y_max - y_min <= 1:
                        continue
                    mask_color = random.randint(0, 255)
                    c = random.randint(0, 2)
                    # Tint one randomly chosen channel under the mask,
                    # saturating at 255.
                    target = im[bbox_i[1]:bbox_i[3], bbox_i[0]:bbox_i[2],
                                c] + mask_color * mask_i
                    target[target >= 255] = 255
                    im[bbox_i[1]:bbox_i[3], bbox_i[0]:bbox_i[2], c] = target
                    rect = minimum_bounding_rectangle(im_binary_i)
                    # Swap the two coordinates of every corner before
                    # drawing and writing.
                    mini_box = np.zeros((4, 2))
                    for corner in range(4):
                        mini_box[corner][0] = rect[corner][1]
                        mini_box[corner][1] = rect[corner][0]
                    mini_box = np.int32(mini_box)
                    cv2.polylines(im, [mini_box], 1, (255, 255, 255))
                    _append_quad(submit_path, mini_box)
                if split:
                    # NOTE(review): unreachable while `split` is hard-coded
                    # to False; `bbox_j` is only bound if the inner loop ran.
                    if np.max(overlap_other) > 0.6:
                        W = bbox_j[2] - bbox_j[0]
                        H = bbox_j[3] - bbox_j[1]
                        bbox_i[2] = bbox_i[2] - W
                        bbox_i[3] = bbox_i[3] - H
                    cv2.rectangle(im, (bbox_i[0], bbox_i[1]),
                                  (bbox_i[2], bbox_i[3]),
                                  color=color,
                                  thickness=2)
                    cv2.putText(im,
                                '%s %.3f' % (class_names[k], score_i),
                                (bbox_i[0], bbox_i[1] + 10),
                                color=color_white,
                                fontFace=cv2.FONT_HERSHEY_COMPLEX,
                                fontScale=0.5)
                    px = np.where(mask_i == 1)
                    x_min = np.min(px[1])
                    y_min = np.min(px[0])
                    x_max = np.max(px[1])
                    y_max = np.max(px[0])
                    if x_max - x_min <= 1 or y_max - y_min <= 1:
                        continue
                    mask_color = random.randint(0, 255)
                    c = random.randint(0, 2)
                    target = im[bbox_i[1]:bbox_i[3], bbox_i[0]:bbox_i[2],
                                c] + mask_color * mask_i
                    target[target >= 255] = 255
                    im[bbox_i[1]:bbox_i[3], bbox_i[0]:bbox_i[2], c] = target
            result_img_path = os.path.join(img_dir,
                                           'result_{}.jpg'.format(index))
            cv2.imwrite(result_img_path, im)

    zip_file = os.path.join('script_test_ch4', 'submit.zip')
    createZip(submit_dir, zip_file)
    os.system(
        "python ./script_test_ch4/script.py -g=./script_test_ch4/gt.zip -s=./script_test_ch4/submit.zip"
    )
    def detect(self, img, threshold=0.5, scales=(1.0, ), do_flip=False):
        """Run the detector over one image at several scales and merge the hits.

        For every requested scale (and optionally its horizontal mirror) the
        image is normalised, pushed through ``self.model`` and decoded per
        feature stride into boxes, scores and, when enabled, landmarks.  All
        candidates scoring at least ``threshold`` are pooled, sorted by
        score and reduced with NMS (or box voting when ``self.vote`` is set).

        :param img: input image; a trailing channel axis is appended here and
            only channel 0 is normalised, so a 2-D single-channel array is
            presumably expected - confirm against callers
        :param threshold: minimum score for a candidate to be kept
        :param scales: iterable of resize factors to evaluate
        :param do_flip: also evaluate the horizontally mirrored image
        :return: ``(det, scores)`` where ``det`` holds the surviving boxes with
            their score in column 4 and ``scores`` is the score-sorted column
            of ALL pooled candidates (pre-NMS).  NOTE(review): when nothing
            passes the threshold the second element is instead an empty
            landmark array (or None) - the two paths disagree.
        :raises ValueError: from ``np.vstack`` if ``scales`` is empty
        """
        proposals_list = []
        scores_list = []
        landmarks_list = []
        timea = datetime.datetime.now()

        def _report(tag):
            # Debug-only trace of wall-clock time elapsed since entry.
            if self.debug:
                diff = datetime.datetime.now() - timea
                print('%s uses' % tag, diff.total_seconds(), 'seconds')

        flips = [0, 1] if do_flip else [0]

        for im_scale in scales:
            for flip in flips:
                if im_scale != 1.0:
                    im = cv2.resize(img,
                                    None,
                                    None,
                                    fx=im_scale,
                                    fy=im_scale,
                                    interpolation=cv2.INTER_LINEAR)
                else:
                    im = img.copy()
                im = im[:, :, np.newaxis]
                if flip:
                    im = im[:, ::-1, :]
                if self.nocrop:
                    # Zero-pad bottom/right up to the next multiple of 32.
                    h = (im.shape[0] + 31) // 32 * 32
                    w = (im.shape[1] + 31) // 32 * 32
                    _im = np.zeros((h, w, 1), dtype=np.float32)
                    _im[0:im.shape[0], 0:im.shape[1], :] = im
                    im = _im
                else:
                    im = im.astype(np.float32)
                _report('X1')

                im_info = [im.shape[0], im.shape[1]]
                im_tensor = np.zeros((1, 1, im.shape[0], im.shape[1]))
                im_tensor[0, 0, :, :] = (im[:, :, 0] / self.pixel_scale -
                                         self.pixel_means[0]
                                         ) / self.pixel_stds[0]
                _report('X2')
                data = nd.array(im_tensor)
                db = mx.io.DataBatch(data=(data, ),
                                     provide_data=[('data', data.shape)])
                _report('X3')
                self.model.forward(db, is_train=False)
                net_out = self.model.get_outputs()

                for _idx, s in enumerate(self._feat_stride_fpn):
                    key = 'stride%s' % s
                    stride = int(s)
                    # Outputs are grouped per stride: score, bbox and, when
                    # landmarks are enabled, landmark deltas.
                    out_idx = _idx * 3 if self.use_landmarks else _idx * 2
                    A = self._num_anchors[key]

                    scores = net_out[out_idx].asnumpy()
                    _report('A')
                    # Drop the first A channels and keep the rest.
                    scores = scores[:, A:, :, :]
                    bbox_deltas = net_out[out_idx + 1].asnumpy()

                    height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                    K = height * width
                    anchors = anchors_plane(height, width, stride,
                                            self._anchors_fpn[key])
                    anchors = anchors.reshape((K * A, 4))

                    scores = self._clip_pad(scores, (height, width))
                    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                    bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                    bbox_pred_len = bbox_deltas.shape[3] // A
                    bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))

                    proposals = self.bbox_pred(anchors, bbox_deltas)
                    proposals = clip_boxes(proposals, im_info[:2])

                    scores_ravel = scores.ravel()
                    order = np.where(scores_ravel >= threshold)[0]
                    proposals = proposals[order, :]
                    scores = scores[order]
                    if stride == 4 and self.decay4 < 1.0:
                        scores *= self.decay4
                    if flip:
                        # Mirror x-coordinates back into the unflipped frame.
                        oldx1 = proposals[:, 0].copy()
                        oldx2 = proposals[:, 2].copy()
                        proposals[:, 0] = im.shape[1] - oldx2 - 1
                        proposals[:, 2] = im.shape[1] - oldx1 - 1

                    # Back to the coordinates of the unscaled input.
                    proposals[:, 0:4] /= im_scale

                    proposals_list.append(proposals)
                    scores_list.append(scores)

                    if not self.vote and self.use_landmarks:
                        landmark_deltas = net_out[out_idx + 2].asnumpy()
                        landmark_deltas = self._clip_pad(
                            landmark_deltas, (height, width))
                        landmark_pred_len = landmark_deltas.shape[1] // A
                        landmark_deltas = landmark_deltas.transpose(
                            (0, 2, 3, 1)).reshape(
                                (-1, 5, landmark_pred_len // 5))
                        landmarks = self.landmark_pred(anchors,
                                                       landmark_deltas)
                        landmarks = landmarks[order, :]

                        if flip:
                            landmarks[:, :,
                                      0] = im.shape[1] - landmarks[:, :, 0] - 1
                            # Mirroring swaps point pairs (0, 1) and (3, 4);
                            # fancy indexing returns a reordered copy.
                            flip_perm = [1, 0, 2, 4, 3]
                            landmarks = landmarks[:, flip_perm, :]
                        landmarks[:, :, 0:2] /= im_scale
                        landmarks_list.append(landmarks)

        _report('B')
        proposals = np.vstack(proposals_list)
        landmarks = None
        if proposals.shape[0] == 0:
            if self.use_landmarks:
                landmarks = np.zeros((0, 5, 2))
            return np.zeros((0, 5)), landmarks
        scores = np.vstack(scores_list)
        # Highest score first.
        order = scores.ravel().argsort()[::-1]
        proposals = proposals[order, :]
        scores = scores[order]
        if not self.vote and self.use_landmarks:
            landmarks = np.vstack(landmarks_list)
            landmarks = landmarks[order].astype(np.float32, copy=False)

        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        if not self.vote:
            keep = self.nms(pre_det)
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = det[keep, :]
            if self.use_landmarks:
                landmarks = landmarks[keep]
        else:
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = self.bbox_vote(det)

        _report('C')
        return det, scores
    def detect(self, img, threshold=0.5):
        """
        Detect boxes and landmarks in a single image
        :param img: input image, handed to ``self._preprocess_image``
        :param threshold: minimum score a candidate needs to be kept
        :return: tuple (det, landmarks); ``det`` carries the box in columns
                 0-3 and the score in column 4, both filtered by NMS.  When no
                 candidate passes the threshold the result is a pair of empty
                 arrays shaped (0, 5) and (0, 5, 2).
        """
        im_tensor, im_info, im_scale = self._preprocess_image(img)
        outputs = [elt.numpy() for elt in self.model(im_tensor)]

        kept_boxes = []
        kept_scores = []
        kept_landmarks = []

        for level, s in enumerate(self._feat_stride_fpn):
            key = 'stride%s' % s
            # Three consecutive outputs per stride: score, bbox, landmark.
            base = 3 * level
            A = self._num_anchors[key]

            # Channels-last output; drop the first A channels.
            level_scores = outputs[base][:, :, :, A:]
            deltas = outputs[base + 1]
            height, width = deltas.shape[1], deltas.shape[2]

            anchors = anchors_plane(height, width, s, self._anchors_fpn[key])
            anchors = anchors.reshape((height * width * A, 4))
            level_scores = level_scores.reshape((-1, 1))

            deltas = deltas.reshape((-1, deltas.shape[3] // A))
            # Undo the per-coordinate normalisation of the regression output.
            for coord in range(4):
                deltas[:, coord::4] = (deltas[:, coord::4] *
                                       self.bbox_stds[coord])
            boxes = clip_boxes(self.bbox_pred(anchors, deltas), im_info[:2])

            if s == 4 and self.decay4 < 1.0:
                level_scores *= self.decay4

            picked = np.where(level_scores.ravel() >= threshold)[0]
            boxes = boxes[picked, :]
            level_scores = level_scores[picked]

            # Back to the coordinate frame of the original image.
            boxes[:, 0:4] /= im_scale
            kept_boxes.append(boxes)
            kept_scores.append(level_scores)

            lm_deltas = outputs[base + 2]
            lm_len = lm_deltas.shape[3] // A
            lm_deltas = lm_deltas.reshape((-1, 5, lm_len // 5))
            points = self.landmark_pred(anchors, lm_deltas)[picked, :]
            points[:, :, 0:2] /= im_scale
            kept_landmarks.append(points)

        proposals = np.vstack(kept_boxes)
        if proposals.shape[0] == 0:
            return np.zeros((0, 5)), np.zeros((0, 5, 2))

        scores = np.vstack(kept_scores)
        # Highest score first.
        ranking = scores.ravel().argsort()[::-1]
        proposals = proposals[ranking, :]
        scores = scores[ranking]
        landmarks = np.vstack(kept_landmarks)[ranking].astype(np.float32,
                                                              copy=False)

        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        keep = self.nms(pre_det)
        det = np.hstack((pre_det, proposals[:, 4:]))[keep, :]
        return det, landmarks[keep]
    def forward(self, is_train, req, in_data, out_data, aux):
        """Decode FPN outputs into a fixed number of proposals for one image.

        ``in_data`` is read as: one class-probability array per key in
        ``self.fpn_keys``, then one bbox-delta array per key, with ``im_info``
        as the last entry.  Per stride the deltas are applied to the anchors,
        the boxes clipped to the image and boxes below the stride's minimum
        size dropped.  The pooled boxes are sorted by score, truncated to
        ``_rpn_pre_nms_top_n``, passed through NMS, truncated to
        ``_rpn_post_nms_top_n`` and padded back up to that count by randomly
        repeating kept boxes.

        :param is_train: unused here
        :param req: write requests forwarded to ``self.assign``
        :param in_data: input arrays laid out as described above
        :param out_data: ``out_data[0]`` receives rows of
            ``[0, x1, y1, x2, y2]``; ``out_data[1]`` receives the scores when
            ``self._output_score`` is set
        :param aux: unused here
        :raises ValueError: if the batch holds more than one image
        """
        nms = gpu_nms_wrapper(self._threshold, in_data[0][0].context.device_id)

        num_levels = len(self.fpn_keys)
        cls_prob_dict = dict(zip(self.fpn_keys, in_data[0:num_levels]))
        bbox_pred_dict = dict(
            zip(self.fpn_keys, in_data[num_levels:2 * num_levels]))
        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError("Sorry, multiple images each device is not implemented")

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size_dict = self._rpn_min_size_fpn

        # Identical for every pyramid level, so fetch it once.
        im_info = in_data[-1].asnumpy()[0, :]

        proposals_list = []
        scores_list = []
        for s in self._feat_stride_fpn:
            key = 'stride%s' % s
            stride = int(s)
            A = self._num_anchors[key]
            # Drop the first A channels of the class output, keep the rest.
            scores = cls_prob_dict[key].asnumpy()[:, A:, :, :]
            bbox_deltas = bbox_pred_dict[key].asnumpy()

            if DEBUG:
                # Single-argument call form prints the same on Python 2 and 3.
                print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
                print('scale: {}'.format(im_info[2]))

            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
            K = height * width

            anchors = anchors_plane(height, width, stride,
                                    self._anchors_fpn[key].astype(np.float32))
            anchors = anchors.reshape((K * A, 4))

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            proposals = self._bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, im_info[:2])

            # Minimum size is scaled by the third im_info entry.
            keep = self._filter_boxes(proposals, min_size_dict[key] * im_info[2])
            proposals_list.append(proposals[keep, :])
            scores_list.append(scores[keep])

        proposals = np.vstack(proposals_list)
        scores = np.vstack(scores_list)
        # Highest score first.
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        det = np.hstack((proposals, scores)).astype(np.float32)

        if np.shape(det)[0] == 0:
            # Nothing survived filtering: fabricate identical dummy boxes so
            # the output still has the expected number of rows.
            print("Something wrong with the input image(resolution is too low?), generate fake proposals for it.")
            proposals = np.array([[1.0, 1.0, 2.0, 2.0]]*post_nms_topN, dtype=np.float32)
            scores = np.array([[0.9]]*post_nms_topN, dtype=np.float32)
            det = np.array([[1.0, 1.0, 2.0, 2.0, 0.9]]*post_nms_topN, dtype=np.float32)

        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]

        if len(keep) < post_nms_topN:
            # Pad with random repeats of kept indices to reach the fixed count.
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Column 0 is the batch index, always 0 for a single image.
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def postprocess(net_out, threshold, ctx_id, im_scale, im_info):
    """
    decode raw per-stride network outputs into detections and landmarks

    Strides 32, 16 and 8 are processed in that order. For every stride three
    consecutive entries of net_out are consumed: class scores, box deltas and
    landmark deltas. Anchors scoring at least `threshold` are decoded, clipped,
    rescaled by im_scale, merged across strides, sorted by descending score
    and passed through NMS (threshold 0.4).

    :param net_out: indexable collection of 4-D arrays, channel on axis 1
        (they are sliced as [:, c, :, :] and transposed with (0, 2, 3, 1))
    :param threshold: minimum score for an anchor to be kept
    :param ctx_id: device id handed to gpu_nms_wrapper; a negative value
        selects cpu_nms_wrapper instead
    :param im_scale: two-element sequence; x coordinates are divided by
        im_scale[0] and y coordinates by im_scale[1]
    :param im_info: sequence whose first two entries are passed to clip_boxes
        (presumably [height, width], as in the detect() methods of this file)
    :return: (det, landmarks); det holds x1, y1, x2, y2, score (plus any
        extra columns bbox_pred produced), landmarks has shape (N, 5, 2).
        With no surviving proposals, empty arrays of shape (0, 5) and
        (0, 5, 2) are returned.
    """
    # fixed decoding configuration
    flip = False
    decay4 = 0.5
    vote = False
    use_landmarks = True
    bbox_stds = [1.0, 1.0, 1.0, 1.0]
    landmark_std = 1.0
    nms_threshold = 0.4

    proposals_list = []
    scores_list = []
    landmarks_list = []
    strides_list = []

    if ctx_id >= 0:
        nms = gpu_nms_wrapper(nms_threshold, ctx_id)
    else:
        nms = cpu_nms_wrapper(nms_threshold)

    _ratio = (1., )

    _feat_stride_fpn = [32, 16, 8]
    anchor_cfg = {
        '32': {
            'SCALES': (32, 16),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
        '16': {
            'SCALES': (8, 4),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
        '8': {
            'SCALES': (2, 1),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
    }

    fpn_keys = ['stride%s' % s for s in _feat_stride_fpn]

    dense_anchor = False

    _anchors_fpn = dict(
        zip(fpn_keys,
            generate_anchors_fpn(dense_anchor=dense_anchor, cfg=anchor_cfg)))
    for k in _anchors_fpn:
        _anchors_fpn[k] = _anchors_fpn[k].astype(np.float32)

    # Pair every key with its own anchor count instead of zipping the keys
    # against dict.values(), which silently depends on dict ordering.
    _num_anchors = dict((k, v.shape[0]) for k, v in _anchors_fpn.items())
    sym_idx = 0

    for s in _feat_stride_fpn:
        _key = 'stride%s' % s
        stride = int(s)
        A = _num_anchors[_key]

        # channels from A onwards are kept as the scores that get thresholded
        scores = net_out[sym_idx]
        scores = scores[:, A:, :, :]
        bbox_deltas = net_out[sym_idx + 1]

        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
        K = height * width
        anchors = anchors_plane(height, width, stride, _anchors_fpn[_key])
        anchors = anchors.reshape((K * A, 4))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
        bbox_pred_len = bbox_deltas.shape[3] // A
        bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
        bbox_deltas[:, 0::4] = bbox_deltas[:, 0::4] * bbox_stds[0]
        bbox_deltas[:, 1::4] = bbox_deltas[:, 1::4] * bbox_stds[1]
        bbox_deltas[:, 2::4] = bbox_deltas[:, 2::4] * bbox_stds[2]
        bbox_deltas[:, 3::4] = bbox_deltas[:, 3::4] * bbox_stds[3]
        proposals = bbox_pred(anchors, bbox_deltas)

        proposals = clip_boxes(proposals, im_info[:2])

        # never taken with the strides configured above (32, 16, 8)
        if stride == 4 and decay4 < 1.0:
            scores *= decay4

        scores_ravel = scores.ravel()

        order = np.where(scores_ravel >= threshold)[0]

        proposals = proposals[order, :]
        scores = scores[order]
        if flip:
            # mirror x coordinates; the image width comes from im_info
            # because no image object is available in this function
            oldx1 = proposals[:, 0].copy()
            oldx2 = proposals[:, 2].copy()
            proposals[:, 0] = im_info[1] - oldx2 - 1
            proposals[:, 2] = im_info[1] - oldx1 - 1

        # map the boxes back using separate x and y scale factors
        proposals[:, 0] /= im_scale[0]
        proposals[:, 1] /= im_scale[1]
        proposals[:, 2] /= im_scale[0]
        proposals[:, 3] /= im_scale[1]

        proposals_list.append(proposals)
        scores_list.append(scores)
        if nms_threshold < 0.0:
            _strides = np.empty(shape=(scores.shape), dtype=np.float32)
            _strides.fill(stride)
            strides_list.append(_strides)
        if not vote and use_landmarks:
            landmark_deltas = net_out[sym_idx + 2]
            landmark_pred_len = landmark_deltas.shape[1] // A
            landmark_deltas = landmark_deltas.transpose((0, 2, 3, 1)).reshape(
                (-1, 5, landmark_pred_len // 5))
            landmark_deltas *= landmark_std
            landmarks = landmark_pred(anchors, landmark_deltas)
            landmarks = landmarks[order, :]

            if flip:
                landmarks[:, :, 0] = im_info[1] - landmarks[:, :, 0] - 1
                # swap landmark rows 0<->1 and 3<->4 after mirroring; a
                # separate name keeps the anchor selection `order` intact
                flip_order = [1, 0, 2, 4, 3]
                flandmarks = landmarks.copy()
                for idx, a in enumerate(flip_order):
                    flandmarks[:, idx, :] = landmarks[:, a, :]
                landmarks = flandmarks
            # broadcasts im_scale over the last axis of length 2
            landmarks[:, :, 0:2] /= im_scale
            landmarks_list.append(landmarks)

        # three outputs per stride with landmarks, two without
        if use_landmarks:
            sym_idx += 3
        else:
            sym_idx += 2

    proposals = np.vstack(proposals_list)

    landmarks = None
    if proposals.shape[0] == 0:
        if use_landmarks:
            landmarks = np.zeros((0, 5, 2))
        if nms_threshold < 0.0:
            return np.zeros((0, 6)), landmarks
        else:
            return np.zeros((0, 5)), landmarks

    scores = np.vstack(scores_list)
    scores_ravel = scores.ravel()

    # highest score first
    order = scores_ravel.argsort()[::-1]

    proposals = proposals[order, :]

    scores = scores[order]
    if nms_threshold < 0.0:
        strides = np.vstack(strides_list)
        strides = strides[order]
    if not vote and use_landmarks:
        landmarks = np.vstack(landmarks_list)
        landmarks = landmarks[order].astype(np.float32, copy=False)

    if nms_threshold > 0.0:
        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        det = np.hstack((pre_det, proposals[:, 4:]))
        if not vote:
            keep = nms(pre_det)
            det = det[keep, :]
            if use_landmarks:
                landmarks = landmarks[keep]
        else:
            # the voting branch belongs to `if not vote`, not to
            # `if use_landmarks`; otherwise det is undefined in vote mode
            det = bbox_vote(det, nms_threshold)
    elif nms_threshold < 0.0:
        det = np.hstack((proposals[:,
                                   0:4], scores, strides)).astype(np.float32,
                                                                  copy=False)
    else:
        det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)

    return det, landmarks
Beispiel #29
0
    def detect(self, img, threshold=0.05, scales=[1.0]):
        """
        run the model on img at every requested scale and merge the boxes

        :param img: image array indexed as [row, col, channel]; the three
            channels are fed to the network in reversed order after
            subtracting self.pixel_means
        :param threshold: detections scoring below this are dropped at the
            end (skipped when threshold is not positive)
        :param scales: resize factors applied to img before inference
        :return: float32 array whose rows are the box columns followed by
            the score in the last column
        """
        # Position of each stride's score output among the model outputs; the
        # matching box deltas sit at the following index. Other strides use 0.
        score_slot = {16: 2, 8: 4}
        boxes_per_level = []
        scores_per_level = []

        for im_scale in scales:
            if im_scale == 1.0:
                im = img
            else:
                im = cv2.resize(img,
                                None,
                                None,
                                fx=im_scale,
                                fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
            im = im.astype(np.float32)
            rows, cols = im.shape[0], im.shape[1]
            im_info = [rows, cols, im_scale]

            # build the (1, 3, H, W) input with reversed channel order and
            # per-channel mean subtraction
            im_tensor = np.zeros((1, 3, rows, cols))
            for dst, src in enumerate((2, 1, 0)):
                im_tensor[0, dst, :, :] = im[:, :, src] - self.pixel_means[src]
            data = nd.array(im_tensor)
            batch = mx.io.DataBatch(data=(data, ),
                                    provide_data=[('data', data.shape)])
            self.model.forward(batch, is_train=False)
            net_out = self.model.get_outputs()
            pre_nms_topN = self._rpn_pre_nms_top_n

            # with several scales, stride 32 is skipped for the last scale
            drop_stride32 = len(scales) > 1 and im_scale == scales[-1]

            for s in self._feat_stride_fpn:
                if drop_stride32 and s == 32:
                    continue
                level = 'stride%s' % s
                stride = int(s)
                idx = score_slot.get(s, 0)
                print('getting',
                      im_scale,
                      stride,
                      idx,
                      len(net_out),
                      data.shape,
                      file=sys.stderr)
                level_scores = net_out[idx].asnumpy()
                A = self._num_anchors[level]
                # keep the score channels from index A onwards
                level_scores = level_scores[:, A:, :, :]
                bbox_deltas = net_out[idx + 1].asnumpy()

                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

                anchors = anchors_plane(
                    height, width, stride,
                    self._anchors_fpn[level].astype(np.float32))
                anchors = anchors.reshape((height * width * A, 4))

                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                bbox_deltas = bbox_deltas.reshape((-1, 4))

                level_scores = self._clip_pad(level_scores, (height, width))
                level_scores = level_scores.transpose((0, 2, 3, 1))
                level_scores = level_scores.reshape((-1, 1))

                level_boxes = self._bbox_pred(anchors, bbox_deltas)
                level_boxes = clip_boxes(level_boxes, im_info[:2])

                # best-scoring anchors first, optionally truncated
                ranking = level_scores.ravel().argsort()[::-1]
                if pre_nms_topN > 0:
                    ranking = ranking[:pre_nms_topN]
                level_boxes = level_boxes[ranking, :]
                level_scores = level_scores[ranking]

                # back to the coordinate frame of the unscaled image
                level_boxes /= im_scale

                boxes_per_level.append(level_boxes)
                scores_per_level.append(level_scores)

        merged_boxes = np.vstack(boxes_per_level)
        merged_scores = np.vstack(scores_per_level)
        ranking = merged_scores.ravel().argsort()[::-1]
        merged_boxes = merged_boxes[ranking, :]
        merged_scores = merged_scores[ranking]

        det = np.hstack((merged_boxes, merged_scores)).astype(np.float32)

        if self.nms_threshold < 1.0:
            det = det[self.nms(det), :]
        if threshold > 0.0:
            det = det[np.where(det[:, 4] >= threshold)[0], :]
        return det
    def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False):
        """
        detect boxes and (optionally) landmarks on img

        For every scale (and, with do_flip, for the image and its horizontal
        mirror) the image is resized, optionally zero-padded to multiples of
        32, normalised and passed to self.get_pred. The per-stride outputs
        are read from the returned mapping under the keys
        'rpn_cls_prob_stride%s', 'rpn_bbox_pred_stride%s' and
        'rpn_landmark_pred_stride%s'. Anchors scoring at least `threshold`
        are decoded, mapped back by dividing by the scale, merged, sorted by
        descending score and reduced with self.nms (or self.bbox_vote when
        self.vote is set).

        :param img: image array indexed as [row, col, channel]
        :param threshold: minimum score for an anchor to be kept
        :param scales: resize factors applied to img before inference
        :param do_flip: also run the horizontally mirrored image
        :return: (det, landmarks); landmarks is None unless they are decoded
            (not self.vote and self.use_landmarks). With no proposals the
            result is np.zeros((0, 5)) plus np.zeros((0, 5, 2)) or None.
        """
        print(
            'get into detect, confi thresold={}, scales={}, do_flip={}'.format(
                threshold, scales, do_flip))
        proposals_list = []
        scores_list = []
        landmarks_list = []
        # NOTE(review): timea is never read again in this method
        timea = datetime.datetime.now()
        flips = [0]
        if do_flip:
            flips = [0, 1]

        # TODO: resize the input image according to scale
        for im_scale in scales:
            for flip in flips:
                if im_scale != 1.0:
                    im = cv2.resize(img,
                                    None,
                                    None,
                                    fx=im_scale,
                                    fy=im_scale,
                                    interpolation=cv2.INTER_LINEAR)
                else:
                    im = img.copy()
                # Mirror the image horizontally
                if flip:
                    im = im[:, ::-1, :]
                # With nocrop set, zero-pad height and width up to the next
                # multiple of 32 (the image sits in the top-left corner)
                if self.nocrop:
                    if im.shape[0] % 32 == 0:
                        h = im.shape[0]
                    else:
                        h = (im.shape[0] // 32 + 1) * 32
                    if im.shape[1] % 32 == 0:
                        w = im.shape[1]
                    else:
                        w = (im.shape[1] // 32 + 1) * 32
                    _im = np.zeros((h, w, 3), dtype=np.float32)
                    _im[0:im.shape[0], 0:im.shape[1], :] = im
                    im = _im
                else:
                    im = im.astype(np.float32)
                im_info = [im.shape[0], im.shape[1]]  #h,w
                im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
                for i in range(3):
                    im_tensor[0, i, :, :] = (
                        im[:, :, 2 - i] / self.pixel_scale -
                        self.pixel_means[2 - i]) / self.pixel_stds[
                            2 - i]  # TODO: channel order appears to be reversed here, to stay consistent with image.py
                data = np.array(im_tensor)

                # Run the model to obtain the predictions
                net_out = self.get_pred(data)
                if self.debug:
                    for key in net_out.keys():
                        print('{} = {}\n'.format(key, net_out[key].shape))

                for _idx, s in enumerate(self._feat_stride_fpn):
                    # print('begin stride{}-------------------------------------------------\n'.format(s))
                    _key = 'stride%s' % s
                    stride = int(s)
                    # print('getting im_scale={}, stride={}, len(net_out)={}, data.shape={}'.format(im_scale, stride, len(net_out), data.shape))
                    scores = net_out['rpn_cls_prob_stride%s' %
                                     s]  # TODO: note this is NHWC, not NCHW
                    if self.debug:
                        print('get score:', scores.shape)

                    # print('stride{}: scores before shape={}, idx={}'.format(stride, scores.shape, self._num_anchors['stride%s' % s]))
                    scores = scores[:, 1].reshape(
                        (-1, 1))  # TODO: (H*W*A, 1); index 1 is the positive-class probability

                    if self.debug:
                        print('AAAAstride{}: scores after shape={}'.format(
                            stride, scores.shape))

                    bbox_deltas = net_out['rpn_bbox_pred_stride%s' %
                                          s]  #TODO NHW8

                    # height/width are read from axes 1 and 2 of the deltas
                    height, width = bbox_deltas.shape[1], bbox_deltas.shape[2]
                    A = self._num_anchors['stride%s' % s]
                    K = height * width
                    anchors_fpn = self._anchors_fpn['stride%s' % s]
                    anchors = anchors_plane(height, width, stride,
                                            anchors_fpn)  # all anchors on this feature map
                    #print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)
                    anchors = anchors.reshape((K * A, 4))
                    if self.debug:
                        print('HW', (height, width))
                        print('anchors_fpn', anchors_fpn)
                        print('anchors', anchors.shape, '\n')

                    # scores = self._clip_pad_NCHW(scores, (height, width))  #(1, 4, H, W)
                    # scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) #(1, H, W, 4)
                    # print('scores reshape', scores.shape)
                    if self.debug:
                        print('before bbox_deltas', bbox_deltas.shape)
                    bbox_deltas = self._clip_pad_NHWC(
                        bbox_deltas, (height, width))  #(1, H, W, 8)
                    # bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))#(1, H, W, 8)
                    bbox_pred_len = bbox_deltas.shape[3] // A  #4
                    bbox_deltas = bbox_deltas.reshape(
                        (-1, bbox_pred_len))  #(H*W*2, 4)
                    if self.debug:
                        print('after bbox_deltas', bbox_deltas.shape, height,
                              width, '\n')

                    #print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr)
                    proposals = self.bbox_pred(
                        anchors,
                        bbox_deltas)  # TODO important! apply the deltas to the anchors
                    proposals = clip_boxes(proposals, im_info[:2])

                    scores_ravel = scores.ravel()
                    # NOTE(review): np.max raises on an empty array
                    max_score = np.max(scores_ravel)
                    print('proposals.shape={}, score_ravel.shape={}'.format(
                        proposals.shape, scores_ravel.shape))
                    print('max score', max_score)
                    # indices of the anchors that pass the score threshold
                    order = np.where(scores_ravel >= threshold)[0]
                    #_scores = scores_ravel[order]
                    #_order = _scores.argsort()[::-1]
                    #order = order[_order]
                    proposals = proposals[order, :]
                    scores = scores[order]
                    if flip:
                        # mirror the x coordinates back onto the unflipped image
                        oldx1 = proposals[:, 0].copy()
                        oldx2 = proposals[:, 2].copy()
                        proposals[:, 0] = im.shape[1] - oldx2 - 1
                        proposals[:, 2] = im.shape[1] - oldx1 - 1

                    proposals[:, 0:
                              4] /= im_scale  # TODO important: map the proposals back to original-image coordinates

                    proposals_list.append(proposals)
                    scores_list.append(scores)

                    if not self.vote and self.use_landmarks:
                        landmark_deltas = net_out['rpn_landmark_pred_stride%s'
                                                  % s]  #(1,20,H,W)
                        if self.debug:
                            print('before landmark_deltas',
                                  landmark_deltas.shape)
                        landmark_deltas = self._clip_pad_NCHW(
                            landmark_deltas, (height, width))
                        landmark_pred_len = landmark_deltas.shape[1] // A
                        landmark_deltas = landmark_deltas.transpose(
                            (0, 2, 3, 1)).reshape(
                                (-1, 5, landmark_pred_len // 5))
                        if self.debug:
                            print('after landmark_deltas',
                                  landmark_deltas.shape, landmark_deltas)
                        landmarks = self.landmark_pred(anchors,
                                                       landmark_deltas)
                        landmarks = landmarks[order, :]

                        if flip:
                            landmarks[:, :,
                                      0] = im.shape[1] - landmarks[:, :, 0] - 1
                            #for a in range(5):
                            #  oldx1 = landmarks[:, a].copy()
                            #  landmarks[:,a] = im.shape[1] - oldx1 - 1
                            # swap landmark rows 0<->1 and 3<->4 after
                            # mirroring; this rebinds `order`, which is not
                            # read again before the next stride recomputes it
                            order = [1, 0, 2, 4, 3]
                            flandmarks = landmarks.copy()
                            for idx, a in enumerate(order):
                                flandmarks[:, idx, :] = landmarks[:, a, :]
                                #flandmarks[:, idx*2] = landmarks[:,a*2]
                                #flandmarks[:, idx*2+1] = landmarks[:,a*2+1]
                            landmarks = flandmarks
                        landmarks[:, :, 0:2] /= im_scale
                        landmarks_list.append(landmarks)
                        # NOTE(review): this debug print only runs when the
                        # landmark branch is taken
                        if self.debug:
                            print(
                                'end stride{}-------------------------------------------------\n'
                                .format(s))

        proposals = np.vstack(proposals_list)
        landmarks = None
        # nothing passed the threshold: return empty results
        if proposals.shape[0] == 0:
            if self.use_landmarks:
                landmarks = np.zeros((0, 5, 2))
            return np.zeros((0, 5)), landmarks
        # for i in range(len(scores_list)):
        #     print('hhhhh score,shape=',scores_list[i].shape)
        scores = np.vstack(scores_list)
        print('finally!!! proposals.shape={}, score.shape={}'.format(
            proposals.shape, scores.shape))
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]  # sort by score, highest first
        proposals = proposals[order, :]
        scores = scores[order]
        if self.debug:
            print('sort score=', scores)
        if not self.vote and self.use_landmarks:
            landmarks = np.vstack(landmarks_list)
            landmarks = landmarks[order].astype(np.float32, copy=False)

        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        if not self.vote:
            print('begin to NMS!!\n')
            keep = self.nms(pre_det)
            # print('before hstack: pre_det={}, proposals.shape={}, proposals[:,4:]={}'.format(pre_det.shape, proposals.shape, proposals[:,4:]))
            det = np.hstack((pre_det, proposals[:, 4:]))
            # print('after hstack: pre_det={}, proposals.shape={}'.format(pre_det.shape, proposals.shape))
            det = det[keep, :]
            if self.use_landmarks:
                landmarks = landmarks[keep]
        else:
            # voting mode: landmarks stay None
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = self.bbox_vote(det)
        return det, landmarks
def demo_maskrcnn(network,
                  ctx,
                  prefix,
                  epoch,
                  img_path,
                  vis=True,
                  has_rpn=True,
                  thresh=0.001):
    """
    run the mask test network on one image and save a visualisation

    Loads the parameters for (prefix, epoch), runs a single forward pass on
    the image at img_path, applies per-class score filtering and NMS, then
    draws boxes, scores and masks and writes "figures/test_result.jpg".

    :param network: name used to look up the symbol factory
        'get_<network>_mask_test'
    :param ctx: context passed to load_param and MutableModule
    :param prefix: parameter file prefix passed to load_param
    :param epoch: epoch passed to load_param
    :param img_path: path of the image to run on
    :param vis: unused; kept for interface compatibility
    :param has_rpn: must be True
    :param thresh: minimum class score for a detection to be kept
    :raises IOError: when the image cannot be read
    """
    import random

    assert has_rpn, "Only has_rpn==True has been supported."
    # NOTE(review): eval() on a caller-supplied name; only pass trusted
    # values for `network`.
    sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES)
    arg_params, aux_params = load_param(prefix,
                                        epoch,
                                        convert=True,
                                        ctx=ctx,
                                        process=True)
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
    for k, v in arg_params.items():
        print(k, v.shape)

    max_image_shape = (1, 3, 1024, 1024)
    max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym,
                        data_names=["data", "im_info"],
                        label_names=None,
                        max_data_shapes=max_data_shapes,
                        context=ctx)
    mod.bind(data_shapes=max_data_shapes,
             label_shapes=None,
             for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    class OneDataBatch():
        """Minimal single-image batch: HWC image -> (1, 3, H, W), channels reversed."""

        def __init__(self, img):
            im_info = mx.nd.array([[img.shape[0], img.shape[1], 1.0]])
            img = np.transpose(img, (2, 0, 1))
            img = img[np.newaxis, (2, 1, 0)]
            self.data = [mx.nd.array(img), im_info]
            self.label = None
            self.provide_label = None
            self.provide_data = [("data", (1, 3, img.shape[2], img.shape[3])),
                                 ("im_info", (1, 3))]

    img_ori = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising
    if img_ori is None:
        raise IOError('Could not read image: {}'.format(img_path))
    batch = OneDataBatch(img_ori)
    mod.forward(batch, False)
    results = mod.get_outputs()
    output = dict(zip(mod.output_names, results))
    # drop the first column of each roi row (batch index elsewhere in this file)
    rois = output['rois_output'].asnumpy()[:, 1:]

    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    mask_output = output['mask_prob_output'].asnumpy()

    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, [img_ori.shape[0], img_ori.shape[1]])

    nms = py_nms_wrapper(config.TEST.NMS)

    CLASSES = ('__background__', 'text')

    # most likely class per roi, as a column so it broadcasts against scores
    label = np.argmax(scores, axis=1)[:, np.newaxis]

    # Index 0 (background) is a placeholder; its detections were never used,
    # so they are no longer computed.
    boxes_this_image = [[]]
    masks_this_image = [[]]
    for cls_ind in range(1, len(CLASSES)):
        cls_boxes = pred_boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_masks = mask_output[:, cls_ind, :, :]
        cls_scores = scores[:, cls_ind, np.newaxis]
        keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
        cls_masks = cls_masks[keep, :, :]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        boxes_this_image.append(dets[keep, :])
        masks_this_image.append(cls_masks[keep, :, :])

    color_white = (255, 255, 255)
    scale = 1.0
    im = img_ori.copy()

    for j, name in enumerate(CLASSES):
        if name == '__background__':
            continue
        # random.randint is inclusive on both ends, so 255 is the upper bound
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        dets = boxes_this_image[j]
        masks = masks_this_image[j]
        for i in range(len(dets)):
            bbox = dets[i, :4] * scale
            # NOTE(review): the x1 == y1 / x2 == y2 tests are kept from the
            # original; presumably meant to skip placeholder boxes - confirm
            if bbox[2] == bbox[0] or bbox[3] == bbox[1] or bbox[0] == bbox[
                    1] or bbox[2] == bbox[3]:
                continue
            score = dets[i, -1]
            # a list (not a lazy map object) so it can be indexed on Python 3
            bbox = [int(v) for v in bbox]
            # truncation can collapse a sub-pixel box; cv2.resize would
            # reject a zero-sized target
            if bbox[2] <= bbox[0] or bbox[3] <= bbox[1]:
                continue
            cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          color=color,
                          thickness=2)
            cv2.putText(im,
                        '%s %.3f' % (CLASSES[j], score),
                        (bbox[0], bbox[1] + 10),
                        color=color_white,
                        fontFace=cv2.FONT_HERSHEY_COMPLEX,
                        fontScale=0.5)
            mask = masks[i, :, :]
            mask = cv2.resize(mask, (bbox[2] - bbox[0], (bbox[3] - bbox[1])),
                              interpolation=cv2.INTER_LINEAR)
            # binarise the resized mask
            mask[mask > 0.5] = 1
            mask[mask <= 0.5] = 0
            # tint one random channel inside the box by a random amount,
            # saturating at 255
            mask_color = random.randint(0, 255)
            c = random.randint(0, 2)
            target = im[bbox[1]:bbox[3], bbox[0]:bbox[2],
                        c] + mask_color * mask
            target[target >= 255] = 255
            im[bbox[1]:bbox[3], bbox[0]:bbox[2], c] = target
    cv2.imwrite("figures/test_result.jpg", im)
def demo_maskrcnn(network,
                  ctx,
                  prefix,
                  epoch,
                  vis=True,
                  has_rpn=True,
                  thresh=0.001):

    assert has_rpn, "Only has_rpn==True has been supported."
    sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES)
    arg_params, aux_params = load_param(prefix,
                                        epoch,
                                        convert=True,
                                        ctx=ctx,
                                        process=True)

    max_image_shape = (1, 3, 1024, 1024)
    max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym,
                        data_names=["data", "im_info"],
                        label_names=None,
                        max_data_shapes=max_data_shapes,
                        context=ctx)
    mod.bind(data_shapes=max_data_shapes,
             label_shapes=None,
             for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    class OneDataBatch():
        def __init__(self, img):
            im_info = mx.nd.array([[img.shape[0], img.shape[1], 1.0]])
            img = np.transpose(img, (2, 0, 1))
            img = img[np.newaxis, (2, 1, 0)]
            self.data = [mx.nd.array(img), im_info]
            self.label = None
            self.provide_label = None
            self.provide_data = [("data", (1, 3, img.shape[2], img.shape[3])),
                                 ("im_info", (1, 3))]

    #img_ori = cv2.imread(img_path)
    #batch = OneDataBatch(img_ori)
    #mod.forward(batch, False)
    #results = mod.get_outputs()
    #output = dict(zip(mod.output_names, results))
    #rois = output['rois_output'].asnumpy()[:, 1:]

    #scores = output['cls_prob_reshape_output'].asnumpy()[0]
    #bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    #mask_output = output['mask_prob_output'].asnumpy()

    #pred_boxes = bbox_pred(rois, bbox_deltas)
    #pred_boxes = clip_boxes(pred_boxes, [img_ori.shape[0],img_ori.shape[1]])

    #nms = py_nms_wrapper(config.TEST.NMS)

    #boxes= pred_boxes

    #CLASSES  = ('__background__', 'person', 'rider', 'car', 'truck', 'bus', 'train', 'mcycle', 'bicycle')
    #CLASSES  = ('__background__', 'text')
    #all_boxes = [[[] for _ in xrange(1)]
    #             for _ in xrange(len(CLASSES))]
    #all_masks = [[[] for _ in xrange(1)]
    #             for _ in xrange(len(CLASSES))]
    #label = np.argmax(scores, axis=1)
    #label = label[:, np.newaxis]

    #for cls in CLASSES:
    #    cls_ind = CLASSES.index(cls)
    #    cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
    #    cls_masks = mask_output[:, cls_ind, :, :]
    #    cls_scores = scores[:, cls_ind, np.newaxis]
    #    #print cls_scores.shape, label.shape
    #    keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
    #    cls_masks = cls_masks[keep, :, :]
    #    dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
    #    keep = nms(dets)
    #    #print dets.shape, cls_masks.shape
    #    all_boxes[cls_ind] = dets[keep, :]
    #    all_masks[cls_ind] = cls_masks[keep, :, :]

    #boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]
    #masks_this_image = [[]] + [all_masks[j] for j in range(1, len(CLASSES))]

    #import copy
    #import random

# class_names = CLASSES
#color_white = (255, 255, 255)
#scale = 1.0
#im = copy.copy(img_ori)

#for j, name in enumerate(class_names):
#     if name == '__background__':
#        continue
#    color = (random.randint(0, 256), random.randint(0, 256), random.randint(0, 256))  # generate a random color
#    dets = boxes_this_image[j]
#    masks = masks_this_image[j]
#    for i in range(len(dets)):
#        bbox = dets[i, :4] * scale
#        if bbox[2] == bbox[0] or bbox[3] == bbox[1] or bbox[0] == bbox[1] or bbox[2] == bbox[3]  :
#            continue
#        score = dets[i, -1]
#        bbox = map(int, bbox)
#        cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color=color, thickness=2)
#        cv2.putText(im, '%s %.3f' % (class_names[j], score), (bbox[0], bbox[1] + 10),
#                    color=color_white, fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5)
#        mask = masks[i, :, :]
#        mask = cv2.resize(mask, (bbox[2] - bbox[0], (bbox[3] - bbox[1])), interpolation=cv2.INTER_LINEAR)
#3

#    mask[mask > 0.5] = 1
#        mask[mask <= 0.5] = 0
#        mask_color = random.randint(0, 255)
#        c = random.randint(0, 2)
#        target = im[bbox[1]: bbox[3], bbox[0]: bbox[2], c] + mask_color * mask
#        target[target >= 255] = 255
#        im[bbox[1]: bbox[3], bbox[0]: bbox[2], c] = target
#im = im[:,:,(2,1,0)]
#cv2.imwrite("figures/test_result.jpg",im)
#plt.imshow(im)
#fig1 = plt.gcf()
#plt.savefig("figures/test_result.jpg")
#if vis:
#plt.show()
#else:
    imglist_file = os.path.join(default.dataset_path, 'imglists', 'test.lst')
    assert os.path.exists(imglist_file), 'Path does not exist: {}'.format(
        imglist_file)
    imgfiles_list = []
    with open(imglist_file, 'r') as f:
        for line in f:
            file_list = dict()
            label = line.strip().split('\t')
            #file_list['img_id'] = label[0]
            file_list['img_path'] = label[1]
            #file_list['ins_seg_path'] = label[2].replace('labelTrainIds', 'instanceIds')
            imgfiles_list.append(file_list)

    #assert len(imgfiles_list) == self.num_images, 'number of boxes matrix must match number of images'
    roidb = []
    index = 0
    for im in range(len(imgfiles_list)):
        #print '===============================', im, '====================================='
        #roi_rec = dict()
        #img_path = os.path.join(self.data_path, imgfiles_list[im]['img_path'])
        index = im + 1
        img_path = os.path.join(default.dataset_path, 'ch4_test_images',
                                'img_' + str(index) + '.jpg')
        #size = cv2.imread(roi_rec['image']).shape
        #roi_rec['height'] = size[0]
        #roi_rec['width'] = size[1]
        #img_path = os.path.join(img_path, 'img_' + index + '.jpg')

        img_ori = cv2.imread(img_path)
        #img_ori = cv2.resize(img_ori, (, 28), interpolation=cv2.INTER_NEAREST)
        batch = OneDataBatch(img_ori)
        mod.forward(batch, False)
        results = mod.get_outputs()
        output = dict(zip(mod.output_names, results))
        rois = output['rois_output'].asnumpy()[:, 1:]

        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        mask_output = output['mask_prob_output'].asnumpy()

        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes,
                                [img_ori.shape[0], img_ori.shape[1]])

        #nms = py_nms_wrapper(config.TEST.NMS)
        nms = processing_nms_wrapper(config.TEST.NMS, 0.8)
        boxes = pred_boxes

        CLASSES = ('__background__', 'text')

        all_boxes = [[[] for _ in xrange(1)] for _ in xrange(len(CLASSES))]
        all_masks = [[[] for _ in xrange(1)] for _ in xrange(len(CLASSES))]
        label = np.argmax(scores, axis=1)
        label = label[:, np.newaxis]

        for cls in CLASSES:
            cls_ind = CLASSES.index(cls)
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_masks = mask_output[:, cls_ind, :, :]
            cls_scores = scores[:, cls_ind, np.newaxis]
            #print cls_scores.shape, label.shape
            keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
            cls_masks = cls_masks[keep, :, :]
            dets = np.hstack(
                (cls_boxes, cls_scores)).astype(np.float32)[keep, :]
            keep = nms(dets)
            #print dets.shape, cls_masks.shape
            all_boxes[cls_ind] = dets[keep, :]
            all_masks[cls_ind] = cls_masks[keep, :, :]

        boxes_this_image = [[]
                            ] + [all_boxes[j] for j in range(1, len(CLASSES))]
        masks_this_image = [[]
                            ] + [all_masks[j] for j in range(1, len(CLASSES))]

        import copy
        import random
        class_names = CLASSES
        color_white = (255, 255, 255)
        scale = 1.0
        im = copy.copy(img_ori)
        num_boxes = 0

        for j, name in enumerate(class_names):
            if name == '__background__':
                continue
            color = (random.randint(0, 256), random.randint(0, 256),
                     random.randint(0, 256))  # generate a random color
            dets = boxes_this_image[j]
            masks = masks_this_image[j]
            for i in range(len(dets)):
                #num_boxes += 1
                bbox = dets[i, :4] * scale
                #if bbox[2] == bbox[0] or bbox[3] == bbox[1] or bbox[0] == bbox[1] or bbox[2] == bbox[3]  :
                if bbox[2] == bbox[0] or bbox[3] == bbox[1]:
                    continue
                num_boxes += 1
                score = dets[i, -1]
                bbox = map(int, bbox)
                cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                              color=color,
                              thickness=2)
                cv2.putText(im,
                            '%s %.3f' % (class_names[j], score),
                            (bbox[0], bbox[1] + 10),
                            color=color_white,
                            fontFace=cv2.FONT_HERSHEY_COMPLEX,
                            fontScale=0.5)
                mask = masks[i, :, :]
                mask = cv2.resize(mask,
                                  (bbox[2] - bbox[0], (bbox[3] - bbox[1])),
                                  interpolation=cv2.INTER_LINEAR)
                mask[mask > 0.5] = 1
                mask[mask <= 0.5] = 0

                px = np.where(mask == 1)
                x_min = np.min(px[1])
                y_min = np.min(px[0])
                x_max = np.max(px[1])
                y_max = np.max(px[0])
                #if x_max - x_min <= 1 or y_max - y_min <= 1:
                #    continue
                im_binary = np.zeros(im[:, :, 0].shape)
                im_binary[bbox[1]:bbox[3],
                          bbox[0]:bbox[2]] = im_binary[bbox[1]:bbox[3],
                                                       bbox[0]:bbox[2]] + mask
                mask_color = random.randint(0, 255)
                c = random.randint(0, 2)
                target = im[bbox[1]:bbox[3], bbox[0]:bbox[2],
                            c] + mask_color * mask
                target[target >= 255] = 255
                im[bbox[1]:bbox[3], bbox[0]:bbox[2], c] = target
                #cv2.imwrite("figures/test_result.jpg",im)
                inst_dir = os.path.join(default.dataset_path, 'test_mat')
                if not os.path.exists(inst_dir):
                    os.makedirs(inst_dir)
                inst_path = os.path.join(
                    inst_dir, 'result_{}_{}.mat'.format(index, num_boxes))
                io.savemat(inst_path, {'Segmentation': im_binary})
        numbox = open('data/boxnum.txt', 'a')
        numbox.write(str(num_boxes) + '\n')
        numbox.close()
        img_dir = os.path.join(default.dataset_path, 'test_result_img')
        if not os.path.exists(img_dir):
            os.makedirs(img_dir)
        img_path = os.path.join(img_dir, 'result_{}.jpg'.format(index))
        cv2.imwrite(img_path, im)
def demo_maskrcnn(network, ctx, prefix, epoch, img_path,
                  vis=True, has_rpn=True, thresh=0.001):
    """Run the Mask R-CNN test symbol on one image and visualise the result.

    Loads the checkpoint ``prefix``/``epoch``, forwards the image at
    ``img_path`` through the network, keeps for every foreground class the
    detections whose score is at least ``thresh`` and whose arg-max label is
    that class, applies NMS, and draws boxes, labels and binarised masks on a
    copy of the image.

    :param network: name fragment used to look up ``get_<network>_mask_test``
    :param ctx: context passed to ``load_param`` and ``MutableModule``
    :param prefix: checkpoint prefix passed to ``load_param``
    :param epoch: checkpoint epoch passed to ``load_param``
    :param img_path: path of the image to process
    :param vis: show the figure when true, otherwise save it to
        ``figures/test_result.jpg``
    :param has_rpn: must be true; other modes are not supported
    :param thresh: minimum class score for a detection to be kept
    :raises IOError: if the image at ``img_path`` cannot be read
    """
    assert has_rpn, "Only has_rpn==True has been supported."

    import copy
    import os
    import random

    # NOTE(review): eval() resolves the symbol factory from a caller-supplied
    # name; ``network`` must only ever come from trusted configuration.
    sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    max_image_shape = (1, 3, 1024, 1024)
    max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym, data_names=["data", "im_info"], label_names=None,
                        max_data_shapes=max_data_shapes,
                        context=ctx)
    mod.bind(data_shapes=max_data_shapes, label_shapes=None, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    class OneDataBatch(object):
        """Minimal single-image batch exposing the attributes the module reads."""

        def __init__(self, img):
            # im_info is (height, width, scale); the image is fed unscaled.
            im_info = mx.nd.array([[img.shape[0], img.shape[1], 1.0]])
            # HWC -> CHW, then add a batch axis while reversing the channel order.
            img = np.transpose(img, (2, 0, 1))
            img = img[np.newaxis, (2, 1, 0)]
            self.data = [mx.nd.array(img), im_info]
            self.label = None
            self.provide_label = None
            self.provide_data = [("data", (1, 3, img.shape[2], img.shape[3])), ("im_info", (1, 3))]

    img_ori = cv2.imread(img_path)
    if img_ori is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Could not read image: {}'.format(img_path))

    batch = OneDataBatch(img_ori)
    mod.forward(batch, False)
    results = mod.get_outputs()
    output = dict(zip(mod.output_names, results))
    rois = output['rois_output'].asnumpy()[:, 1:]

    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    mask_output = output['mask_prob_output'].asnumpy()

    boxes = bbox_pred(rois, bbox_deltas)
    boxes = clip_boxes(boxes, [img_ori.shape[0], img_ori.shape[1]])

    nms = py_nms_wrapper(config.TEST.NMS)

    CLASSES = ('__background__', 'person', 'rider', 'car', 'truck', 'bus', 'train', 'mcycle', 'bicycle')
    num_classes = len(CLASSES)

    all_boxes = [[] for _ in range(num_classes)]
    all_masks = [[] for _ in range(num_classes)]
    label = np.argmax(scores, axis=1)[:, np.newaxis]

    # Index 0 is the background class: it is never drawn, so it is skipped
    # here rather than filtered and suppressed for nothing.
    for cls_ind in range(1, num_classes):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_masks = mask_output[:, cls_ind, :, :]
        cls_scores = scores[:, cls_ind, np.newaxis]
        keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
        cls_masks = cls_masks[keep, :, :]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        all_boxes[cls_ind] = dets[keep, :]
        all_masks[cls_ind] = cls_masks[keep, :, :]

    color_white = (255, 255, 255)
    im = copy.copy(img_ori)

    for j in range(1, num_classes):
        # randint is inclusive on both ends, so 255 is the largest valid channel value.
        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        dets = all_boxes[j]
        masks = all_masks[j]
        for i in range(len(dets)):
            # Truncate to pixel coordinates first so that the degenerate test
            # sees the same width/height that cv2.resize will be given.
            x1, y1, x2, y2 = [int(v) for v in dets[i, :4]]
            if x2 <= x1 or y2 <= y1:
                continue
            score = dets[i, -1]
            cv2.rectangle(im, (x1, y1), (x2, y2), color=color, thickness=2)
            cv2.putText(im, '%s %.3f' % (CLASSES[j], score), (x1, y1 + 10),
                        color=color_white, fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5)
            mask = cv2.resize(masks[i, :, :], (x2 - x1, y2 - y1), interpolation=cv2.INTER_LINEAR)
            mask = (mask > 0.5).astype(np.float32)
            # Tint one randomly chosen channel inside the box, saturating at 255.
            mask_color = random.randint(0, 255)
            c = random.randint(0, 2)
            target = im[y1:y2, x1:x2, c] + mask_color * mask
            im[y1:y2, x1:x2, c] = np.minimum(target, 255)

    # Reverse the channel order before handing the image to matplotlib.
    im = im[:, :, (2, 1, 0)]
    plt.imshow(im)

    if vis:
        plt.show()
    else:
        out_path = "figures/test_result.jpg"
        out_dir = os.path.dirname(out_path)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        plt.savefig(out_path)
    def detect(self, img, scales=(1.,), thresh=0.5):
        """Detect class-1 ("text") instances in ``img`` at one or more scales.

        For every scale the image is (optionally) resized and forwarded
        through ``self.model``; class-1 detections with score >= ``thresh``
        whose arg-max label is class 1 are collected, with boxes mapped back
        to the original image scale. The pooled detections go through
        ``self.nms``, each surviving mask is pasted into a full-image binary
        mask, and, if ``self.mask_nms`` is set, overlapping masks are
        suppressed or merged. One bounding quadrilateral is returned per
        remaining instance.

        :param img: input image; ``img.shape[0:2]`` is used as the mask size
        :param scales: iterable of resize factors to run the model at
        :param thresh: minimum class-1 score for a detection to be kept
        :return: list of ``(4, 2)`` int32 arrays, one per kept instance.
            When no detection passes the threshold at any scale an empty
            float array of shape ``(0, 2)`` is returned instead (kept as-is
            for backward compatibility with existing callers).
        """
        cls_ind = 1  # text class
        det_chunks = []
        mask_chunks = []
        for scale in scales:
            if scale != 1.0:
                nimg = cv2.resize(img,
                                  None,
                                  None,
                                  fx=scale,
                                  fy=scale,
                                  interpolation=cv2.INTER_LINEAR)
            else:
                nimg = img
            db = OneDataBatch(nimg)
            self.model.forward(db, is_train=False)
            results = self.model.get_outputs()
            output = dict(zip(self.model.output_names, results))
            rois = output['rois_output'].asnumpy()[:, 1:]
            scores = output['cls_prob_reshape_output'].asnumpy()[0]
            bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
            mask_output = output['mask_prob_output'].asnumpy()
            boxes = bbox_pred(rois, bbox_deltas)
            boxes = clip_boxes(boxes, [nimg.shape[0], nimg.shape[1]])
            label = np.argmax(scores, axis=1)[:, np.newaxis]
            # Boxes are predicted on the resized image; map them back.
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] / scale
            cls_masks = mask_output[:, cls_ind, :, :]
            cls_scores = scores[:, cls_ind, np.newaxis]
            keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
            dets = np.hstack(
                (cls_boxes, cls_scores)).astype(np.float32)[keep, :]
            if dets.shape[0] == 0:
                continue
            det_chunks.append(dets)
            mask_chunks.append(cls_masks[keep, :, :])

        if not det_chunks:
            return np.zeros((0, 2))

        # Stack once instead of re-copying the accumulated array per scale.
        dets = np.vstack(det_chunks)
        masks = np.vstack(mask_chunks)

        keep = self.nms(dets)
        dets = dets[keep, :]
        masks = masks[keep, :, :]

        num_dets = dets.shape[0]
        # Builtin ``int`` replaces the ``np.int`` alias removed from NumPy.
        det_mask = np.zeros((num_dets, ) + img.shape[0:2], dtype=int)
        mask_n = np.zeros((num_dets, ), dtype=int)
        invalid = np.zeros((num_dets, ), dtype=bool)
        for i in range(num_dets):
            # Truncate to pixel coordinates before the degenerate test so a
            # box that collapses to zero width/height never reaches resize.
            x1, y1, x2, y2 = [int(v) for v in dets[i, :4]]
            if x2 <= x1 or y2 <= y1:
                invalid[i] = True
                continue
            mask_i = cv2.resize(masks[i, :, :], (x2 - x1, y2 - y1),
                                interpolation=cv2.INTER_LINEAR)
            binary = (mask_i > 0.5).astype(int)
            det_mask[i, y1:y2, x1:x2] += binary
            mask_n[i] = binary.sum()
            # NOTE(review): a detection whose binarised mask is empty stays
            # valid and is passed to minimum_bounding_rectangle below;
            # confirm that helper copes with an all-zero mask.

        if self.mask_nms:
            for i in range(num_dets):
                if invalid[i]:
                    continue
                mask_i = det_mask[i]
                ni = mask_n[i]
                merge_list = []
                for j in range(i + 1, num_dets):
                    if invalid[j]:
                        continue
                    mask_j = det_mask[j]
                    nj = mask_n[j]
                    nij = np.count_nonzero(np.logical_and(mask_i, mask_j))
                    union = ni + nj - nij
                    # Empty masks count as zero overlap instead of dividing
                    # by zero and producing NaN with a runtime warning.
                    iou = float(nij) / union if union > 0 else 0.0
                    iou_j = float(nij) / nj if nj > 0 else 0.0
                    # Drop j when most of it lies inside i.
                    if iou_j > 0.7:
                        invalid[j] = True
                    if iou >= config.TEST.NMS:
                        invalid[j] = True
                        if iou >= MERGE_THRESH:
                            merge_list.append(j)
                if merge_list:
                    # Merge after the scan so the comparisons above always
                    # use the unmerged mask of detection i.
                    merged = mask_i.astype(bool)
                    for mm in merge_list:
                        merged |= det_mask[mm].astype(bool)
                    det_mask[i] = merged.astype(int)

        ret = []
        for i in range(num_dets):
            if invalid[i]:
                continue
            mini_box = minimum_bounding_rectangle(det_mask[i])
            # Swap the two coordinates of each of the four corners
            # (presumably (row, col) -> (x, y); confirm against
            # minimum_bounding_rectangle).
            corners = np.asarray(mini_box, dtype=np.float64)[:4][:, [1, 0]]
            ret.append(corners.astype(np.int32))
        return ret