def detect(self, text_proposals, scores, size):
    # Drop proposals with low scores
    keep_inds = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

    # Sort by score (descending)
    sorted_indices = np.argsort(scores.ravel())[::-1]
    text_proposals, scores = text_proposals[sorted_indices], scores[sorted_indices]

    # Apply NMS to the proposals
    keep_inds = nms(np.hstack((text_proposals, scores)), TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

    # Collect the detection results
    scores = normalize(scores)
    text_recs = self.text_proposal_connector.get_text_lines(text_proposals, scores, size)

    # Filter the boxes
    keep_inds = self.filter_boxes(text_recs)
    text_lines = text_recs[keep_inds]

    # Apply NMS to the text lines
    if text_lines.shape[0] != 0:
        keep_inds = nms(text_lines, TextLineCfg.TEXT_LINE_NMS_THRESH)
        text_lines = text_lines[keep_inds]

    return text_lines
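Every example in this listing leans on a free function nms(dets, thresh) that is never defined here. A minimal pure-NumPy sketch of the classic greedy IoU suppression, assuming dets is an (N, 5) array of [x1, y1, x2, y2, score] rows and the caller treats the return value as a list of kept row indices:

import numpy as np

def nms(dets, thresh):
    # Greedy non-maximum suppression over [x1, y1, x2, y2, score] rows.
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current best box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop every box whose IoU with the winner reaches the threshold
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep

Note the second nms call above receives 9-column text-line records, so the nms used there is presumably a variant that reads the score from the last column; this sketch covers only the common 5-column case.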
    def detect(self, text_proposals, scores, size):
        """
        Detecting texts from an image
        :return: the bounding boxes of the detected texts
        """
        # text_proposals, scores=self.text_proposal_detector.detect(im, cfg.MEAN)
        keep_inds = np.where(scores > cfg.TEXT_PROPOSALS_MIN_SCORE)[0]
        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

        sorted_indices = np.argsort(scores.ravel())[::-1]
        text_proposals, scores = text_proposals[sorted_indices], scores[sorted_indices]

        # nms for text proposals
        keep_inds = nms(np.hstack((text_proposals, scores)), cfg.TEXT_PROPOSALS_NMS_THRESH)
        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

        scores = normalize(scores)

        text_lines = self.text_proposal_connector.get_text_lines(text_proposals, scores, size)

        keep_inds = self.filter_boxes(text_lines)
        text_lines = text_lines[keep_inds]

        if text_lines.shape[0] != 0:
            keep_inds = nms(text_lines, cfg.TEXT_LINE_NMS_THRESH)
            text_lines = text_lines[keep_inds]

        return text_lines
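normalize is likewise external; in CTPN-style code it is typically a plain min-max rescale of the scores into [0, 1] before the text lines are connected. A sketch under that assumption:

import numpy as np

def normalize(data):
    # Min-max rescale to [0, 1]; a constant score vector collapses to 0.
    if data.shape[0] == 0:
        return data
    max_, min_ = data.max(), data.min()
    if max_ - min_ == 0:
        return data - min_
    return (data - min_) / (max_ - min_)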
Example #3
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
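vis_detections is assumed by most of the demos here; a sketch in the spirit of the py-faster-rcnn demo, drawing every detection whose score clears thresh onto the supplied matplotlib axes (the [x1, y1, x2, y2, score] row layout is assumed):

import numpy as np
import matplotlib.pyplot as plt

def vis_detections(im, class_name, dets, ax, thresh=0.5):
    """Draw detected bounding boxes above `thresh` onto an existing axes."""
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        ax.add_patch(plt.Rectangle((bbox[0], bbox[1]),
                                   bbox[2] - bbox[0], bbox[3] - bbox[1],
                                   fill=False, edgecolor='red', linewidth=3.5))
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')
    ax.set_title('{} detections with score >= {:.1f}'.format(class_name, thresh))

Later examples clearly bind different variants (extra save_path or output-list arguments), so treat this as the axes-drawing flavor only.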
Example #4
def demo(sess, net, image_name, thresh=0.05):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    image = PIL.Image.open(image_name)
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()

    im_num = os.path.split(image_name)[1].split('.')[0]
    scores, boxes = im_detect(sess,
                              net,
                              im,
                              save_feature=True,
                              feature_path='./data/conv.npy')
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    # fig, ax = plt.subplots(figsize=(12, 12))
    # ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    results = []
    name = image_name.split('/')[-1]
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        cls_labels = np.full_like(cls_scores, cls_ind)
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis],
                          cls_labels[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -2] > thresh)[0]
        dets = dets[inds]
        for i in range(dets.shape[0]):
            name = str(name)
            category = int(dets[i, -1])
            bbox = list(map(float, dets[i, :4]))
            bbox = [round(b, 2) for b in bbox]
            score = float(dets[i, -2])
            dic = collections.OrderedDict()
            dic['name'] = str(name)
            dic['category'] = int(category)
            dic['bbox'] = bbox
            dic['score'] = float(score)
            results.append(dic)
        im = vis_detections(image, cls, dets, ax=None, thresh=CONF_THRESH)

    out_path = './data/detection_result'
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    out_path = os.path.join(out_path, os.path.split(image_name)[-1])
    image.save(out_path)
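The demo above accumulates results as a list of per-detection OrderedDicts but never writes the list out; a hypothetical helper (the dump_results name and results.json filename are assumptions) could persist it next to the saved image:

import json
import os

def dump_results(results, out_dir='./data/detection_result'):
    # Persist the per-detection dicts built in demo() as a JSON array.
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, 'results.json'), 'w') as f:
        json.dump(results, f, indent=2)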
Example #5
def draw_densecap(image, scores, rois, im_info, cap_probs, bbox_pred):
    """
    bbox_pred: [None, 4]
    rois: [None, 5]

    """
    # for bbox unnormalization

    bbox_mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1, 4))
    bbox_stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1, 4))

    boxes = rois[:, 1:5] / im_info[2]
    # [None, 12]
    cap_ids = np.argmax(cap_probs, axis=1).reshape((-1, cfg.TIME_STEPS))

    # bbox target unnormalization
    box_deltas = bbox_pred * bbox_stds + bbox_mean

    # do the transformation
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, image.shape)

    pos_dets = np.hstack(
        (pred_boxes, scores[:, 1][:, np.newaxis])).astype(np.float32,
                                                          copy=False)
    keep = nms(pos_dets, cfg.TEST.NMS)
    pos_boxes = boxes[keep, :]
    cap_ids = cap_ids[keep, :]
    im_info[2] = 1.
    img_cap = draw_bounding_boxes(image, pos_boxes, im_info, cap_ids)

    return img_cap
Example #6
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
Example #7
def ctpn(sess, net, image_name):
    timer = Timer()
    timer.tic()

    img = cv2.imread(image_name)
    img, scale = resize_im(img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)

    new_scores = scores[:, np.newaxis]

    keep_inds = np.where(new_scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    boxes, new_scores = boxes[keep_inds], new_scores[keep_inds]

    sorted_indices = np.argsort(new_scores.ravel())[::-1]
    boxes, new_scores = boxes[sorted_indices], new_scores[sorted_indices]

    keep_inds = nms(np.hstack((boxes, new_scores)),
                    TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    boxes, new_scores = boxes[keep_inds], new_scores[keep_inds]

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(10, 14))

    for key, box in enumerate(boxes):
        img_inside = img.copy()
        img_inside = cv2.rectangle(img_inside, (int(box[0]), int(box[1])),
                                   (int(box[2]), int(box[3])),
                                   color=(255, 0, 0),
                                   thickness=2)
        plt.imshow(img_inside)
        plt.title('Scores: {0}'.format(scores[key]))
        plt.savefig('./data/fig/fig_{0}.jpg'.format(key))
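resize_im is called but not shown; judging from the call site (it returns the resized image and the scale factor), a plausible sketch is the usual short-side/long-side rule:

import cv2

def resize_im(im, scale, max_scale=None):
    # Scale so the shorter side reaches `scale`, capped so the longer
    # side never exceeds `max_scale`.
    f = float(scale) / min(im.shape[0], im.shape[1])
    if max_scale is not None and f * max(im.shape[0], im.shape[1]) > max_scale:
        f = float(max_scale) / max(im.shape[0], im.shape[1])
    return cv2.resize(im, None, None, fx=f, fy=f,
                      interpolation=cv2.INTER_LINEAR), f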
Example #8
    def process_frame(self, video_name, im_name, CLASSES, CONF_THRESH):
        # Output frame path
        im_path_ = os.path.join(api_config.upload_folder,
                                video_name.split(".")[0],
                                "annotated-frames", os.path.basename(im_name))
        im = np.array(Image.open(im_name))
        im = im[:, :, ::-1]
        timer = Timer()
        timer.tic()
        scores, boxes = im_detect(self.sess, self.net, im)
        timer.toc()
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time,
                                             boxes.shape[0]))

        NMS_THRESH = 0.3
        im = im[:, :, (2, 1, 0)]
        fig, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(im, aspect='equal')
        self.annotation = xml_setup(im_name, im.shape)
        for cls_ind, cls in enumerate(CLASSES[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            self.draw(im_path_, cls, dets, ax, thresh=CONF_THRESH)
        xml_write(video_name, os.path.basename(im_name), self.annotation)
        plt.savefig(im_path_, bbox_inches='tight')
        plt.close()
Example #9
    def detect(self, text_proposals, scores, size):
        # Drop proposals with low scores
        # (a standard non-maximum suppression pass filters out the redundant proposals)
        keep_inds = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

        # Sort by score (descending)
        sorted_indices = np.argsort(scores.ravel())[::-1]
        text_proposals, scores = text_proposals[sorted_indices], scores[
            sorted_indices]

        # Apply NMS to the proposals
        # THRESH is the NMS parameter
        keep_inds = nms(np.hstack((text_proposals, scores)),
                        TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

        # Collect the detection results
        # The text-line construction algorithm merges many thin proposals into
        # one text line (with edge refinement), yielding the filtered boxes
        text_recs = self.text_proposal_connector.get_text_lines(
            text_proposals, scores, size)
        # Get the indices of the boxes that survive a second filtering pass
        keep_inds = self.filter_boxes(text_recs)
        return text_recs[keep_inds]
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
     For details please see the technical report
  """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    if cfg.DEBUG_ALL:
        print('number of proposals before clipping boxes to the image boundary: {}'.format(
            proposals.shape[0]
        ))
    proposals = clip_boxes(proposals, im_info[:2])

    # remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    if cfg.FILTER_SMALL_BOX:
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    if cfg.DEBUG_ALL:
        print("number of proposals before nms: {}".format(proposals.shape[0]))
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if cfg.DEBUG_ALL:
        print("number of proposals after nms: {}".format(len(keep)))

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
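_filter_boxes drops proposals whose width or height falls below the minimum size; the standard py-faster-rcnn helper is short enough to sketch in full:

import numpy as np

def _filter_boxes(boxes, min_size):
    # Keep boxes whose width and height are both at least `min_size` pixels.
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep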
Example #11
def nms_detections(pred_boxes, scores, nms_thresh, inds=None):
    keep = range(scores.shape[0])
    keep, scores, pred_boxes = zip(*sorted(zip(keep, scores, pred_boxes), key=lambda x: x[1][0])[::-1])
    keep, scores, pred_boxes = np.array(keep), np.array(scores), np.array(pred_boxes)
    dets = np.hstack((pred_boxes, scores[:,0][:, np.newaxis])).astype(np.float32)
    keep_keep = nms(dets, nms_thresh)
    keep_keep = keep_keep[:min(100, len(keep_keep))]
    keep = keep[keep_keep]
    if inds is None:
        return pred_boxes[keep_keep], scores[keep_keep], keep
    return pred_boxes[keep_keep], scores[keep_keep], inds[keep], keep
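The zip/sort/unzip dance at the top of nms_detections only orders everything by score, highest first; assuming scores has shape (N, 1), an equivalent and more direct formulation is:

import numpy as np

def order_by_score(pred_boxes, scores):
    # Equivalent to the sort above: reorder rows by score, descending.
    order = scores[:, 0].argsort()[::-1]
    return pred_boxes[order], scores[order], order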
Example #12
def test_im(sess, net, im_path, vocab, vis=True):
    im = cv2.imread(im_path)
    scores, boxes, captions = im_detect(sess, net, im, None, use_box_at=-1)
    pos_dets = np.hstack((boxes, scores[:, np.newaxis])) \
        .astype(np.float32, copy=False)
    keep = nms(pos_dets, cfg.TEST.NMS)
    pos_dets = pos_dets[keep, :]
    pos_scores = scores[keep]
    pos_captions = [sentence(vocab, captions[idx]) for idx in keep]
    pos_boxes = boxes[keep, :]
    if vis:
        vis_detections(im_path, im, pos_captions, pos_dets, save_path='./demo')
def caption(sess, inp):
    img = np.array(inp['image'])
    scores, boxes, captions = im_detect(sess, net, img, None, use_box_at=-1)
    pos_dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
    keep = nms(pos_dets, cfg.TEST.NMS)
    pos_dets = pos_dets[keep, :]
    pos_scores = scores[keep]
    pos_captions = [sentence(vocab, captions[idx]) for idx in keep]
    pos_boxes = boxes[keep, :]
    return dict(captions=np.array(pos_captions),
                scores=np.array(pos_scores),
                boxes=np.array(pos_boxes))
Example #14
def interpret_objects(cls_prob,
                      bbox_pred,
                      rois,
                      im_info,
                      nms_thres=-1.,
                      min_score=0.00001,
                      use_gt_boxes=False,
                      max_per_image=2000):
    box_deltas = bbox_pred.data.cpu().numpy()
    cls_prob = cls_prob.data.cpu().numpy()
    all_boxes = [[] for _ in xrange(cls_prob.shape[1])]

    for j in xrange(1, cls_prob.shape[1]):  # skip the background
        inds = np.where(cls_prob[:, j] > min_score)[0]
        if len(inds) == 0:
            continue
        cls_scores = cls_prob[inds, j]
        if use_gt_boxes:
            cls_boxes = rois.data.cpu().numpy()[inds, 1:5] / im_info[0][2]
        else:
            t_box_deltas = np.asarray(
                [box_deltas[i, (j * 4):(j * 4 + 4)] for i in inds],
                dtype=np.float)
            cls_boxes = bbox_transform_inv_hdn(
                rois.data.cpu().numpy()[inds, 1:5],
                t_box_deltas) / im_info[0][2]
            cls_boxes = clip_boxes(cls_boxes, im_info[0][:2] / im_info[0][2])

        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
            .astype(np.float32, copy=False)
        if nms_thres > 0.:
            keep = nms(cls_dets, nms_thres)
            cls_dets = cls_dets[keep, :]

        all_boxes[j] = cls_dets

    if max_per_image > 0:
        image_scores = np.hstack([
            all_boxes[j][:, -1] for j in xrange(1, cls_prob.shape[1])
            if len(all_boxes[j]) > 0
        ])
        #print('{} detections.'.format(len(image_scores)))
        if len(image_scores) > max_per_image:
            image_thresh = np.sort(image_scores)[-max_per_image]
            for j in xrange(1, cls_prob.shape[1]):
                if len(all_boxes[j]) == 0:
                    continue
                keep = np.where(all_boxes[j][:, -1] >= image_thresh)[0]
                all_boxes[j] = all_boxes[j][keep, :]

    return all_boxes
Example #15
    def detect(self,
               img,
               ret=dict(),
               net='VGGnet_test',
               model=os.path.join(local_dir,
                                  'models/VGGnet_fast_rcnn_iter_150000.ckpt')):
        """Detect object classes in an image using pre-computed object proposals."""
        self.sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True))
        # load network
        self.net = get_network(net)
        #load model
        print('Loading network {:s}...'.format(net))
        self.saver = tf.train.Saver()
        self.saver.restore(self.sess, model)
        print(' done.')

        starttime = time.time()
        # Load the demo image
        im = cv2.imread(img)
        print(im.shape)
        im = cv2.resize(im, (int((400.0 / im.shape[0]) * im.shape[1]), 400))
        print(im.shape)
        # Detect all object classes and regress object bounds
        timer = Timer()
        timer.tic()
        scores, boxes = im_detect(self.sess, self.net, im)
        timer.toc()
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

        # Visualize detections for each class
        # im = im[:, :, (2, 1, 0)]
        # fig, ax = plt.subplots(figsize=(12, 12))
        # ax.imshow(im, aspect='equal')
        CONF_THRESH = 0.8
        NMS_THRESH = 0.3
        for cls_ind, cls in enumerate(CLASSES[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            im = self.vis_detections(im, cls, dets, CONF_THRESH)
        endtime = time.time()
        ret['result'] = (True, "%.3f" % (endtime - starttime))
        ret['drawImg'] = im
        print "finish detecting"
        return im
Example #16
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.2
    NMS_THRESH = 0.2

    cls_name = dict(zip(np.arange(len(CLASSES)), CLASSES))
    cls_matrix = np.arange(len(CLASSES)).reshape([1, -1]) + np.zeros(
        [boxes.shape[0], 1])
    cls_scores_fg = scores[:, 1:len(CLASSES)]
    cls_boxes_fg = boxes[:, 4:4 * (len(CLASSES))]
    cls_matrix_fg = cls_matrix[:, 1:len(CLASSES)]

    cls_scores_fg = cls_scores_fg.reshape([-1, 1])
    cls_boxes_fg = cls_boxes_fg.reshape([-1, 4])
    cls_matrix_fg = cls_matrix_fg.reshape([-1, 1])

    keeps = np.where(cls_scores_fg >= CONF_THRESH)[0]

    cls_scores_fg = cls_scores_fg[keeps]
    cls_boxes_fg = cls_boxes_fg[keeps]
    cls_matrix_fg = cls_matrix_fg[keeps]

    dets = np.hstack((cls_boxes_fg, cls_scores_fg)).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    print(len(keep))
    dets = dets[keep, :]
    cls_matrix_fg = cls_matrix_fg[keep, :]
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1
        cls_fg_ind = np.where(cls_matrix_fg == cls_ind)[0]
        detses = dets[cls_fg_ind, :]
        vis_detections(im, cls, detses, ax, thresh=CONF_THRESH)
Example #17
    def find_objects(self, input_image):
        # input_image = input_image.astype(float)
        # start_time = time.time()
        caffe.set_mode_gpu()
        caffe.set_device(0)
        scores, boxes = im_detect(self.net, input_image)
        # print 'CNN took: ', time.time() - start_time
        # Visualize detections for each class
        objects_detected = []
        CONF_THRESH = 0.7
        NMS_THRESH = 0.2
        class_index = 11
        class_name = 'tv'
        class_boxes = boxes[:, 4 * class_index:4 * (class_index + 1)]
        class_scores = scores[:, class_index]
        detections = np.hstack(
            (class_boxes, class_scores[:, np.newaxis])).astype(np.float32)
        keepers = nms(detections, NMS_THRESH)
        detections = detections[keepers, :]
        detections = detections[detections[:, -1] >= CONF_THRESH]
        detections[:, -1] = 3  # TODO: Hack so that TV returns as 3, which is tablet in the filter...
        objects_detected.append(detections)
        for cls_ind, cls in enumerate(self.class_list[15:]):
            cls_ind += 15  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            dets = dets[dets[:, -1] >= CONF_THRESH]
            dets[:, -1] = 2  # TODO: Hack so that all blocks have object ID 2
            # append data structure with format [[x1, y1, x2, y2, obj_id], [x1, y1, x2, y2, obj_id], ...] for all boxes
            objects_detected.append(dets)
        return objects_detected
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, args.IMAGE_PATH, image_name)
    #im__ = None
    im__ = cv2.imread(im_file)
    #print '----------', im__
    #if im__ == None:
    #    print " --- ERROR: Cannot Load Image: ", im_file
    #    return False

    result = im__

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im__)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im__ = im__[:, :, (2, 1, 0)]
    #fig, ax = plt.subplots(figsize=(12, 12))
    #ax.imshow(im__, aspect='equal')

    new_bboxes = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, args.NMS_THRESH)
        dets = dets[keep, :]
        #vis_detections(new_bboxes, result, cls, dets, ax, thresh=args.CONF_THRESH)
        vis_detections(new_bboxes, result, cls, dets, thresh=args.CONF_THRESH)


    #Save result
    img_obj = bb.IMGBBox()
    img_obj.img_name = image_name
    img_obj.xml_name = image_name.strip().split('.')[0] + '.xml'
    img_obj.setIMG(result)
    img_obj.saveIMG(args.save_img_dir, args.save_img_dir)
    img_obj.bboxes = new_bboxes
    img_obj.saveXML(args.save_xml_dir, args.save_xml_dir)
Example #19
def ctpn(sess, net, image_name):
    img = cv2.imread(image_name)
    im = check_img(img)
    timer = Timer()
    timer.tic()
    scores, boxes = test_ctpn(sess, net, im)
    timer.toc()
    CONF_THRESH = 0.9
    NMS_THRESH = 0.3
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    dets = dets[keep, :]

    keep = np.where(dets[:, 4] >= 0.7)[0]
    dets = dets[keep, :]
    line = connect_proposal(dets[:, 0:4], dets[:, 4], im.shape)
    save_results(image_name, im, line, thresh=0.9)
Example #20
    def demo(self, image_name, is_init=True):
        """Detect object classes in an image using pre-computed object proposals."""

        # Detect all object classes and regress object bounds
        timer = Timer()
        timer.tic()
        if is_init:
            raw_scores, raw_boxes, self.feature_map, self.rpn_boxes, self.rpn_scores, self.im_scales = im_detect(
                self.sess, self.net, image_name, is_part=False)
            CONF_THRESH = self.score_thresh
            NMS_THRESH = self.nms_thresh
            self.objects = []
            for cls_ind, cls in enumerate(CLASSES[1:]):
                cls_ind += 1  # because we skipped background
                cls_boxes = raw_boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
                cls_scores = raw_scores[:, cls_ind]
                dets = np.hstack(
                    (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
                keep = nms(dets, NMS_THRESH)
                dets = dets[keep, :]

                inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
                if len(inds) > 0:
                    for i in inds:
                        bbox = dets[i, :4]
                        score = dets[i, -1]
                        box_height = bbox[3] - bbox[1]
                        box_width = bbox[2] - bbox[0]
                        c_x = np.round(bbox[0] + box_width / 2.0)
                        c_y = np.round(bbox[1] + box_height / 2.0)
                        if cls == 'stawberry':
                            cls = 'strawberry'
                        object_coordinates = {
                            'name': cls,
                            'score': score,
                            'boxes': list([c_x, c_y, box_width, box_height])
                        }
                        self.objects.append(object_coordinates)
        else:
            _, _, self.feature_map, self.rpn_boxes, self.rpn_scores, self.im_scales = im_detect(
                self.sess, self.net, image_name, is_part=True)
        timer.toc()
def caption(sess, inp):
    img = np.array(inp['image'])
    width = img.shape[1]
    height = img.shape[0]
    scores, boxes, captions = im_detect(sess, net, img, None, use_box_at=-1)
    pos_dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
    keep = nms(pos_dets, cfg.TEST.NMS)
    pos_dets = pos_dets[keep, :]
    pos_scores = scores[keep]
    pos_captions = [sentence(vocab, captions[idx]) for idx in keep]
    pos_boxes = boxes[keep, :]
    bboxes = []
    classes = []
    scores = []
    for i in range(min(inp['max_detections'], len(pos_captions))):
        bboxes.append(convert_rect(pos_boxes[i], width, height))
        classes.append(pos_captions[i])
        scores.append(float(pos_scores[i]))
    return dict(bboxes=bboxes, classes=classes, scores=scores)
Example #22
    def detect(self, text_proposals, scores, size):
        # Drop proposals with low scores: discard all boxes with probability below 0.7
        keep_inds = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

        # Sort by score (descending)
        sorted_indices = np.argsort(scores.ravel())[::-1]
        text_proposals, scores = text_proposals[sorted_indices], scores[sorted_indices]

        # Apply NMS to the proposals; TEXT_PROPOSALS_NMS_THRESH: 0.2
        keep_inds = nms(np.hstack((text_proposals, scores)), TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

        # Collect the detection results
        text_recs = self.text_proposal_connector.get_text_lines(text_proposals, scores, size)
        # Finally check each box's aspect ratio, score, and minimum width:
        #   width/height ratio must exceed 0.5
        #   score must exceed 0.9
        #   width must exceed 16
        keep_inds = self.filter_boxes(text_recs)
        return text_recs[keep_inds]
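filter_boxes itself is not shown; based on the thresholds described in the comments above and the 9-column records that get_text_lines produces (four corner points, then a score in the last column), a sketch might look like the following, written as a free function (the original is a method on the detector) and assuming MIN_RATIO, LINE_MIN_SCORE, TEXT_PROPOSALS_WIDTH, and MIN_NUM_PROPOSALS fields on TextLineCfg:

import numpy as np

def filter_boxes(boxes):
    # boxes: (N, 9) text-line records, corner points in columns 0-7, score in column 8.
    heights = (np.abs(boxes[:, 5] - boxes[:, 1]) +
               np.abs(boxes[:, 7] - boxes[:, 3])) / 2.0 + 1
    widths = np.abs(boxes[:, 2] - boxes[:, 0]) + 1
    scores = boxes[:, 8]
    return np.where((widths / heights > TextLineCfg.MIN_RATIO) &
                    (scores > TextLineCfg.LINE_MIN_SCORE) &
                    (widths > TextLineCfg.TEXT_PROPOSALS_WIDTH *
                     TextLineCfg.MIN_NUM_PROPOSALS))[0]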
Example #23
    def detect(self, text_proposals, scores, size):
        # Drop proposals with low scores
        keep_inds = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

        # Sort by score (descending)
        sorted_indices = np.argsort(scores.ravel())[::-1]
        text_proposals, scores = text_proposals[sorted_indices], scores[
            sorted_indices]

        # Apply NMS to the proposals
        keep_inds = nms(np.hstack((text_proposals, scores)),
                        TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
        #keep_inds = soft_nms(np.hstack((text_proposals, scores)), threshold=TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

        # Collect the detection results
        text_recs = self.text_proposal_connector.get_text_lines(
            text_proposals, scores, size)
        keep_inds = self.filter_boxes(text_recs)
        return text_proposals, scores, text_recs[keep_inds]
Example #24
def ctpn(sess, net, image_name):
    img = cv2.imread(image_name)
    im = check_img(img)
    timer = Timer()
    timer.tic()
    scores, boxes = test_ctpn(sess, net, im)
    timer.toc()
    # print('Detection took {:.3f}s for '
    #       '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.9
    NMS_THRESH = 0.3
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    dets = dets[keep, :]

    keep = np.where(dets[:, 4] >= 0.7)[0]
    dets = dets[keep, :]
    line = connect_proposal(dets[:, 0:4], dets[:, 4], im.shape)
    save_results(image_name, im, line, thresh=0.9)
    parser.add_argument('--net', dest='demo_net', choices=['VGGnet_test', 'MSnet_test'], default='VGGnet_test')
    parser.add_argument('--model', dest='model', help='Model path', required=True)
    parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default=None, type=str)
    args = parser.parse_args()
    return args

if __name__ == '__main__':
    args = parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    pprint.pprint(cfg)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    net = get_network(args.demo_net)
    print('Loading network {:s}...'.format(args.demo_net))
    saver = tf.train.Saver()
    saver.restore(sess, args.model)
    print(' done.')
    im_names = glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.png')) + \
               glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.jpg'))
    timer = Timer()
    for im_name in im_names:
        print('Demo for {:s}'.format(im_name))
        im = cv2.imread(im_name)
        timer.tic()
        scores, boxes = im_detect_rpn(sess, net, im)
        timer.toc()
        print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, 0.5)
        dets = dets[keep, :]
        vis_detections(im, dets, thresh=0.5)
Example #26
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key,
                   _feat_stride=[
                       16,
                   ],
                   anchor_scales=[
                       16,
                   ]):
    """
    Parameters
    ----------
    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
                         NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN
    im_info: a list of [image_height, image_width, scale_ratios]
    cfg_key: 'TRAIN' or 'TEST'
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)

    """
    cfg_key = cfg_key.decode('ascii')
    # TODO: the anchor scales may need adjusting later; the text is quite dense, so this needs further refinement
    # _anchors value
    # [[0    2   15   13]
    #  [0    0   15   15]
    #  [0   -4   15   19]
    #  [0   -9   15   24]
    #  [0  -16   15   31]
    #  [0  -26   15   41]
    #  [0  -41   15   56]
    #  [0  -62   15   77]
    #  [0  -91   15  106]
    #  [0 -134   15  149]]
    _anchors = generate_anchors(
        scales=np.array(anchor_scales))  # generate the 10 base anchors
    _num_anchors = _anchors.shape[0]  # 10 anchors

    im_info = im_info[0]  # original image height/width and scale factor

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # 12000: max candidate boxes kept before NMS
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # 2000: max boxes kept after NMS
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # NMS threshold, 0.7
    min_size = cfg[cfg_key].RPN_MIN_SIZE  # minimum proposal size, currently 16; both height and width must exceed it

    height, width = rpn_cls_prob_reshape.shape[1:3]  # feature-map height and width

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # (1, H, W, A)
    # extract the objectness scores
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])
    # keep the object scores (the non-object ones are irrelevant here)
    # and reshape to 1*H*W*10

    bbox_deltas = rpn_bbox_pred  # the predicted values are relative offsets and still need converting to real image coordinates
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    # as in anchor-target-layer-tf, generate the anchor shifts and from them all anchors over the whole image
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    #print('w,h,x',width,height,width*height)

    # shift_x shape = [height, width]
    # generate two matrices with the same dimensions
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # print("shift_x", shift_x.shape)
    # print("shift_y", shift_y.shape)
    # shifts shape = [height*width,4]
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    #print("shift shape", shifts.shape)

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors  # 10
    K = shifts.shape[0]  # height*width,[height*width,4]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    # print('_anchors.reshape((1, A, 4))',np.shape(_anchors.reshape((1, A, 4))))
    # print('shifts.reshape((1, K, 4)).transpose((1, 0, 2))',np.shape(shifts.reshape((1, K, 4)).transpose((1, 0, 2))))
    anchors = anchors.reshape((K * A, 4))  # these are now all the anchors over the whole image
    # print(anchors)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.reshape((-1, 4))  #(HxWxA, 4)

    # Same story for the scores:
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)  # invert the transform to get real box coordinates on the image

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals,
                           im_info[:2])  # trim the proposals; anything extending past the image is clipped

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals,
                         min_size * im_info[2])  # remove proposals below the minimum size
    proposals = proposals[keep, :]  # keep the remaining proposals
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]

    # # remove irregular boxes, too fat too tall
    # keep = _filter_irregular_boxes(proposals)
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]  # sort scores from highest to lowest
    if pre_nms_topN > 0:  # keep 12000 proposals to go into NMS
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    bbox_deltas = bbox_deltas[order, :]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)),
               nms_thresh)  # run NMS, keeping 2000 proposals
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    blob = np.hstack(
        (scores.astype(np.float32,
                       copy=False), proposals.astype(np.float32, copy=False)))

    return blob, bbox_deltas
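bbox_transform_inv is the standard Faster R-CNN delta decoding: each (dx, dy, dw, dh) shifts an anchor's centre and rescales its width and height through an exponential. A sketch of that decoding:

import numpy as np

def bbox_transform_inv(boxes, deltas):
    # Decode (dx, dy, dw, dh) deltas relative to the anchor boxes.
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes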
Example #27
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride = [16,], anchor_scales = [16,]):
    """
    Parameters
    ----------
    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
                         NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN
    im_info: a list of [image_height, image_width, scale_ratios]
    cfg_key: 'TRAIN' or 'TEST'
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)

    """
    # cfg_key=cfg_key.decode('ascii')
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # generate the 9 base anchors
    _num_anchors = _anchors.shape[0]  # 9 anchors

    im_info = im_info[0]  # original image height/width and scale factor

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # 12000: max candidate boxes kept before NMS
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # 2000: max boxes kept after NMS
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # NMS threshold, 0.7
    min_size = cfg[cfg_key].RPN_MIN_SIZE  # minimum proposal size, currently 16; both height and width must exceed it
    # TODO: this minimum size may need lowering later, perhaps to 8?

    height, width = rpn_cls_prob_reshape.shape[1:3]  # feature-map height and width

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # (1, H, W, A)
    scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
                        [1, height, width, _num_anchors])
    # keep the object scores (the non-object ones are irrelevant here)
    # and reshape to 1*H*W*9

    bbox_deltas = rpn_bbox_pred  # the predicted values are relative offsets and still need converting to real image coordinates
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    # as in anchor-target-layer-tf, generate the anchor shifts and from them all anchors over the whole image
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))  # these are now all the anchors over the whole image

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.reshape((-1, 4)) #(HxWxA, 4)

    # Same story for the scores:
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)  # invert the transform to get real box coordinates on the image

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])  # trim the proposals; anything extending past the image is clipped

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])  # remove proposals below the minimum size
    proposals = proposals[keep, :]  # keep the remaining proposals
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]


    # # remove irregular boxes, too fat too tall
    # keep = _filter_irregular_boxes(proposals)
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]  # sort scores from highest to lowest
    if pre_nms_topN > 0:  # keep 12000 proposals to go into NMS
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    bbox_deltas = bbox_deltas[order, :]


    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)  # run NMS, keeping 2000 proposals
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]


    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    blob = np.hstack((scores.astype(np.float32, copy=False), proposals.astype(np.float32, copy=False)))

    return blob, bbox_deltas
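clip_boxes simply clamps every proposal to the image extent; the usual implementation, sketched:

import numpy as np

def clip_boxes(boxes, im_shape):
    # Clamp x coordinates to [0, width - 1] and y coordinates to [0, height - 1].
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes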
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride=[16, ], anchor_scales=[8, 16, 32]):
    '''
    input[0], input[1], input[2], cfg_key, _feat_stride, anchor_scales
    :param rpn_cls_prob_reshape: keep/discard scores for the extracted bboxes, shape N, W, H, 18;
                                 in effect a foreground (fg) vs. background (bg) split
    :param rpn_bbox_pred: shape N, W, H, 36; the extracted bbox coordinates, not the ground truth
    :param im_info:
    :param cfg_key:
    :param _feat_stride:
    :param anchor_scales:
    :return:
    '''
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    # layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0,3,1,2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0,3,1,2])
    #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1])
    #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1])
    im_info = im_info[0]
    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    # maximum number of proposals kept before NMS (non-maximum suppression)
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # maximum number of proposals kept after NMS
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # threshold used by NMS
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    # the operation above maps feature-map coordinates to positions in the original image, which makes computing IoU convenient
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    # so anchors and bounding boxes are not quite the same thing: an anchor becomes a proposal,
    # i.e. a bounding box, only after being shifted and scaled by the predicted bbox_deltas
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    # 2. clip predicted boxes to image: trim the proposals to legal sizes
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
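generate_anchors builds the base anchors by sweeping aspect ratios at roughly constant area and then scaling; the real helper factors this into _ratio_enum/_scale_enum and rounds slightly differently, so treat this compact version as illustrative only. The CTPN snippets above use a different, vertical-strip generator with 9 or 10 anchors, as their comments note.

import numpy as np

def generate_anchors_sketch(base_size=16, ratios=(0.5, 1, 2), scales=(8, 16, 32)):
    # Centre of the [0, 0, base_size-1, base_size-1] base anchor.
    ctr = (base_size - 1) / 2.0
    anchors = []
    for ratio in ratios:
        # Change the aspect ratio while roughly preserving the base area.
        w = np.round(np.sqrt(base_size * base_size / ratio))
        h = np.round(w * ratio)
        for scale in scales:
            ws, hs = w * scale, h * scale
            anchors.append([ctr - 0.5 * (ws - 1), ctr - 0.5 * (hs - 1),
                            ctr + 0.5 * (ws - 1), ctr + 0.5 * (hs - 1)])
    return np.array(anchors)  # (len(ratios) * len(scales), 4)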
Example #29
            timer.tic()
            scores, boxes = im_detect(sess, net, im)
            timer.toc()
            print('Detection took {:.3f}s for '
                  '{:d} object proposals'.format(timer.total_time,
                                                 boxes.shape[0]))

            CONF_THRESH = 0.7
            NMS_THRESH = 0.1
            for cls_ind, cls in enumerate(CLASSES[1:]):
                cls_ind += 1  # because we skipped background
                cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
                cls_scores = scores[:, cls_ind]
                dets = np.hstack(
                    (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
                keep = nms(dets, NMS_THRESH)
                dets = dets[keep, :]
                class_name = cls
                thresh = 0.5
                # inlined from vis_detections(im, class_name, dets, ax, thresh=0.5): """Draw detected bounding boxes."""
                inds = np.where(dets[:, -1] >= thresh)[0]
                if len(inds) == 0:
                    k = k + 1

                else:

                    f.write('video_' + str(cate) + ',' +
                            im_name.split('/')[-1] + ',' + class_name + ',' +
                            str(len(inds)) + ',')
                    for i in inds:
                        bbox = dets[i, :4]
Example #30
def runForUI(imgWillBeDetected, imgOrigin, usegpu=0):

    # Setup caffe
    if usegpu >= 0:
        # caffe.mpi_init()
        caffe.set_mode_gpu()
        caffe.set_device(cfg.GPU_ID)
    else:
        # caffe.mpi_init()
        caffe.set_mode_cpu()

    # Config paths used below; they are simply listed inline for now.
    # A better approach would be to keep them in a file; to be improved when there is time.
    gallery_def = 'models/psdb/resnet50/eval_gallery.prototxt'  # prototxt path of the gallery network
    probe_def = 'models/psdb/resnet50/eval_probe.prototxt'  # prototxt path of the probe network
    caffemodel = 'output/psdb_train/resnet50/resnet50_iter_50000.caffemodel'  # path of the trained caffe model
    det_thresh = 0.75  # detection score threshold
    cfg_file = 'experiments/cfgs/resnet50.yml'  # config file path
    set_cfgs = None

    # Get query image and roi
    query_img = imgOrigin
    query_roi = [0, 0, 1292, 3008]  # [x1, y1, x2, y2]

    # Extract feature of the query person
    net = caffe.Net(probe_def, caffemodel, caffe.TEST)

    roi = np.asarray(query_roi).astype(np.float32).reshape(1, 4)

    feature = _im_exfeat(net, query_img, roi, ['feat'])
    query_feat = feature['feat'].squeeze()

    # query_feat = demo_exfeat(net, query_img, query_roi)
    del net  # Necessary to release cuDNN conv static workspace

    # Detect and extract feature of persons in each gallery image
    net = caffe.Net(gallery_def, caffemodel, caffe.TEST)

    # Necessary to warm-up the net, otherwise the first image results are wrong
    # Don't know why. Possibly a bug in caffe's memory optimization.
    # Nevertheless, the results are correct after this warm-up.
    _im_detect(net, query_img)  # this step exists because of a caffe bug and may go wrong

    gallery_img = imgWillBeDetected

    boxes, scores, feat_dic = _im_detect(net, gallery_img, None, ['feat'])

    j = 1  # only consider j = 1 (foreground class)
    inds = np.where(scores[:, j] > det_thresh)[0]
    cls_scores = scores[inds, j]
    cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
    boxes = np.hstack((cls_boxes, cls_scores[:,
                                             np.newaxis])).astype(np.float32)
    keep = nms(boxes, cfg.TEST.NMS)

    boxes = boxes[keep]
    features = feat_dic['feat'][inds][keep]

    if boxes.shape[0] == 0:
        return None, None

    features = features.reshape(features.shape[0], -1)

    if boxes is None:
        print(gallery_img, 'no detections')
        return Cv2Imread(gallery_img)

    # Compute pairwise cosine similarities,
    #   equals to inner-products, as features are already L2-normed
    similarities = features.dot(query_feat)

    # Visualize the results
    fig, ax = plt.subplots(figsize=(16, 9))

    ax.imshow(plt.imread(gallery_img))
    plt.axis('off')

    for box, sim in zip(boxes, similarities):
        x1, y1, x2, y2, _ = box
        ax.add_patch(
            plt.Rectangle((x1, y1),
                          x2 - x1,
                          y2 - y1,
                          fill=False,
                          edgecolor='#4CAF50',
                          linewidth=3.5))
        ax.add_patch(
            plt.Rectangle((x1, y1),
                          x2 - x1,
                          y2 - y1,
                          fill=False,
                          edgecolor='white',
                          linewidth=1))
        ax.text(x1 + 5,
                y1 - 18,
                '{:.2f}'.format(sim),
                bbox=dict(facecolor='#4CAF50', linewidth=0),
                fontsize=20,
                color='white')

    plt.tight_layout()

    # Save the plt-rendered figure to an in-memory buffer (faster than going through a file,
    # though a file would also work) and return it for opencv-python to read
    Buffer_ = BytesIO()  # allocate a buffer
    fig.savefig(Buffer_, format='png')
    Buffer_.seek(0)
    imgOutPut = PILImageOpen(Buffer_)
    Buffer_.close()

    del net

    return asarray(imgOutPut)
Example #31
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_infos, 
                    _feat_stride, opts, anchor_scales, anchor_ratios,
                    mappings):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    # layer_params = yaml.load(self.param_str_)
    batch_size = rpn_cls_prob_reshape.shape[0]
    _anchors = generate_anchors.generate_anchors(scales=anchor_scales, ratios=anchor_ratios)
    _num_anchors = _anchors.shape[0]
    pre_nms_topN = opts['num_box_pre_NMS']
    post_nms_topN = opts['num_box_post_NMS']
    nms_thres = opts['nms_thres']
    min_size = opts['min_size']

    blob = []
    
    for i in range(batch_size):
        im_info = im_infos[i]
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        height = mappings[int(im_info[0])]
        width = mappings[int(im_info[1])]
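        # mappings converts a (padded) image dimension into the matching feature-map size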
        scores = rpn_cls_prob_reshape[i, _num_anchors:, :height, :width]
        bbox_deltas = rpn_bbox_pred[i, :, :height, :width]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
            print('score map size: {}'.format(scores.shape))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * _feat_stride
        shift_y = np.arange(0, height) * _feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = _num_anchors
        K = shifts.shape[0]
        anchors = _anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
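        # e.g. with a 38x50 score map and A = 9 anchors: K = 1900 shifts,
        # giving `anchors` shape (17100, 4), one box per (location, anchor) pair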

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # per-image bbox deltas are in (4 * A, H, W) format
        # transpose to (H, W, 4 * A)
        # reshape to (H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest-to-fastest order
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #
        # per-image scores are in (A, H, W) format
        # transpose to (H, W, A)
        # reshape to (H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 1. convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        if opts['dropout_box_runoff_image']:
            _allowed_border = 16
            inds_inside = np.where(
                (proposals[:, 0] >= -_allowed_border) &
                (proposals[:, 1] >= -_allowed_border) &
                (proposals[:, 2] < im_info[1] + _allowed_border) &  # width
                (proposals[:, 3] < im_info[0] + _allowed_border)  # height
            )[0]
            proposals = proposals[inds_inside, :]
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)).astype(np.float32), nms_thres)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
        # Output rois blob: prepend each proposal with the index of the image
        # it came from, so downstream layers can separate the batch again
        batch_inds = np.ones((proposals.shape[0], 1), dtype=np.float32) * i
        blob.append(np.hstack((batch_inds, proposals.astype(np.float32, copy=False), scores.astype(np.float32, copy=False))))

    return np.concatenate(blob, axis=0)
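Step 1 above leans on `bbox_transform_inv`, imported from elsewhere in the repo. For reference, a from-scratch sketch of the standard Faster R-CNN decoding it is commonly expected to perform, assuming the usual (dx, dy, dw, dh) parameterization (the function below is our own illustration, not the repo's implementation):

import numpy as np

def bbox_transform_inv_sketch(anchors, deltas):
    """Decode (dx, dy, dw, dh) deltas against anchor boxes (x1, y1, x2, y2)."""
    widths = anchors[:, 2] - anchors[:, 0] + 1.0
    heights = anchors[:, 3] - anchors[:, 1] + 1.0
    ctr_x = anchors[:, 0] + 0.5 * widths
    ctr_y = anchors[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    pred_ctr_x = dx * widths + ctr_x          # shift the anchor center
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths              # scale the anchor size
    pred_h = np.exp(dh) * heights

    boxes = np.zeros_like(deltas, dtype=np.float32)
    boxes[:, 0] = pred_ctr_x - 0.5 * pred_w   # x1
    boxes[:, 1] = pred_ctr_y - 0.5 * pred_h   # y1
    boxes[:, 2] = pred_ctr_x + 0.5 * pred_w   # x2
    boxes[:, 3] = pred_ctr_y + 0.5 * pred_h   # y2
    return boxes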
Beispiel #32
0
def getRes_Img(sess, net, image):
    """Detect object classes in an image using pre-computed object proposals."""

    # Decode the base64-encoded image payload into an OpenCV BGR array.
    imgCon = image.imgcontent
    imgString = base64.b64decode(imgCon)
    nparr = np.frombuffer(imgString, np.uint8)  # binary-safe; np.fromstring is deprecated
    im = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    im = im[:, :, (2, 1, 0)]

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    res_img = Res_Image()  # result container: every piece of equipment in the image plus the image ID

    equiAllArr = []  # flat list of all equipment detected in the image
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        equiarr = EquiArr(im, cls, dets,
                          thresh=CONF_THRESH)  # candidate boxes for this single class only

        if equiarr is not None:
            for x in equiarr:
                equiAllArr.append(x)  # merge every class into one flat list

    # Sort by box area, smallest first.
    equiAllArr.sort(key=lambda Reco_Equipment: Reco_Equipment.acreage,
                    reverse=False)
    equiAllArrNew = []  # nested result: parents with their children attached
    ds = []  # indices already claimed as a child or visited as a parent

    # The detections come back as a flat list; build the parent/child nesting by hand.
    for i in range(len(equiAllArr)):
        if '_' not in equiAllArr[i].equiName:
            if (i in ds):  # already visited, skip to the next one
                continue
            ds.append(i)
            xmin = equiAllArr[i].area.xmin
            ymin = equiAllArr[i].area.ymin
            xmax = equiAllArr[i].area.xmax
            ymax = equiAllArr[i].area.ymax
            equChilds = []  # children: equipment nested inside this parent box
            for m in range(0, len(equiAllArr)):
                if (m in ds):
                    continue
                xx1 = np.maximum(xmin, equiAllArr[m].area.xmin)
                yy1 = np.maximum(ymin, equiAllArr[m].area.ymin)
                xx2 = np.minimum(xmax, equiAllArr[m].area.xmax)
                yy2 = np.minimum(ymax, equiAllArr[m].area.ymax)
                w = np.maximum(0, xx2 - xx1 + 1)
                h = np.maximum(0, yy2 - yy1 + 1)
                inter = float(w * h)  # overlap area
                if (inter / equiAllArr[m].acreage >=
                        0.8) and equiAllArr[i].equiName == 'DLQ' and endwith(
                            equiAllArr[m].equiName, '_CT'):
                    h1 = np.maximum(ymin,
                                    equiAllArr[m].area.ymin) - np.minimum(
                                        ymin, equiAllArr[m].area.ymin)
                    h2 = np.maximum(ymax,
                                    equiAllArr[m].area.ymax) - np.minimum(
                                        ymax, equiAllArr[m].area.ymax)
                    if h1 <= h2:
                        child = Reco_Equipment_child()
                        child.equiName = equiAllArr[i].equiName + '_MHCT'
                        child.area = equiAllArr[m].area
                        equChilds.append(child)
                        ds.append(m)
                    else:
                        child = Reco_Equipment_child()
                        child.equiName = equiAllArr[i].equiName + '_ZZCT'
                        child.area = equiAllArr[m].area
                        equChilds.append(child)
                        ds.append(m)
                elif (inter / equiAllArr[m].acreage >= 0.8) and endwith(
                        equiAllArr[m].equiName, '_CT'):
                    child = Reco_Equipment_child()
                    child.equiName = equiAllArr[i].equiName + '_CT'
                    child.area = equiAllArr[m].area
                    equChilds.append(child)
                    ds.append(m)
                elif (inter / equiAllArr[m].acreage >= 0.8) and endwith(
                        equiAllArr[m].equiName, '_JT'):
                    child = Reco_Equipment_child()
                    child.equiName = equiAllArr[i].equiName + '_JT'
                    child.area = equiAllArr[m].area
                    equChilds.append(child)
                    ds.append(m)
                elif (inter / equiAllArr[m].acreage >= 0.8):
                    child = Reco_Equipment_child()
                    child.equiName = equiAllArr[m].equiName
                    child.area = equiAllArr[m].area
                    equChilds.append(child)
                    ds.append(m)

            equiAllArr[i].children = equChilds
            equiAllArrNew.append(equiAllArr[i])

    for i in range(len(equiAllArr)):
        if (i in ds):  # already visited, skip to the next one
            continue
        equChilds = []
        equiAllArr[i].children = equChilds
        equiAllArrNew.append(equiAllArr[i])

    res_img.imgID = image.imgID
    # Keep everything, or filter down to the requested equipment type.
    if image.equiptype == '':
        res_img.equipments = equiAllArrNew
    else:
        equiresArr = []
        res_equip = image.equiptype.lower()
        for n in range(len(equiAllArrNew)):
            if res_equip in equiAllArrNew[n].equiName:
                equiresArr.append(equiAllArrNew[n])
        res_img.equipments = equiresArr

    return res_img
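The nesting pass above boils down to one geometric test: a detection becomes a child of a larger box when their intersection covers at least 80% of the smaller box's area. A stripped-down version of that test (the helper name is ours, not the repo's):

def is_nested(parent, child, thresh=0.8):
    """True if `child` lies mostly inside `parent`.

    Boxes are (xmin, ymin, xmax, ymax); the test is
    intersection_area / child_area >= thresh, matching the 0.8
    cutoff used above.
    """
    xx1 = max(parent[0], child[0])
    yy1 = max(parent[1], child[1])
    xx2 = min(parent[2], child[2])
    yy2 = min(parent[3], child[3])
    w = max(0, xx2 - xx1 + 1)
    h = max(0, yy2 - yy1 + 1)
    inter = float(w * h)
    child_area = (child[2] - child[0] + 1) * (child[3] - child[1] + 1)
    return inter / child_area >= thresh

# A CT box fully inside a DLQ box is nested; a disjoint box is not.
assert is_nested((0, 0, 100, 100), (10, 10, 40, 40))
assert not is_nested((0, 0, 100, 100), (200, 200, 240, 240))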