Code Example #1
File: TattDL_detector.py  Project: z-harry-sun/TattDL
def tattoo_detection(net, image_name, args):
    """Detect object classes in an image using pre-computed object proposals."""

    im_in = cv2.imread(image_name)

    if im_in is None:
        print('cannot open %s for read' % image_name )
        exit(-1)

    rows,cols = im_in.shape[:2]
    print([rows,cols])

    scale=1.0
    if rows >= cols:
        scale = float(args.longdim) / float(rows)
        im = cv2.resize( im_in, (int(0.5 + float(cols)*scale), args.longdim) )
    else:
        scale = float(args.longdim) / float(cols)
        im = cv2.resize( im_in, (args.longdim, int(0.5 + float(rows)*scale)) )

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    seconds = '%.3f' % timer.total_time
    print('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    max_scores = scores.max(axis=0)
    print(max_scores)
    print(boxes.shape)

    # Visualize detections for each class
    CONF_THRESH = args.threshold
    NMS_THRESH  = args.nms_thresh

    tattoo_dets=[]
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]

        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        dets_filter = dets[inds]

        vis_detections(im, cls, dets_filter, thresh=CONF_THRESH)

        if cls == 'tattoo' and len(dets_filter)>0:
            plt.savefig(os.path.join(args.output, os.path.splitext(os.path.basename(image_name))[0] + '_det.png'))
            tattoo_dets = dets_filter

    if args.inspect == 'v':
        plt.show()
    plt.clf()

    return tattoo_dets, max_scores, seconds, scale
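Note: every example in this collection hands nms() an N x 5 array of [x1, y1, x2, y2, score] rows and gets back the indices of the boxes to keep. The projects above use the compiled Cython/GPU routine from py-faster-rcnn; as a reference for what that call computes, here is a minimal pure-NumPy sketch of the same greedy IoU suppression (a simplified stand-in, ignoring the force_cpu flag some examples pass):

import numpy as np

def nms(dets, thresh):
    """Greedy non-maximum suppression: keep the highest-scoring box,
    drop every remaining box whose IoU with it exceeds thresh, repeat."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the kept box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only the boxes that overlap the kept box by at most thresh
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep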
Code Example #2
File: detect_multi.py  Project: mhsung/TheiaSfM
def detect_bboxes(net, im_names, subset_classes):
    """Detect object classes in an image using pre-computed object proposals."""
    df = cnn_utils.create_bbox_data_frame(with_object_index=False)

    for im_name in im_names:
        print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        print 'Demo for {}'.format(im_name)

        # Load the input image.
        im_file = os.path.join(FLAGS.data_dir, 'images', im_name)
        im = cv2.imread(im_file)
        im_size_x = im.shape[1]
        im_size_y = im.shape[0]

        # Detect all object classes and regress object bounds.
        timer = Timer()
        timer.tic()
        scores, boxes = im_detect(net, im)
        timer.toc()
        print ('Detection took {:.3f}s for '
               '{:d} object proposals').format(
            timer.total_time, boxes.shape[0])

        # Detect for each class
        for subset_cls_ind in range(len(class_names_to_be_detected)):
            cls = class_names_to_be_detected[subset_cls_ind]
            try:
                cls_ind = CLASSES.index(cls)
            except:
                print('error: class does not exist in training data: '
                      '{0}'.format(cls))
                exit(-1)

            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, FLAGS.nms_thresh)
            dets = dets[keep, :]
            inds = np.where(dets[:, -1] >= FLAGS.conf_thresh)[0]
            if len(inds) > 0:
                print ('{} {}(s) are detected.'.format(len(inds), cls))

            for i in inds:
                # ['image_name', 'class_index', 'x1', 'y1', 'x2', 'y2', 'score']
                x1 = dets[i, 0]
                y1 = dets[i, 1]
                x2 = dets[i, 2]
                y2 = dets[i, 3]
                score = dets[i, -1]
                if FLAGS.ignore_bbox_on_boundary:
                    # Ignore bounding boxes on the frame boundary.
                    if x1 <= 0 or x2 >= (im_size_x - 1) or \
                            y1 <= 0 or y2 >= (im_size_y - 1):
                        continue
                # Append a row.
                df.loc[len(df)] = [
                    im_name, subset_cls_ind, x1, y1, x2, y2, score]

    return df
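The 4*cls_ind:4*(cls_ind + 1) slice seen in these examples follows the py-faster-rcnn convention: im_detect returns scores of shape (R, K) and per-class boxes of shape (R, 4K) for R proposals and K classes, with class 0 reserved for background. A toy illustration of how the N x 5 dets array is assembled, using dummy data:

import numpy as np

R, K = 3, 4  # 3 proposals, 4 classes (class 0 = background)
scores = np.random.rand(R, K)
boxes = np.random.rand(R, 4 * K)

cls_ind = 2  # any foreground class
cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]  # (R, 4): x1, y1, x2, y2
cls_scores = scores[:, cls_ind]                      # (R,)
dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
print(dets.shape)  # (3, 5) -- the layout every nms() call above expects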
Code Example #3
File: minibatch.py  Project: guanlongzhao/dehaze
def get_ohem_minibatch(loss, rois, labels, bbox_targets=None,
                       bbox_inside_weights=None, bbox_outside_weights=None):
    """Given rois and their loss, construct a minibatch using OHEM."""
    loss = np.array(loss)

    if cfg.TRAIN.OHEM_USE_NMS:
        # Do NMS using loss for de-dup and diversity
        keep_inds = []
        nms_thresh = cfg.TRAIN.OHEM_NMS_THRESH
        source_img_ids = [roi[0] for roi in rois]
        for img_id in np.unique(source_img_ids):
            for label in np.unique(labels):
                sel_indx = np.where(np.logical_and(labels == label, \
                                    source_img_ids == img_id))[0]
                if not len(sel_indx):
                    continue
                boxes = np.concatenate((rois[sel_indx, 1:],
                        loss[sel_indx][:,np.newaxis]), axis=1).astype(np.float32)
                keep_inds.extend(sel_indx[nms(boxes, nms_thresh)])

        hard_keep_inds = select_hard_examples(loss[keep_inds])
        hard_inds = np.array(keep_inds)[hard_keep_inds]
    else:
        hard_inds = select_hard_examples(loss)

    blobs = {'rois_hard': rois[hard_inds, :].copy(),
             'labels_hard': labels[hard_inds].copy()}
    if bbox_targets is not None:
        assert cfg.TRAIN.BBOX_REG
        blobs['bbox_targets_hard'] = bbox_targets[hard_inds, :].copy()
        blobs['bbox_inside_weights_hard'] = bbox_inside_weights[hard_inds, :].copy()
        blobs['bbox_outside_weights_hard'] = bbox_outside_weights[hard_inds, :].copy()

    return blobs
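select_hard_examples() is not shown in this excerpt. In the reference OHEM implementation it simply keeps the RoIs with the largest loss, up to the configured minibatch size; a plausible reconstruction (cfg is assumed to be the same fast_rcnn config object the example already uses):

import numpy as np

def select_hard_examples(loss):
    """Return indices of the highest-loss (hard) examples, capped at
    cfg.TRAIN.BATCH_SIZE as in py-faster-rcnn's OHEM fork (assumption)."""
    sorted_indices = np.argsort(loss)[::-1]  # descending loss
    return sorted_indices[0:np.minimum(len(loss), cfg.TRAIN.BATCH_SIZE)]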
Code Example #4
File: mydemo.py  Project: CptSteven/faster-rcnn-mod
def detect(net, im):
    """ """


    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    res = 0
    global CLASSES
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        res += vis_detections(im, cls, dets, thresh=CONF_THRESH)
    return im,res
Code Example #5
File: reducer.py  Project: walkerning/lpirc2016
    def reduce_boxes(self, scores, boxes):
        """
        Reduce the result boxes
        """
        box_classes = []
        box_scores = np.array([], dtype="float32")
        for j in xrange(1, self.num_classes+1):
            # single-class NMS
            inds = np.where(scores[:, j] > self.score_thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                         .astype(np.float32, copy=False)
            keep = nms(cls_dets, self.IoU_thresh, self.force_cpu)
            # use vstack or list
            box_classes += [j] * len(keep)
            cls_dets = cls_dets[keep, :]
            box_scores = np.append(box_scores, cls_scores[keep])
            if j == 1:
                all_dets = cls_dets
            else:
                all_dets = np.vstack((all_dets, cls_dets))

        box_classes = np.array(box_classes, dtype=int)
        # Limit to max_per_image detections *over all classes*
        if len(box_classes) > self.max_per_image:
            indexes = np.argsort(-box_scores)[:self.max_per_image]
            all_dets = all_dets[indexes, :]
            box_classes = box_classes[indexes]

        return (box_classes, all_dets)
Code Example #6
def demo_detect(net, filename, blob_name='feat', threshold=0.5):
    """Detect persons in a gallery image and extract their features

    Arguments:
        net (caffe.Net): trained network
        filename (str): path to a gallery image file (jpg or png)
        blob_name (str): feature blob name. Default 'feat'
        threshold (float): detection score threshold. Default 0.5

    Returns:
        boxes (ndarray): N x 5 detected boxes in format [x1, y1, x2, y2, score]
        features (ndarray): N x D features matrix
    """
    im = cv2.imread(filename)
    boxes, scores, feat_dic = _im_detect(net, im, None, [blob_name])

    j = 1  # only consider j = 1 (foreground class)
    inds = np.where(scores[:, j] > threshold)[0]
    cls_scores = scores[inds, j]
    cls_boxes = boxes[inds, j*4:(j+1)*4]
    boxes = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(boxes, cfg.TEST.NMS)

    boxes = boxes[keep]
    features = feat_dic[blob_name][inds][keep]

    if boxes.shape[0] == 0:
        return None, None

    features = features.reshape(features.shape[0], -1)
    return boxes, features
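A hypothetical call of the function above (the image path and the trained net are placeholders):

boxes, features = demo_detect(net, 'gallery_001.jpg', threshold=0.5)
if boxes is None:
    print('no detections above threshold')
else:
    print('%d boxes, feature dim %d' % (boxes.shape[0], features.shape[1]))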
Code Example #7
File: tasks.py  Project: suresecure/ivs-rpc
def detect_image(net, im):
    """Detect object classes in an image using pre-computed object proposals."""

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print(str(current_process().index)+' Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    # CONF_THRESH = 0.0

    CONF_THRESH = 0.4
    NMS_THRESH = 0.1
    person_idx = CLASSES.index('person')
    person_boxes = boxes[:, 4*person_idx:4*(person_idx + 1)]
    person_scores = scores[:, person_idx]
    person_dets = np.hstack((person_boxes,
                      person_scores[:, np.newaxis])).astype(np.float32)
    person_keep = nms(person_dets, NMS_THRESH)
    person_dets = person_dets[person_keep, :]
    # inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
    person_dets = person_dets[np.where(person_dets[:, -1] >= CONF_THRESH)]
    return person_dets
Code Example #8
File: detect.py  Project: mhsung/TheiaSfM
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(FLAGS.data_dir, 'images', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])
    print ('Target class: {}'.format(FLAGS.target_class))

    # Visualize detections for each class
    for cls_ind, cls in enumerate(CLASSES[1:]):
        #
        if cls != FLAGS.target_class:
            continue
        #
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, FLAGS.nms_thresh)
        dets = dets[keep, :]
        write_cropped_images(im, image_name, cls, dets, thresh=FLAGS.conf_thresh)
Code Example #9
File: demo.py  Project: Anjio/Faster-RCNN_TF
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
Code Example #10
File: demo.py  Project: brentsony/py-faster-rcnn
def demo (net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
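Examples #9 and #10 differ mainly in whether vis_detections() receives a matplotlib axis. For reference, the helper bundled with the original py-faster-rcnn demo looks roughly like this (a sketch of the upstream version, not of the forks above, several of which clearly modified it to return counts, boxes, or scores):

import matplotlib.pyplot as plt
import numpy as np

def vis_detections(im, class_name, dets, thresh=0.5):
    """Draw the detections that score above thresh."""
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return
    im = im[:, :, (2, 1, 0)]  # BGR (OpenCV) -> RGB (matplotlib)
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        ax.add_patch(plt.Rectangle((bbox[0], bbox[1]),
                                   bbox[2] - bbox[0], bbox[3] - bbox[1],
                                   fill=False, edgecolor='red', linewidth=3.5))
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')
    ax.set_title('{} detections with score >= {:.1f}'.format(class_name, thresh),
                 fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()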
Code Example #11
File: detection.py  Project: oyvindhg/FFIFaster-R-CNN
def clean_detections(scores, boxes, thresh = CONF_THRESH):
    """Clean up scores and boxes and return the interesting object in a handy format."""

    for detection_index in range(len(scores)):
        if scores[detection_index][0] > (1-CONF_THRESH):
            continue

    for cls_ind, cls in enumerate(CLASSES[1:]): # There is probably a better way to do this.
        #print cls_ind, cls
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]

        detections = []
        inds = np.where(dets[:, -1] >= thresh)[0]
        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]
            detections.append(({"person":score}, tuple(bbox)))

        return detections
Code Example #12
def demo(net, im, return_boxes):
    """Detect object classes in an image using pre-computed object proposals."""
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    classes = {}
    for cls_ind, cls in enumerate(CLASSES[1:]):
        try:
            cls_ind += 1 # because we skipped background
            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            bboxes = vis_detections(im, cls, dets, return_boxes, thresh=CONF_THRESH)
            classes[cls] = bboxes
        except Exception as e:
            continue
    if not return_boxes:
        cv2.imshow("image", im)
    return classes
Code Example #13
File: test.py  Project: Austriker/py-faster-rcnn
def apply_nms(all_boxes, thresh):
    """
    Apply non-maximum suppression to all predicted boxes output by the
    test_net method.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]

    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]

            if dets == []:
                continue
            # CPU NMS is much faster than GPU NMS when the number of boxes
            # is relative small (e.g., < 10k)
            # TODO(rbg): autotune NMS dispatch
            keep = nms(dets, thresh, force_cpu=True)

            if len(keep) == 0:
                continue

            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()

    return nms_boxes
Code Example #14
File: demo.py  Project: xyy19920105/py-faster-rcnn
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])
    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        order = cls_scores.argsort()[::-1]
        sorted_dets = dets[order, :]
        keep = nms(dets, NMS_THRESH)
        with open('/home/xyy/Desktop/doing/Object Detection/py-faster-rcnn/test_python.txt','w') as f:
            dets = dets[keep, :]
            for i in dets:
                for j in i:
                    f.write(str(j)+ ' ')
                f.write('\n')
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Code Example #15
File: demo.py  Project: kiyomaro927/py-faster-rcnn
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    json_data_list = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        bbox, score = vis_detections(im, cls, dets, thresh=CONF_THRESH)
        if score:
            json_data_list.append({"class":cls,
                                   'bbox':bbox,
                                   'score':score})
        if len(json_data_list):
            f = open("result/"+image_name+".json", "w")
            json.dump(json_data_list, f, indent=2)
Code Example #16
def demo (net, imagePathName, scoreThreshold):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(imagePathName)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()

    scores, boxes = im_detect(net, im)

    timer.toc()
    debug('Object detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    path, imageFilename = os.path.split(imagePathName)
    catDir = os.path.split(path)[-1]
    imageName = catDir + '/' + imageFilename
    for i, cls in enumerate(CLASSES[1:]):
        i += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * i:4 * (i + 1)]
        cls_scores = scores[:, i]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESHOLD)
        dets = dets[keep, :]
        vis_detections(im, cls, imageName, dets, scoreThreshold)
Code Example #17
File: caffe_chains.py  Project: pulkitag/chainer
	def produce(self, ip):
		scores, bbox   = im_detect(self.net_, ip)
		#Find the top class for each box
		bestClass  = np.argmax(scores,axis=1)
		bestScore  = np.max(scores, axis=1)
		allDet     = edict()
		for cl in [self.prms_.targetClass]:
			clsIdx = self.cls_.index(cl)
			#Get all the boxes that belong to the desired class
			idx    = bestClass == clsIdx
			clScore = bestScore[idx]
			clBox   = bbox[idx,:]
			#Sort the boxes by the score
			sortIdx  = np.argsort(-clScore)
			topK     = min(len(sortIdx), self.prms_.topK)
			sortIdx  = sortIdx[0:topK]
			#Get the desired output
			clScore = clScore[sortIdx]
			clBox   = clBox[sortIdx]
			clBox   = clBox[:, (clsIdx * 4):(clsIdx*4 + 4)]
			#Stack detections and perform NMS
			dets=np.hstack((clBox, clScore[:,np.newaxis])).astype(np.float32)
			keep = nms(dets, self.prms_.nmsThresh)
			dets = dets[keep, :]
			#Only keep detections with high confidence
			inds = np.where(dets[:, -1] >= self.prms_.confThresh)[0]
			allDet[cl]   = copy.deepcopy(dets[inds])
		return allDet
Code Example #18
def detect_person(net, im,cls_ind=1,cls='person',CONF_THRESH = 0.8):
    """Detect object classes in an image using pre-computed object proposals."""

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    NMS_THRESH = 0.3
    cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    dets = np.hstack((cls_boxes,
                      cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    # Filtering by confidence threshold as well
    keep = [ind for ind in keep if cls_scores[ind]>CONF_THRESH]
    if (len(keep)>1):
        sizes = np.zeros((len(keep),))
        for ind,curr_ind in enumerate(keep):
            bbox = dets[curr_ind,:4]
            sizes[ind] = (bbox[3]-bbox[1])*(bbox[2]-bbox[0])
        # Retain only the biggest bounding box
        keep = keep[np.argmax(sizes)]
    
    dets = dets[keep, :]
    return (dets.reshape(1,-1),cls_scores[keep])
Code Example #19
    def _post_process(self, scores, boxes):
        obj_list = []
        for cls_ind, cls in enumerate(CLASSES[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, self.nms_thresh)
            dets = dets[keep, :]

            inds = np.where(dets[:, -1] >= self.conf_thresh)[0]
            if len(inds) == 0:
                continue

            for i in inds:
                obj = msg.object()
                obj.class_name = cls
                obj.score      = dets[i, -1]

                bbox = dets[i, :4]
                obj.region.x_offset = bbox[0]
                obj.region.y_offset = bbox[1]
                obj.region.width    = bbox[2] - bbox[0]
                obj.region.height   = bbox[3] - bbox[1]
                obj.region.do_rectify = False

                obj_list.append(obj)

        return obj_list
Code Example #20
File: app.py  Project: PierreHao/faster-rcnn-demo
def detect_objects(imgpath):
    """Detect object classes in an image using pre-computed object proposals."""

    print("in detect object")    
    # Load the demo image
    im_file = os.path.join(imgpath)
    im = cv2.imread(im_file)
    print("read image")

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    print("im_detect")
    scores, boxes = im_detect(app.config['net'], im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    results = dict()
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        results[cls] = detect_positions(im, cls, dets, thresh=CONF_THRESH)
    return results
Code Example #21
File: detect.py  Project: tejas0908/smart-traffic
    def detect(self, image):
        '''
        :param image: image from which the objects should be detected
        :param CONF_THRESHOLD: list of per-category confidence thresholds; if None
                               or empty, 0.7 is used for every category, and entries
                               equal to zero also fall back to 0.7
        :param NMS_THRESHOLD: bounding-box overlap threshold; lower values suppress
                              more duplicate detections
        :return: tuple (detections, elapsed_seconds), where detections is a list of
                 dicts with 'bbox', 'category' and 'confidence' keys
        '''
        start = time.time()
        bbox_class_list = []

        scores, boxes = im_detect(self.model, image)

        for cls_ind, (cls, threshold) in enumerate(zip(self.cat, self.CONF_THRESHOLD)):
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, self.NMS_THRESHOLD)
            dets = dets[keep, :]
            inds = np.where(dets[:, -1] >= threshold)[0]

            for i in inds:
                # x1,y1,x2,y2 = dets[i,:-1]
                bbox_class_list.append(
                        {'bbox': dets[i, :-1].tolist(), 'category': cls, 'confidence': float(dets[i, -1])})
        end = time.time()

        return (bbox_class_list, end - start)
Code Example #22
def demo(net, image_path):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = cv2.imread(image_path)

    # Detect all object classes and regress object bounds
    started = time()
    scores, boxes = im_detect(net, im)
    elapsed = time() - started
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(elapsed, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Code Example #23
File: test_all.py  Project: debidatta/py-faster-rcnn
def get_detections(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = image_name#os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    #timer = Timer()
    #timer.tic()
    scores, boxes, pose_a, pose_e = im_detect(net, im)
    #timer.toc()
    #print ('Detection took {:.3f}s for '
    #       '{:d} object proposals').format(timer.total_time, boxes.shape[0])
    #print "a=%s, e=%s"%(5*pose_a, 5*pose_e)
    # Visualize detections for each class
    #CONF_THRESH =0.25#0.75
    #print 'threashold: {}'.format(CONF_THRESH)
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = np.hstack((cls_boxes,
                          5*pose_a[:,np.newaxis], 5*pose_e[:,np.newaxis], cls_scores[:, np.newaxis])).astype(np.float32)
        dets = dets[keep, :]
        #print "a=%s, e=%s"%(5*pose_a[keep], 5*pose_e[keep])
    return dets
Code Example #24
File: test.py  Project: guoshouyan/faster-rcnn-car
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(num_classes)]

    for cls_ind in xrange(num_classes):

        if cls_ind == 0:
            thresh = 0.3
        else:
            thresh = 0.7

        for im_ind in xrange(num_images):
            dets = all_boxes[cls_ind][im_ind]
            if dets == []:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
Code Example #25
def get_topK_boxes(scores, boxes, K):
    keep_boxes = []
    keep_scores = []
    keep_class = []
    for cls_ind, cls in enumerate(CLASSES):
        if cls_ind == 0:
            continue
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        keep_boxes.extend(cls_boxes[keep, :])
        keep_scores.extend(cls_scores[keep])
        keep_class.extend([cls_ind] * len(keep))
        
    mat_scores = np.array(keep_scores)
    mat_classes = np.array(keep_class)
    mat_boxes = np.array(keep_boxes, dtype='int16')       
    
    # get top K
    order = np.argsort(keep_scores)[::-1]
    keep = order[0:K]
    
    return (mat_boxes[keep, :], mat_scores[keep], mat_classes[keep])
Code Example #26
def demo(net, im_file):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image as gray scale
    gim = cv2.imread(im_file, flags= cv2.CV_LOAD_IMAGE_GRAYSCALE)
    # convert to rgb repeated in each channel
    im = cv2.cvtColor(gim, cv2.COLOR_GRAY2BGR)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Code Example #27
def test_single_frame(sess, net, image_name, mask, force_cpu, output_dir):
    """Detect object classes in an image using pre-computed object proposals."""

    #********************
    # Need change here
    #********************
    im_file = os.path.join(cfg.DATA_DIR, 'test/images/', image_name)
    #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name)
    im_bgr = cv2.imread(im_file)
    im = np.zeros((im_bgr.shape[0], im_bgr.shape[1], 4))
    im[:,:,0:3] = im_bgr
    im[:,:,3] = mask

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    score, label, box, mask = im_detect(sess, net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, label.shape[0])

    # Visualize detections for each class
    im_rgb = im_bgr[:, :, (2, 1, 0)]
    im_mask = np.zeros(im_rgb.shape).astype(im_rgb.dtype)
    fig, ax = plt.subplots(figsize=(12, 12))
    # ax.imshow(im_rgb, aspect='equal')
    CONF_THRESH = 0.85
    NMS_THRESH = 0.3

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        i = np.where(label==cls_ind)[0]
        cls_score = score[i]
        cls_box = box[i, :]
        cls_mask = mask[i, :, :]
        if cfg.DEBUG:
            print 'i: '
            print i.shape
            print 'cls_score: '
            print cls_score
            print 'box.shape: '
            print box.shape
            print 'cls_box shape: '
            print cls_box.shape
        dets = np.hstack((cls_box,
                          cls_score[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH, force_cpu)
        dets = dets[keep, :]
        segs = cls_mask[keep, :, :]
        print ('After nms, {:d} object proposals').format(dets.shape[0])
        im_mask = vis_detections(im_rgb, im_mask, cls, dets, segs, ax, thresh=CONF_THRESH)
    # plt.savefig(os.path.join(output_dir, 'box_'+image_name))
    #im2 = cv2.imread(os.path.join(output_dir,'box_'+image_name))
    im_rgb += im_mask/2
    im_mask_grey = cv2.cvtColor(im_mask, cv2.COLOR_RGB2GRAY)
    im_mask_grey[np.where(im_mask_grey!=0)] = 255
    cv2.imwrite(os.path.join(output_dir,'output_'+image_name), im_rgb[:,:,(2,1,0)])
    cv2.imwrite(os.path.join(output_dir,'mask_'+image_name), im_mask_grey)
    return im_mask_grey
Code Example #28
File: caltech.py  Project: Steph0117/py-faster-rcnn
def detect(file_path, NMS_THRESH=0.3):
    im = cv2.imread(file_path)
    scores, boxes = im_detect(net, im)
    cls_scores = scores[:, 1]
    cls_boxes = boxes[:, 4:8]
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    return dets[keep, :]
Code Example #29
def demo(net, im_file, output_file):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.3 #0.8
    NMS_THRESH = 0.3 #0.3


    # Write output to file
    output = open(output_file, 'w')
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        cls_boxes = cls_boxes.transpose()
        for box_ind, box in enumerate(cls_boxes):
            output.write(CLASSES[cls_ind] + '\n')
            output.write(str(cls_scores[box_ind]) + '\n')
            output.write(' '.join([str(x) for x in cls_boxes[:, box_ind]]) + '\n')
    output.close()

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Code Example #30
    def detectionMethod(self, im, im_copy, frame_gray, net, p, detectedItem, colorScheme):
        scores, boxes = im_detect(net, im)
        NMS_THRESH = 0.3
        detectedItemsInThisFrame = []

        for cls_ind, cls in enumerate(CLASSES[1:]):
            cls_ind += 1 # because we skipped background
            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                              cls_scores[:, np.newaxis])).astype(np.float32) #stacks them together
            keep = nms(dets, NMS_THRESH) #Removes overlapping bounding boxes
            dets = dets[keep, :]

            #print "if cls == {0}: {1}".format(str(detectedItem), (cls == detectedItem))
            if cls == detectedItem:
                print "under else"
                inds = np.where(dets[:, -1] >= 0.5)[0] #Threshold applied to score values here
                im = im[:, :, (2, 1, 0)]

                for i in inds:
                    print "running for loop"
                    bbox = dets[i, :4]
                    detectedBBox = bbox.astype(int)
                    score = dets[i, -1]
                    bboxCentroid = c.mathArrayCentroid(detectedBBox)
                    cv2.circle(im_copy, bboxCentroid, 5, colorScheme, -1) #bbox centroid of detectedItemArray
                    #Calculate bbox centroid. Use it to determine if the item should be added to detectedItemArray

                    #Check if the centroid of the detected box is within the designated traffic intersection area
                    if (str(detectedItem) == "car") or (str(detectedItem) == "bus"):
                        print "if statement, detected car or bus"
                        if p.contains_point(bboxCentroid) == 1:
                            print "within area"
                            #Calculate corners of interest within the bounding box area and add them all to the corner array
                            detectedPixels = frame_gray[bbox[1]:bbox[3], bbox[0]:bbox[2]] #[y1:y2, x1:x2]
                            detectedPixelsColor = im_copy[bbox[1]:bbox[3], bbox[0]:bbox[2]] #for show on colored image
                            corners = cv2.goodFeaturesToTrack(detectedPixels, mask=detectedPixels, **self.feature_params).reshape(-1, 2)

                            # for x, y in np.float32(corners).reshape(-1, 2): #black
                            #     cv2.circle(detectedPixels, (x,y), 5, (0, 0, 0), -1)
                            #     cv2.circle(detectedPixelsColor, (x, y), 5, (0, 0, 0), -1)

                            detectedItemsInThisFrame.append([[detectedBBox, corners]])
                        else:
                            print "car/bus not added. Coordinates: ", bbox
                    
                    else:
                        print "else not car or bus detected"
                        detectedPixels = frame_gray[bbox[1]:bbox[3], bbox[0]:bbox[2]] #[y1:y2, x1:x2]
                        detectedPixelsColor = im_copy[bbox[1]:bbox[3], bbox[0]:bbox[2]] #for show on colored image
                        corners = cv2.goodFeaturesToTrack(detectedPixels, mask=detectedPixels, **self.feature_params).reshape(-1, 2)

                print "detectedItemsInThisFrame len: {0}-------------------------------------".format(len(detectedItemsInThisFrame))
                print "detectedItemsInThisFrame: ", detectedItemsInThisFrame

                return detectedItemsInThisFrame
Code Example #31
def demo_tuples(net, image_name):
    """Detect objects, attributes and relations in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes, attr_scores, rel_scores = im_detect(net, im)
    if attr_scores is not None:
        print 'Found attribute scores'
    if rel_scores is not None:
        print 'Found relation scores'
        rel_scores = rel_scores[:, 1:]  # drop no relation
        rel_argmax = np.argmax(rel_scores, axis=1).reshape(
            (boxes.shape[0], boxes.shape[0]))
        rel_score = np.max(rel_scores, axis=1).reshape(
            (boxes.shape[0], boxes.shape[0]))

    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.05
    ATTR_THRESH = 0.1

    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im)

    # Detections
    det_indices = []
    det_scores = []
    det_objects = []
    det_bboxes = []
    det_attrs = []

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = np.array(nms(dets, NMS_THRESH))
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]

        if len(inds) > 0:
            keep = keep[inds]
            for k in keep:
                det_indices.append(k)
                det_bboxes.append(cls_boxes[k])
                det_scores.append(cls_scores[k])
                det_objects.append(cls)
                if attr_scores is not None:
                    attr_inds = np.where(attr_scores[k][1:] >= ATTR_THRESH)[0]
                    det_attrs.append([ATTRS[ix] for ix in attr_inds])
                else:
                    det_attrs.append([])

    rel_score = rel_score[det_indices].T[det_indices].T
    rel_argmax = rel_argmax[det_indices].T[det_indices].T
    for i, (idx, score, obj, bbox, attr) in enumerate(
            zip(det_indices, det_scores, det_objects, det_bboxes, det_attrs)):
        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1],
                          fill=False,
                          edgecolor='red',
                          linewidth=3.5))
        box_text = '{:s} {:.3f}'.format(obj, score)
        if len(attr) > 0:
            box_text += "(" + ",".join(attr) + ")"
        ax.text(bbox[0],
                bbox[1] - 2,
                box_text,
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14,
                color='white')

        # Outgoing
        score = np.max(rel_score[i])
        ix = np.argmax(rel_score[i])
        subject = det_objects[ix]
        relation = RELATIONS[rel_argmax[i][ix]]
        print 'Relation: %.2f %s -> %s -> %s' % (score, obj, relation, subject)
        # Incoming
        score = np.max(rel_score.T[i])
        ix = np.argmax(rel_score.T[i])
        subject = det_objects[ix]
        relation = RELATIONS[rel_argmax[ix][i]]
        print 'Relation: %.2f %s -> %s -> %s' % (score, subject, relation, obj)

    ax.set_title(('detections with '
                  'p(object|box) >= {:.1f}').format(CONF_THRESH),
                 fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()
    plt.savefig('data/demo/' +
                im_file.split('/')[-1].replace(".jpg", "_demo.jpg"))
Code Example #32
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key,
                   _feat_stride=[
                       16,
                   ],
                   anchor_scales=[8, 16, 32],
                   anchor_ratios=[0.5, 1, 2]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(ratios=anchor_ratios,
                                scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1])
    #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # remove_option = 1
    # if ('TEST' == cfg_key and remove_option in [1, 2]):
    #     # get rid of boxes that are completely inside other boxes
    #     # with options as to which one to get rid of
    #     # 1. always the one with lower scores, 2. always the one inside
    #     new_proposals = []
    #     removed_indices = set()
    #     num_props = proposals.shape[0]
    #     for i in range(num_props):
    #         if (i in removed_indices):
    #             continue
    #         bxA = proposals[i, :]
    #         for j in range(num_props):
    #             if ((j == i) or (j in removed_indices)):
    #                 continue
    #             bxB = proposals[j, :]
    #             if (bbox_contains(bxA, bxB)):
    #                 if ((1 == remove_option) and (scores[i] != scores[j])):
    #                     if (scores[i] > scores[j]):
    #                         removed_indices.add(j)
    #                     else:
    #                         removed_indices.add(i)
    #                 else: # remove_option == 2 or scores[i] == scores[j]
    #                     removed_indices.add(j)
    #     nr = len(removed_indices)
    #     if (nr > 0):
    #         new_proposals = sorted(set(range(num_props)) - removed_indices)
    #         proposals = proposals[new_proposals, :]
    #         scores = scores[new_proposals]
    #         # padding to make the total number of proposals == post_nms_topN
    #         proposals = np.vstack((proposals, [proposals[-1, :]] * nr))
    #         scores = np.vstack((scores, [scores[-1]] * nr))

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    # batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    # BUT we NOW (18-Sep-2017) abuse batch inds, and use it for carrying scores
    if ('TEST' == cfg_key):
        batch_inds = np.reshape(scores, [proposals.shape[0], 1])
    else:
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)

    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    if (DEBUG):
        print('blob shape: {0}'.format(blob.shape))
        print('proposal shape: {0}'.format(proposals.shape))
    return blob
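_filter_boxes(), called in step 3 above, is not part of this excerpt. In the reference py-faster-rcnn implementation it simply drops proposals whose width or height (measured at the input image scale) is below min_size; a matching sketch:

import numpy as np

def _filter_boxes(boxes, min_size):
    """Return indices of boxes whose sides are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep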
Code Example #33
File: test.py  Project: hainow/VL2
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select those the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
            if cfg.TEST.BOXSCORE:
                boxscores = roidb[i]['boxscores'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        if cfg.TEST.BOXSCORE:
            scores, boxes = im_detect(net, im, box_proposals, boxscores)
        else:
            scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            if scores.shape[1] == 20:
                newj = j - 1
            else:
                newj = j
            inds = np.where(scores[:, newj] > thresh)[0]
            cls_scores = scores[inds, newj]
            cls_boxes = boxes[inds, newj * 4:(newj + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
Code Example #34
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    if vis:
        from datasets.kitti import kitti
        kitti = kitti("valsplit")
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score, [cfg.VIEWP_BINS x viewpoint prob. dist])
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)

    cache_file = os.path.join(output_dir, 'detections.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
            #print '{} gt roidb loaded from {}'.format(self.name, cache_file)
            print 'Detections cache loaded'
            warnings.warn("PLEASE MAKE SURE THAT YOU REALLY WANT TO USE THE CACHE!", UserWarning)
            #return roidb
    else:

        # timers
        _t = {'im_detect' : Timer(), 'misc' : Timer()}

        if not cfg.TEST.HAS_RPN:
            roidb = imdb.roidb
        ndetections = 0

        if cfg.SMOOTH_L1_ANGLE:
            viewp_bins = 1
        elif cfg.CONTINUOUS_ANGLE:
            viewp_bins = 1
        else:
            viewp_bins = cfg.VIEWP_BINS

        if cfg.SMOOTH_L1_ANGLE:
            allclasses_viewp_bins = imdb.num_classes
        elif cfg.CONTINUOUS_ANGLE:
            allclasses_viewp_bins = 1
        else:
            allclasses_viewp_bins = imdb.num_classes*cfg.VIEWP_BINS

        for i, img_file in enumerate(imdb.image_index):

            if vis:
                detts = np.empty([0, 6])

            # filter out any ground truth boxes
            if cfg.TEST.HAS_RPN:
                box_proposals = None
            else:
                # The roidb may contain ground-truth rois (for example, if the roidb
                # comes from the training or val split). We only want to evaluate
                # detection on the *non*-ground-truth rois. We select those the rois
                # that have the gt_classes field set to 0, which means there's no
                # ground truth.
                if cfg.TEST.GTPROPOSALS:
                  box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] > -1]
                else:
                  box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

            if box_proposals is not None and box_proposals.shape[0] <= 0:
                # if there are no proposals....
                scores = np.empty((0, imdb.num_classes), dtype=np.float32)
                boxes = np.empty((0, imdb.num_classes*4), dtype=np.float32)
                if cfg.VIEWPOINTS:
                    assert cfg.CONTINUOUS_ANGLE==False and cfg.SMOOTH_L1_ANGLE==False, 'not implemented'
                    viewpoints = np.empty((0, allclasses_viewp_bins), dtype=np.float32)
            else:
                if cfg.TEST.FOURCHANNELS:
                    im = cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED)
                else:
                    im = cv2.imread(imdb.image_path_at(i))

                # time detection on both the 4-channel and 3-channel paths
                _t['im_detect'].tic()
                if cfg.VIEWPOINTS:
                    scores, boxes, viewpoints = im_detect(net, im, box_proposals)
                else:
                    scores, boxes = im_detect(net, im, box_proposals)
                _t['im_detect'].toc()

            _t['misc'].tic()
            # skip j = 0, because it's the background class
            for j in xrange(1, imdb.num_classes):
                inds = np.where(scores[:, j] > thresh)[0]
                ndetections += len(inds)
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, j*4:(j+1)*4]
                if cfg.VIEWPOINTS:
                    if cfg.SMOOTH_L1_ANGLE:
                        viewp = viewpoints[inds, j]
                        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], viewp[:, np.newaxis])) \
                            .astype(np.float32, copy=False)
                    elif cfg.CONTINUOUS_ANGLE:
                        viewp = viewpoints[inds]
                        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], viewp)) \
                            .astype(np.float32, copy=False)
                    else:
                        # Softmax is only performed over the class N_BINSx "slot"
                        # (that is why we apply it outside Caffe)
                        cls_viewp = softmax(viewpoints[inds, j*cfg.VIEWP_BINS:(j+1)*cfg.VIEWP_BINS])
                        # Assert that the result from softmax makes sense
                        assert(all(abs(np.sum(cls_viewp, axis=1)-1)<0.1))
                        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], cls_viewp)) \
                            .astype(np.float32, copy=False)
                else:
                    cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                        .astype(np.float32, copy=False)
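                # Three suppression variants follow: a custom NMS that also
                # reports which boxes were suppressed, soft-NMS (decay scores
                # instead of discarding boxes), and plain hard NMS. Viewpoint
                # columns are stripped first, since NMS only consumes
                # (x1, y1, x2, y2, score).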
                if cfg.TEST.DO_NMS:
                    if cfg.USE_CUSTOM_NMS:
                        if cfg.VIEWPOINTS:
                            nms_returns = nms(cls_dets[:,:-viewp_bins], cfg.TEST.NMS, force_cpu=True)
                        else:
                            nms_returns = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
                        if nms_returns:
                            keep = nms_returns[0]
                            suppress = nms_returns[1]
                        else:
                            keep = []
                    elif cfg.TEST.SOFT_NMS>0:
                        if cfg.VIEWPOINTS:
                            keep = soft_nms(cls_dets[:, :-viewp_bins], method=cfg.TEST.SOFT_NMS)
                        else:
                            keep = soft_nms(cls_dets, method=cfg.TEST.SOFT_NMS)
                    else:
                        if cfg.VIEWPOINTS:
                            keep = nms(cls_dets[:,:-viewp_bins], cfg.TEST.NMS)
                        else:
                            keep = nms(cls_dets, cfg.TEST.NMS)
                    cls_dets = cls_dets[keep, :]
                else:
                    if cfg.VIEWPOINTS:
                        cls_dets = cls_dets[cls_dets[:,-viewp_bins-1].argsort()[::-1],:]
                    else:
                        cls_dets = cls_dets[cls_dets[:,-1].argsort()[::-1],:]

                if vis:
                    pre_detts = np.hstack((np.array(cls_dets[:,:5]), j*np.ones((np.array(cls_dets[:,:5]).shape[0],1))))
                    detts = np.vstack((detts, pre_detts))

                all_boxes[j][i] = cls_dets

            if vis:
                gt_roidb = kitti._load_kitti_annotation(img_file)
                vis_detections(im, imdb.classes, detts, gt_roidb)

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                if cfg.VIEWPOINTS:
                    image_scores = np.hstack([all_boxes[j][i][:, -viewp_bins-1]
                                                for j in xrange(1, imdb.num_classes)])
                else:
                    image_scores = np.hstack([all_boxes[j][i][:, -1]
                                                for j in xrange(1, imdb.num_classes)])

                if len(image_scores) > max_per_image:
                    # We usually don't want to do this
                    print "WARNING! Limiting the number of detections"
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in xrange(1, imdb.num_classes):
                        if cfg.VIEWPOINTS:
                            keep = np.where(all_boxes[j][i][:, -viewp_bins-1] >= image_thresh)[0]
                        else:
                            keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]
            _t['misc'].toc()

            print 'im_detect: {:d}/{:d} - {:d} detections - {:.3f}s {:.3f}s' \
                  .format(i + 1, num_images, ndetections,_t['im_detect'].average_time,
                          _t['misc'].average_time)

        det_file = os.path.join(output_dir, 'detections.pkl')
        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
Code example #35
File: test.py Project: liangfu/StuffNet
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if cfg.TEST.SEG:
        n_seg_classes = cfg.SEG_CLASSES
        confcounts = np.zeros((n_seg_classes, n_seg_classes))

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        if cfg.TEST.SEG:
            seg_gt = cv2.imread(
                get_seg_path(imdb._data_path, imdb.image_path_at(i)), -1)
            if seg_gt is None:
                print 'Could not read ', get_seg_path(imdb._data_path,
                                                      imdb.image_path_at(i))
            scores, boxes, seg_scores = im_detect(net, im, box_proposals)
        else:
            scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        if cfg.TEST.SEG:
            # evaluate the segmentation
            seg_labels = np.argmax(seg_scores, axis=2).astype(int)
            seg_labels = cv2.resize(seg_labels,
                                    (seg_gt.shape[1], seg_gt.shape[0]),
                                    interpolation=cv2.INTER_NEAREST)
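            # Joint-histogram trick: encode each (gt, prediction) pixel pair
            # as the single integer gt + pred * n_seg_classes, so one bincount
            # yields the whole confusion matrix.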
            sumim = seg_gt + seg_labels * n_seg_classes
            hs = np.bincount(sumim.flatten(),
                             minlength=n_seg_classes * n_seg_classes)
            confcounts += hs.reshape((n_seg_classes, n_seg_classes))
            print 'Segmentation evaluation'
            # row-normalize: percentage of each gt class's pixels per predicted class
            conf = 100.0 * np.divide(
                confcounts, 1e-20 + confcounts.sum(axis=1, keepdims=True))
            np.save(output_dir + '/seg_confusion.npy', conf)
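            # Despite the name, this per-class "accuracy" is the standard
            # intersection-over-union: tp / (gt pixels + pred pixels - tp).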
            acc = np.zeros(n_seg_classes)
            for j in xrange(n_seg_classes):
                gtj = sum(confcounts[j, :])
                resj = sum(confcounts[:, j])
                gtresj = confcounts[j, j]
                acc[j] = 100.0 * gtresj / (gtj + resj - gtresj)
            print 'Accuracies', acc
            print 'Mean accuracy', np.mean(acc)

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
Code example #36
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key,
                   _feat_stride=[
                       16,
                   ],
                   anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1])
    #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
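
A minimal shape-check sketch for the standalone proposal_layer above. It
assumes the py-faster-rcnn utilities the function calls (generate_anchors,
bbox_transform_inv, clip_boxes, _filter_boxes, nms) are importable and that
cfg.TEST is populated; the feature-map size below is illustrative only:

import numpy as np

H, W, A = 38, 50, 9  # conv feature-map height/width, anchors per location
rpn_cls_prob = np.random.rand(1, H, W, 2 * A).astype(np.float32)   # NHWC bg/fg probs
rpn_bbox_pred = np.random.rand(1, H, W, 4 * A).astype(np.float32)  # NHWC box deltas
im_info = np.array([[600., 800., 1.0]], dtype=np.float32)          # height, width, scale

rois = proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, 'TEST')
print(rois.shape)  # (n_proposals, 5): batch index followed by x1, y1, x2, y2
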
Code example #37
File: demo.py Project: zuqiutxy/Curve-Text-Detector
        cls_scores = scores[inds, 1]

        cls_boxes = boxes[inds, 4:8]
        ## curve
        cls_infos_h = infos_h[inds, :14]
        cls_infos_w = infos_w[inds, :14]

        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
            .astype(np.float32, copy=False)

        # stack h and w pred.
        cls_infos = np.zeros((cls_infos_h.shape[0], 28))
        wh_stack_temp = np.dstack((cls_infos_w, cls_infos_h))
        assert (wh_stack_temp.shape[0] == cls_infos.shape[0]
                ), 'wh stack length mismatch.'
        for ixstack, row_cls_infos in enumerate(cls_infos):
            cls_infos[ixstack] = wh_stack_temp[ixstack].ravel()

        cls_dets_withInfo = np.hstack((cls_boxes, cls_scores[:, np.newaxis], cls_infos)) \
            .astype(np.float32, copy=False)

        cls_dets, cls_dets_withInfo = nps(cls_dets, cls_dets_withInfo)
        if cfg.TEST.USE_PNMS:
            keep = pnms(cls_dets_withInfo, cfg.TEST.PNMS)
        else:
            keep = nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        cls_dets_withInfo = cls_dets_withInfo[keep, :]

        vis(im, cls_dets_withInfo, 0.1)
Code example #38
    def detect(self, im, im3d, detection_graph, trackingSess, adrfpFrame):
        """Detect object classes in an image using pre-computed object proposals."""

        # Load the demo image
        #im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
        #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name)
        #im = cv2.imread(im_file)

        # Detect all object classes and regress object bounds
        timer = Timer()
        timer.tic()
        scores, boxes = im_detect(self.sess, self.net, im)
        hboxes, hscores = detector_utils.detect_objects(
            im, detection_graph, trackingSess)
        timer.toc()
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

        num_hands_detect = 1
        score_thresh = 0.5
        points = detector_utils.draw_box_on_image(num_hands_detect,
                                                  score_thresh, hscores,
                                                  hboxes, 320, 240, im,
                                                  adrfpFrame)

        # Visualize detections for each class
        im = im[:, :, (2, 1, 0)]
        fig, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(im, aspect='equal')

        CONF_THRESH = 0.8
        NMS_THRESH = 0.3
        for cls_ind, cls in enumerate(CLASSES[1:]):
            if cls == "hand": continue
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]

            inds = np.where(dets[:, -1] >= 0.7)[0]
            if len(inds) == 0:
                #print "preskacem "+str(dets[:, -1])
                continue

            im = self.vis_detections(im, cls, dets, inds, im3d, adrfpFrame,
                                     points)
        img = im[:, :, :].copy()
        img[:, :, 2] = im[:, :, 0].copy()
        img[:, :, 0] = im[:, :, 2].copy()

        name = None
        if self.iterPic < 10:
            name = "0" + str(self.iterPic)
        else:
            name = str(self.iterPic)
        cv2.imwrite(name + '.png', img)
        self.iterPic += 1
        return im
Code example #39
File: test.py Project: yanxp/ASM
def test_net(net, imdb, max_per_image=400, thresh=-np.inf, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)

    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)

    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}

    # imdb=get_testing_roidb(imdb)

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    # width = imdb._get_widths()

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
        im = cv2.imread(imdb.image_path_at(i))
        width = im.shape[1]
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, box_proposals)
        if cfg.TEST.USE_FLIPPED:
            # image flipped
            flipped_im = cv2.flip(im, 1)

            flip_scores, flip_boxes = im_detect(net, flipped_im, box_proposals)

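            # Map boxes detected on the flipped image back to the original
            # frame: a horizontal flip swaps the roles of x1 and x2, so
            # new_x1 = width - old_x2 and new_x2 = width - old_x1.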
            for k in xrange(flip_boxes.shape[1]/4):
                # if True:
                #     vis_detections(flipped_im, imdb.classes[k], flip_boxes[:,k*4:k*4+4])
                # # raw_input()
                oldx1 = flip_boxes[:, k*4].copy()
                oldx2 = flip_boxes[:, k*4+2].copy()
                assert (flip_boxes[:, k*4] >= 0).all()
                assert (flip_boxes[:, k*4+2] >= flip_boxes[:, k*4]).all()
                assert (width >= flip_boxes[:, k*4+2]).all()

                flip_boxes[:, k*4] = width - oldx2
                flip_boxes[:, k*4+2] = width - oldx1

                assert (flip_boxes[:, k*4+2] >= 0).all()
                assert (flip_boxes[:, k*4] >= 0).all()
                assert (width >= flip_boxes[:, k*4+2]).all()
                assert (flip_boxes[:, k*4+2] >= flip_boxes[:, k*4]).all()
            boxes = np.concatenate((boxes, flip_boxes.copy()), axis=0)
            scores = np.concatenate((scores, flip_scores.copy()), axis=0)
        _t['im_detect'].toc()
        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            if cfg.TEST.AGNOSTIC:
                cls_boxes = boxes[inds, 4:8]
            else:
                cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
Code example #40
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    img_name = os.path.basename(image_name)
    # im_file = image_name
    # im = cv2.imread(im_file)
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    pimg = process_image(im)
    # cv2.imshow("Processed", pimg)
    # cv2.waitKey(0)
    im = pimg

    height, width = im.shape[:2]
    mid = width / 2.5
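    # Boxes whose left edge falls left of this x-coordinate get the 'left'
    # label below; everything else gets 'right'.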
    # print('height = {} and width/2.5 = {}'.format(height, mid))

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    # print ('Detection took {:.3f}s for '
    #        '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        # vis_detections(im, cls, dets, thresh=CONF_THRESH)

        font = cv2.FONT_HERSHEY_SIMPLEX
        # print 'class index is {}'.format(cls_ind)

        color = (0, 0, 255)
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        if len(inds) > 0:
            for i in inds:
                bbox = dets[i, :4]
                score = dets[i, -1]
                cv2.rectangle(im, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                if bbox[0] < mid:
                    cv2.putText(im, 'left {:s}'.format(cls),
                                (int(bbox[0]), int(bbox[1] - 2)),
                                font, 0.5, (255, 0, 0), 1)
                else:
                    cv2.putText(im, 'right {:s}'.format(cls),
                                (int(bbox[0]), int(bbox[1] - 2)),
                                font, 0.5, (255, 0, 0), 1)

# cv2.putText(im,'{:s} {:.3f}'.format(cls, score),(bbox[0], (int)((bbox[1]- 2))), font, 0.5, (255,255,255), 1)

# Write the resulting frame
# print 'Final image name is {}'.format(img_name)
    splitName = os.path.splitext(img_name)[0]
    # print (os.path.splitext(img_name)[0])
    # print splitName
    # cv2.imwrite('{:s}_output.jpg'.format(splitName), im)

    ## Display output frame
    # cv2.imshow("output", im)
    # cv2.waitKey(0)

    ## Write output frame
    opDir = '/home/student/cmpe295-masters-project/faster-rcnn-resnet/data/output/'
    cv2.imwrite(os.path.join(opDir, img_name), im)
Code example #41
def test_net(net, imdb, cfg, max_per_image=100, thresh=0.05, vis=False, task='det', nmhead=''):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    max_per_set = 40 * num_images
    # heuristic: keep at most 100 detections per class per image prior to NMS
    max_per_image = 100
    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]


    output_dir = get_output_dir(imdb, net)
    #print output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    do_det = task == 'det'
    det_file = os.path.join(output_dir, 'detections.pkl')
    if do_det and os.path.isfile(det_file):
        # pickled detections must be read back in binary mode
        with open(det_file, 'rb') as f:
            all_boxes = cPickle.load(f)
    else:
        # timers
        _t = {'im_detect' : Timer(), 'misc' : Timer()}
        if not cfg.TRAIN['HAS_RPN']:
            roidb = imdb.roidb

        #only_seg = cfg.TRAIN['USE_SEG'] & (not cfg.TRAIN['USE_DET'])
        do_seg   = task=='seg'
        do_edg   = task=='edg'
        do_nrm   = task=='nrm'
        do_sbd   = task=='sbd'
        do_prt   = task=='prt'
        do_sal   = task=='sal'
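        # Non-detection tasks skip the box pipeline below and instead dump
        # the named network blob (nmhead) for each image.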

        print "num_images = ",num_images

        for i in xrange(num_images):
            next_file = os.path.join(output_dir,imdb.image_index[i] + '.mat')
            next_file_png = os.path.join(output_dir,imdb.image_index[i] + '.png')

            if os.path.exists(next_file) or os.path.exists(next_file_png):
                continue

            im = cv2.imread(imdb.image_path_at(i))
            #shape = im.shape
            #print im.shape
            _t['im_detect'].tic()
            scores, boxes = im_detect(net, im,cfg, None,do_det)
            _t['im_detect'].toc()
            _t['misc'].tic()

            # skip j = 0, because it's the background class
            if do_det:
                for j in xrange(1, imdb.num_classes):
                    inds = np.where(scores[:, j] > 0.05)[0]
                    cls_scores = scores[inds, j]
                    if cfg.TEST.AGNOSTIC:
                        cls_boxes = boxes[inds, 4:8]
                    else:
                        cls_boxes = boxes[inds, j*4:(j+1)*4]

                    cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                        .astype(np.float32, copy=False)
                    keep = nms(cls_dets, cfg.TEST.NMS)
                    cls_dets = cls_dets[keep, :]
                    if vis:
                        vis_detections(im, imdb.classes[j], cls_dets)
                    all_boxes[j][i] = cls_dets

                # Limit to max_per_image detections *over all classes*
                if max_per_image > 0:
                    image_scores = np.hstack([all_boxes[j][i][:, -1]
                                              for j in xrange(1, imdb.num_classes)])
                    if len(image_scores) > max_per_image:
                        image_thresh = np.sort(image_scores)[-max_per_image]
                        for j in xrange(1, imdb.num_classes):
                            keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                            all_boxes[j][i] = all_boxes[j][i][keep, :]
                _t['misc'].toc()

                print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
                      .format(i + 1, num_images, _t['im_detect'].average_time,
                              _t['misc'].average_time)

            #print "here!!"
            if do_edg:
                if nmhead[:4] != 'sigm':
                    nmhead = 'sigmoid-' + nmhead

            #from IPython import embed; embed()

            if do_seg or do_prt:
                data = net.blobs[nmhead].data.copy()
                data = data.squeeze(axis=0)
                posteriors = data.argmax(axis=0)
                posteriors = posteriors.astype('uint8').copy()
                from PIL import Image
                im = Image.fromarray(posteriors)
                im.save(next_file_png)

            if do_sal or do_edg:
                #print "nmhead: ",nmhead
                data = net.blobs[nmhead].data.copy()
                # output channel to visualize
                chan = 0 if nmhead == 'mtn_result1' else 1
                data = data.squeeze(axis=0)
                posteriors = 255.0 * visualize_gray(data, chan)
                posteriors = posteriors.astype('uint8').copy()
                from PIL import Image
                im = Image.fromarray(posteriors)
                im.save(next_file_png)

            if do_nrm:
                data = net.blobs[nmhead].data.copy()
                data = -128. * data.squeeze(axis=0) + 128.
                data = np.transpose(data, (1, 2, 0))
                matplotlib.image.imsave(next_file_png, data.astype('uint8').copy())

            if do_sbd:
                d = {'res': net.blobs[nmhead].data.copy()}
                scipy.io.savemat(next_file, d, oned_as='column')

            # end of for-loop over images

        if do_det:
            with open(det_file, 'wb') as f:
                cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    if do_det:
        print 'Evaluating detections'
        imdb.evaluate_detections(all_boxes, output_dir)
Code example #42
def test_net_mask_reload(net_proto,
                         net_mask_proto,
                         weights,
                         imdb,
                         max_per_image=400,
                         thresh=-np.inf,
                         vis=False,
                         save_path="./output/"):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)

    caffe.set_mode_gpu()
    caffe.set_device(cfg.GPU_ID)

    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    net = caffe.Net(net_proto, weights, caffe.TEST)
    net.name = os.path.splitext(os.path.basename(weights))[0]
    output_dir = get_output_dir(imdb, net)

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # timers
    _t = {'im_detect': Timer(), 'im_seg': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    del net
    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select those the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        net = caffe.Net(net_proto, weights, caffe.TEST)
        net.name = os.path.splitext(os.path.basename(weights))[0]
        im = cv2.imread(imdb.image_path_at(i))
        print(im.shape)
        _t['im_detect'].tic()
        scores, boxes, feat = im_det(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            if cfg.TEST.AGNOSTIC:
                cls_boxes = boxes[inds, 4:8]
            else:
                cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        print 'im_detection: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)
        del net

        net_mask = caffe.Net(net_mask_proto, weights, caffe.TEST)
        net_mask.name = os.path.splitext(os.path.basename(weights))[0]
        _t['im_seg'].tic()
        out_mask = np.zeros((im.shape[0], im.shape[1]))
        for j in xrange(1, imdb.num_classes):
            ins_index = 1
            boxes_this_im = all_boxes[j][i][:, :-1]
            seg = im_seg(net_mask, im, feat, boxes_this_im)
            # print(seg.shape)
            for ii in xrange(seg.shape[0]):
                seg_now = seg[ii][0]
                # seg_now = np.transpose(seg_now)
                box_now = boxes_this_im[ii]
                box_now = box_now.astype(int)
                if box_now[2] == box_now[0] or box_now[3] == box_now[1]:
                    continue
                # im_now=im[box_now[1]: box_now[3], box_now[0]:box_now[2]]
                # cv2.imshow("test", im_now)
                # cv2.waitKey()
                seg_org_size = cv2.resize(
                    seg_now,
                    (box_now[2] - box_now[0], box_now[3] - box_now[1]),
                    interpolation=cv2.INTER_NEAREST)
                # print((seg_org_size*99999).shape)
                # cv2.imshow("seg", seg_org_size*99999)
                # cv2.waitKey()
                seg_org_size = seg_org_size * ins_index
                out_mask[box_now[1]:box_now[3],
                         box_now[0]:box_now[2]] = seg_org_size
                ins_index += 1
        _t['im_seg'].toc()

        mask_save_path = os.path.join(
            save_path,
            os.path.basename(imdb.image_path_at(i)).replace(".jpg", ".png"))
        cv2.imwrite(mask_save_path, out_mask * 10)

        print 'im_seg: {:d}/{:d} {:.3f}s' \
              .format(i + 1, num_images, _t['im_seg'].average_time)
        del net_mask

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
Code example #43
def demo_tuples(net, image_name):
    """Detect objects, attributes and relations in an image using pre-computed object proposals."""
    image_num = int(image_name.split(".")[0])
    att_unique = np.unique(att_names[image_num * scale:(image_num * scale +
                                                        scale)])
    print(att_unique)
    att_unique_adv = np.unique(
        att_names_adv[image_num * scale:(image_num * scale + scale)])
    cls_unique = np.unique(att_cls[image_num * scale:(image_num * scale +
                                                      scale)])
    print(cls_unique)
    cls_unique_adv = np.unique(att_cls_adv[image_num *
                                           scale:(image_num * scale + scale)])
    # Load the demo image
    im_file = os.path.join(
        "/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/Analysis/CUB_clean",
        image_name)
    im = cv2.imread(im_file)
    print(im.shape)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes, attr_scores, rel_scores = im_detect(net, im)
    if attr_scores is not None:
        print 'Found attribute scores'
    """
    if rel_scores is not None:
        print 'Found relation scores'
        rel_scores = rel_scores[:,1:] # drop no relation
        rel_argmax = np.argmax(rel_scores, axis=1).reshape((boxes.shape[0],boxes.shape[0]))
        rel_score = np.max(rel_scores, axis=1).reshape((boxes.shape[0],boxes.shape[0]))
    """
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.2
    NMS_THRESH = 0.05
    ATTR_THRESH = 0.1
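    # CONF_THRESH gates which boxes are drawn, NMS_THRESH suppresses
    # overlapping boxes, and ATTR_THRESH selects which attribute labels are
    # attached to each surviving box.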

    im = im[:, :, (2, 1, 0)]
    #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    fig, ax = plt.subplots(figsize=(12, 12))
    plt.imshow(im)

    # Detections
    det_indices = []
    det_scores = []
    det_objects = []
    det_bboxes = []
    det_attrs = []

    for cls_ind, cls in enumerate(classes[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = np.array(nms(dets, NMS_THRESH))
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]

        if len(inds) > 0:
            keep = keep[inds]
            for k in keep:
                det_indices.append(k)
                det_bboxes.append(cls_boxes[k])
                det_scores.append(cls_scores[k])
                det_objects.append(cls)
                if attr_scores is not None:
                    attr_inds = np.where(attr_scores[k][1:] >= ATTR_THRESH)[0]
                    det_attrs.append([attributes[ix] for ix in attr_inds])
                else:
                    det_attrs.append([])

    #rel_score = rel_score[det_indices].T[det_indices].T
    #rel_argmax = rel_argmax[det_indices].T[det_indices].T
    for i, (idx, score, obj, bbox, attr) in enumerate(
            zip(det_indices, det_scores, det_objects, det_bboxes, det_attrs)):
        attr_s = [element for element in attr if element in att_unique]

        for k in range(len(attr)):
            if attr[k] in att_unique:

                if obj in cls_unique:

                    box_text = obj
                    if len(attr) > 0:
                        box_text += " " + attr[k]
                        ax.add_patch(
                            plt.Rectangle((bbox[0], bbox[1]),
                                          bbox[2] - bbox[0],
                                          bbox[3] - bbox[1],
                                          fill=False,
                                          edgecolor='red',
                                          linewidth=2,
                                          alpha=0.5))
                        ax.text(bbox[0],
                                bbox[1] - 2,
                                '%s' % (box_text),
                                bbox=dict(facecolor='blue', alpha=0.5),
                                fontsize=10,
                                color='white')

    plt.axis('off')
    plt.tight_layout()
    plt.draw()
    plt.savefig(
        '/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/Analysis/clean_bb/'
        + image_name)
Code example #44
def demoVideo(image):

    global count
    global cls_label
    global b_box

    count = count + 1

    # print ('count before = {}'.format(count))
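    # Run the detector only on every 10th frame; the other frames redraw the
    # cached label and box from the most recent detection.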
    if (count % 10) > 0:
        im = process_image(image)

        height, width = im.shape[:2]
        mid = width / 2.5
        if cls_label is not None:
            # print('saved label is = {}'.format(cls_label))
            font = cv2.FONT_HERSHEY_SIMPLEX

            cv2.rectangle(im, (int(b_box[0]), int(b_box[1])),
                          (int(b_box[2]), int(b_box[3])), (0, 0, 255), 2)
            if b_box[0] < mid:
                cv2.putText(im, 'left {:s}'.format(cls_label),
                            (int(b_box[0]), int(b_box[1] - 2)),
                            font, 0.5, (255, 0, 0), 1)
            else:
                cv2.putText(im, 'right {:s}'.format(cls_label),
                            (int(b_box[0]), int(b_box[1] - 2)),
                            font, 0.5, (255, 0, 0), 1)
        return im

    im = process_image(image)

    height, width = im.shape[:2]
    mid = width / 2.5
    # print('height = {} and width/2.5 = {}'.format(height, mid))

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(default_net, im)
    timer.toc()
    # print ('Detection took {:.3f}s for '
    #        '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    cls_label = None

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        font = cv2.FONT_HERSHEY_SIMPLEX

        color = (0, 0, 255)
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        if len(inds) > 0:
            for i in inds:
                bbox = dets[i, :4]
                b_box = bbox
                score = dets[i, -1]
                cls_label = cls
                cv2.rectangle(im, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                if bbox[0] < mid:
                    cv2.putText(im, 'left {:s}'.format(cls),
                                (int(bbox[0]), int(bbox[1] - 2)),
                                font, 0.5, (255, 0, 0), 1)
                else:
                    cv2.putText(im, 'right {:s}'.format(cls),
                                (int(bbox[0]), int(bbox[1] - 2)),
                                font, 0.5, (255, 0, 0), 1)

# cv2.putText(im,'{:s} {:.3f}'.format(cls, score),(bbox[0], (int)((bbox[1]- 2))), font, 0.5, (255,255,255), 1)
    return im
Code example #45
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str('TRAIN' if self.phase == 0 else 'TEST') # either 'TRAIN' or 'TEST'
        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
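        # If RPN regression targets were normalized during training, undo the
        # normalization before decoding the deltas into boxes.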
        if cfg_key == 'TRAIN' and cfg.TRAIN.RPN_NORMALIZE_TARGETS:
            bbox_deltas *= cfg.TRAIN.RPN_NORMALIZE_STDS
            bbox_deltas += cfg.TRAIN.RPN_NORMALIZE_MEANS

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        # print blob.shape
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob
        if DEBUG_SHAPE:
            print 'ProposalLayer top[0] size: {}'.format(top[0].data.shape)

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
            if DEBUG_SHAPE:
                print 'ProposalLayer top[1] size: {}'.format(top[1].data.shape)
Code example #46
def demo(net, image_list):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', image_list[0])
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class

    ind = 1
    color_list = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
    color_cls = [(0, 255, 255), (255, 0, 255), (255, 255, 0)]
    for j in range(1, len(CLASSES)):
        num_objs = int(image_list[ind + 1])
        for i in xrange(num_objs):
            x1 = int(float(image_list[ind + 2 + i * 4]))
            y1 = int(float(image_list[ind + 3 + i * 4]))
            x2 = int(float(image_list[ind + 4 + i * 4]))
            y2 = int(float(image_list[ind + 5 + i * 4]))
            rect_start = (x1, y1)
            rect_end = (x2, y2)
            #cv2.rectangle(im, rect_start, rect_end, color_list[j-1], 2)
        ind += 4 * num_objs + 1

    thresh = 0.5
    NMS_THRESH = 0.3
    path = os.path.join(cfg.ROOT_DIR, 'data', 'results', 'show',
                        image_list[0][17:])
    index = 1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= thresh)[0]

        # count classes with no detections; once every class is empty, write
        # the unannotated image and return
        if len(inds) == 0 and index == len(CLASSES[1:]):
            cv2.imwrite(path, im)
            return
        elif len(inds) == 0 and index < len(CLASSES[1:]):
            index += 1
            continue
        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]
            x = bbox[0]
            y = bbox[1]
            rect_start = (x, y)
            x1 = bbox[2]
            y1 = bbox[3]
            rect_end = (x1, y1)
            color_pred = color_cls[cls_ind - 1]
            cv2.rectangle(im, rect_start, rect_end, color_pred, 2)
    cv2.imwrite(path, im)
Code example #47
def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    max_per_set = 40 * num_images
    # heuristic: keep at most 100 detections per class per image prior to NMS
    max_per_image = 100
    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh[j])[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            cls_boxes = cls_boxes[top_inds, :]
            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(top_scores[j], val)
            # if we've collected more than the max number of detection,
            # then pop items off the minheap and update the class threshold
            if len(top_scores[j]) > max_per_set:
                while len(top_scores[j]) > max_per_set:
                    heapq.heappop(top_scores[j])
                thresh[j] = top_scores[j][0]

            all_boxes[j][i] = \
                    np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)

            if 0:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Applying NMS to all detections'
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print 'Evaluating detections'
    imdb.evaluate_detections(nms_dets, output_dir)
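
The adaptive per-class threshold above can be exercised in isolation. This toy
sketch (synthetic scores, illustrative max_per_set) shows the invariant the
minheap maintains: it holds the best max_per_set scores seen so far, and its
smallest element is the running detection threshold:

import heapq

import numpy as np

np.random.seed(0)
max_per_set = 5
top_scores = []  # minheap of the best max_per_set scores so far
thresh = -np.inf
for image_scores in np.random.rand(3, 4):  # three "images", four scores each
    for val in image_scores:
        heapq.heappush(top_scores, val)
    while len(top_scores) > max_per_set:
        heapq.heappop(top_scores)
    if len(top_scores) == max_per_set:
        thresh = top_scores[0]  # smallest retained score
    print('threshold so far: %.3f' % thresh)
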
Code example #48
def get_detections_from_im(net, im_file, image_id, conf_thresh=0.2, visualize=False):
    """Load im_file and extract bottom-up features using Faster RCNN"""
    MIN_BOXES, MAX_BOXES = 36, 36
    NMS_THRESH = 0.05
    CONF_THRESH = 0.1
    ATTR_THRESH = 0.1
    im = cv2.imread(im_file)
    scores, boxes, attr_scores, rel_scores = im_detect(net, im)

    # Keep the original boxes, don't worry about the regresssion bbox outputs
    rois = net.blobs['rois'].data.copy()

    # unscale back to raw image space
    blobs, im_scales = _get_blobs(im, None)

    cls_boxes = rois[:, 1:5] / im_scales[0]
    cls_prob = net.blobs['cls_prob'].data
    pool5 = net.blobs['pool5_flat'].data
    # Keep only the best detections
    max_conf = np.zeros((rois.shape[0]))
    for cls_ind in range(1,cls_prob.shape[1]):
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = np.array(nms(dets, cfg.TEST.NMS))
        max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep])

    keep_boxes = np.where(max_conf >= conf_thresh)[0]
    if len(keep_boxes) < MIN_BOXES:
        keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
    elif len(keep_boxes) > MAX_BOXES:
        keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

    # Normalize the scores of the best detections and take a score-weighted
    # sum of their features to obtain bottom-up attention features for im.
    best_scores = max_conf[keep_boxes]
    best_feats = pool5[keep_boxes]
    eps = 1e-8  # assumed small constant; `eps` is undefined in the original snippet
    scores_norm = np.expand_dims(np.exp(best_scores) / (np.sum(np.exp(best_scores)) + eps), axis=1)
    cumulative_feats = scores_norm.T.dot(best_feats)
    sum_feats = np.sum(best_feats,axis=0)

    if visualize:
        # To visualize the top-scoring bounding boxes overlaid on the image im

        im = im[:, :, (2, 1, 0)]
        fig, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(im, aspect='equal')

        for cls_ind, cls in enumerate(CLASSES[1:]):
            cls_ind += 1 # because we skipped background
            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            if attr_scores is not None:
                attributes = attr_scores[keep]
            else:
                attributes = None
            if rel_scores is not None:
                # rel_argmax / rel_score are assumed precomputed from rel_scores
                # (per-box relation argmax / max); they are undefined in this snippet
                rel_argmax_c = rel_argmax[keep]
                rel_score_c = rel_score[keep]
            else:
                rel_argmax_c = None
                rel_score_c = None
            vis_detections(ax, cls, dets, attributes, rel_argmax_c, rel_score_c, thresh=CONF_THRESH)
        plt.savefig('./'+im_file.split('/')[-1].replace(".jpg", "_demo.png"))


    return {
        'image_id': image_id,
        'image_h': np.size(im, 0),
        'image_w': np.size(im, 1),
        'num_boxes' : len(keep_boxes),
        'boxes': base64.b64encode(cls_boxes[keep_boxes]),
        'features': base64.b64encode(pool5[keep_boxes]),
        'cumulative_feats': cumulative_feats,  # softmax-normalized, score-weighted RoI features
        'sum_feats': sum_feats  # unweighted sum of the best RoI features
    }
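The score-weighted pooling above reduces the kept RoI features to a single vector. A short sketch of the same idea in isolation (softmax_pool is a hypothetical helper; the stabilizing max-subtraction is an addition, not in the original):

import numpy as np

def softmax_pool(best_scores, best_feats, eps=1e-8):
    """best_scores: (K,), best_feats: (K, D) -> (D,) score-weighted feature."""
    e = np.exp(best_scores - best_scores.max())  # stabilized softmax numerator
    w = e / (e.sum() + eps)                      # (K,) normalized weights
    return w.dot(best_feats)                     # (D,) weighted sum

pooled = softmax_pool(np.array([2.0, 1.0, 0.5]), np.random.rand(3, 2048))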
Code example #49
def test_net(net, imdb, max_per_image=100, thresh=0.05, boxes_num_per_batch=0, vis=False, startIdx=0, endIdx=-1, saveMat=False, svm=False, use_wzctx=True):
    """Test a Fast R-CNN network on an image database."""
    if use_wzctx:
        print "use_wzctx enabled"
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    #print "4"
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)

    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}
    #print "5"
    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb
    
    if endIdx==-1:
        endIdx=num_images
    #print "6"
    for i in xrange(num_images):
        # filter out any ground truth boxes
        if i < startIdx or i>=endIdx:
            continue
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select those the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            #print "x"
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
            #print "y"
        im_name = imdb.image_path_at(i)
        im_name = im_name.split('/')[-1]
        im_name = im_name.split('.')[0]
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        #print "boxes_num %d"%boxes_num_per_batch
        if boxes_num_per_batch > 0:
            num_boxes = box_proposals.shape[0]
            num_batch = (num_boxes + boxes_num_per_batch -1) / boxes_num_per_batch
            #print "zzz"
            #num_boxes = roidb[i]['boxes'].shape[0]
            #num_batch = math.ceil(num_boxes/boxes_num_per_batch)
            scores_batch = np.zeros((num_batch*boxes_num_per_batch, imdb.num_classes), dtype=np.float32)
            boxes_batch = np.zeros((num_batch*boxes_num_per_batch, 4*imdb.num_classes), dtype=np.float32)
            # replicate the first box num_batch*boxes_num_per_batch times for preallocation
            rois = np.tile(box_proposals[0, :], (num_batch*boxes_num_per_batch, 1))         
            #print "xx"
            # assign real boxes to rois
            rois[:num_boxes, :] = box_proposals
            #print "num_batch: %d"%num_batch
            for j in xrange(int(num_batch)):
                roi = rois[j*boxes_num_per_batch:(j+1)*boxes_num_per_batch, :]
                #print roi.shape
                score, box = im_detect(net, im, roi, svm, use_wzctx)
                scores_batch[j*boxes_num_per_batch:(j+1)*boxes_num_per_batch, :] = score
                boxes_batch[j*boxes_num_per_batch:(j+1)*boxes_num_per_batch, :] = box
            # discard the padded duplicate results
            scores = scores_batch[:num_boxes, :]
            boxes = boxes_batch[:num_boxes, :]
        else:
            scores, boxes = im_detect(net, im, box_proposals, svm, use_wzctx)
        mat_dir = os.path.join(output_dir, 'stage%s'%startIdx)
        if not os.path.exists(mat_dir):
            os.mkdir(mat_dir)
        if saveMat:  # dump raw scores/boxes only when requested
            sio.savemat('%s/%s.mat' % (mat_dir, im_name + '_' + str(i)), {'scores': scores, 'boxes': boxes})
        
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        #print "7"        
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            if cfg.TEST.BBOX_VOTE:
                cls_dets_after_nms = cls_dets[keep, :]
                cls_dets = bbox_voting(cls_dets_after_nms, cls_dets, threshold=cfg.TEST.BBOX_VOTE_THRESH)
            else:
                cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    #det_file = os.path.join(output_dir, 'detection_%sto%s.pkl' % (startIdx,endIdx))
    #with open(det_file, 'wb') as f:
    #    cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, startIdx, endIdx)
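The batching above pads the proposal list to a multiple of boxes_num_per_batch by replicating the first box, runs the network batch by batch, then discards the padded rows. A hedged sketch of just that padding logic (batch_rois is a hypothetical helper):

import numpy as np

def batch_rois(box_proposals, batch_size):
    """Split an (N, 4) proposal array into fixed-size batches, padding with box 0."""
    num_boxes = box_proposals.shape[0]
    num_batch = (num_boxes + batch_size - 1) // batch_size
    rois = np.tile(box_proposals[0], (num_batch * batch_size, 1))
    rois[:num_boxes] = box_proposals  # real boxes; the tail rows are padding
    return [rois[k * batch_size:(k + 1) * batch_size] for k in range(num_batch)]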
Code example #50
        numberImg += 1
        im = cv2.imread("/nfs/zhengmeisong/wkspace/caffe_wk/py-faster-rcnn/data/test/"+filename)
        timer = Timer()
        timer.tic()
        scores, boxes = im_detect(net, im)
        timer.toc()
        print ('No.{:d} took {:.3f}s for '
           '{:d} object proposals').format(numberImg, timer.total_time, boxes.shape[0])
        CONF_THRESH = 0.8
        NMS_THRESH = 0.3
        for cls_ind, cls in enumerate(CLASSES[1:]):
            cls_ind += 1                  # because we skipped background
            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]  # 300x4 matrix
            cls_scores = scores[:, cls_ind]   # 300 rows
            dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
            if len(inds) == 0:
                continue
            print inds,dets[inds[0]]
            for i in inds:
                bbox = dets[i, :4]
                score = dets[i, -1]
                cv2.rectangle(im, (bbox[0], bbox[1]),
                              (bbox[2],bbox[3]),
                              (0,0,255),2)

                cv2.putText(im, '{:s} {:.3f}'.format(cls, score),(bbox[0], bbox[1]),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255),1,4,0)
            cv2.imwrite("/nfs/zhengmeisong/wkspace/caffe_wk/py-faster-rcnn/data/testOut/"+filename, im)
Code example #51
def nms_detections(pred_boxes, scores, nms_thresh, inds=None):
    dets = np.hstack((pred_boxes, scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, nms_thresh)
    if inds is None:
        return pred_boxes[keep], scores[keep], keep
    return pred_boxes[keep], scores[keep], inds[keep], keep
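Hypothetical usage of nms_detections above, assuming the repo's Cython nms is importable; the two heavily overlapping boxes collapse to the higher-scoring one while the distant box survives:

import numpy as np

pred_boxes = np.array([[0, 0, 10, 10],
                       [1, 1, 11, 11],
                       [50, 50, 60, 60]], dtype=np.float32)
scores = np.array([0.9, 0.8, 0.7], dtype=np.float32)
boxes_keep, scores_keep, keep = nms_detections(pred_boxes, scores, nms_thresh=0.3)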
Code example #52
def vis_detections(im, wav_name, scores, boxes):
    """Draw detected bounding boxes."""
    '''
    im = im[:, :, (2, 1, 0)]
    im_size=im.shape
    fig, ax = plt.subplots(figsize=(im_size[1]/100.0, im_size[0]/100.0))
    ax.imshow(im, aspect='equal')
    '''
    result = open(data_folder + 'Result/task2_results.txt', 'at')
    f = open(
        data_folder + 'Result/estimate_txt/' + wav_name[:-4] + '_estimate.txt',
        'wt')
    result.write(wav_name)
    write_result = False
    cls_boxes = np.zeros((boxes.shape[0], 4), boxes.dtype)
    cls_boxes[:, 1] = boxes[:, 2]
    cls_boxes[:, 2] = 511
    cls_boxes[:, 3] = boxes[:, 3]
    for cls_ind, class_name in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] > CONF_THRESH[cls_ind])[0]
        if len(inds) == 0:
            continue
        else:
            for i in inds:
                onset = str(dets[i, 1] * (nfft - noverlap) / common_fs)
                offset = str(dets[i, 3] * (nfft - noverlap) / common_fs)
                if not write_result:
                    result.write('\t' + onset + '\t' + offset + '\t' +
                                 name_transform[class_name])
                    write_result = True
                else:
                    result.write('\n' + wav_name + '\t' + onset + '\t' +
                                 offset + '\t' + name_transform[class_name])
                f.write(onset + '\t' + offset + '\t' +
                        name_transform[class_name] + '\n')
                '''
                ax.add_patch(
                    plt.Rectangle((dets[i,0], dets[i,1]),
                          dets[i,2] - dets[i,0],
                          dets[i,3] - dets[i,1], fill=False,
                          edgecolor='red', linewidth=3.5)
                )
                ax.text(dets[i,0], dets[i,1] - 2,
                    '{:s} {:.3f}'.format(class_name, dets[i,-1]),
                    bbox=dict(facecolor='blue', alpha=0.5),
                    fontsize=14, color='white')
		'''
    result.write('\n')
    result.close()
    f.close()
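The onset/offset arithmetic above maps spectrogram frame indices back to seconds: each STFT hop advances (nfft - noverlap) samples at common_fs Hz. A tiny illustration with assumed parameter values (the real nfft, noverlap, and common_fs are defined elsewhere in this file):

nfft, noverlap, common_fs = 1024, 512, 44100

def frame_to_seconds(frame_idx):
    return frame_idx * (nfft - noverlap) / float(common_fs)

print(frame_to_seconds(100))  # ~1.161 s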
Code example #53
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes, attr_scores, rel_scores = im_detect(net, im)
    if rel_scores is not None:
        # recover the per-box relation argmax / max score; without this the
        # rel_argmax / rel_score lookups below would be undefined
        rel_argmax = np.argsort(rel_scores, axis=1).reshape((boxes.shape[0], boxes.shape[0]))
        rel_score = np.max(rel_scores, axis=1).reshape((boxes.shape[0], boxes.shape[0]))
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.4
    NMS_THRESH = 0.3

    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        if attr_scores is not None:
            attributes = attr_scores[keep]
        else:
            attributes = None
        if rel_scores is not None:
            rel_argmax_c = rel_argmax[keep]
            rel_score_c = rel_score[keep]
        else:
            rel_argmax_c = None
            rel_score_c = None
        vis_detections(ax,
                       cls,
                       dets,
                       attributes,
                       rel_argmax_c,
                       rel_score_c,
                       thresh=CONF_THRESH)
    plt.savefig('data/demo/' +
                im_file.split('/')[-1].replace(".jpg", "_demo.jpg"))
Code example #54
    def forward(self, bottom, top):
        # params
        cfg_key = self.phase  # Caffe phase enum: 0 = TRAIN, 1 = TEST
        if cfg_key == 0:
            cfg_ = cfg.TRAIN
        else:
            cfg_ = cfg.TEST

        # corner params
        pt_thres = cfg_.PT_THRESH
        pt_max_num = cfg.PT_MAX_NUM
        pt_nms_range = cfg.PT_NMS_RANGE
        pt_nms_thres = cfg.PT_NMS_THRESH
        # proposal params
        ld_interval = cfg.LD_INTERVAL
        ld_um_thres = cfg.LD_UM_THRESH
        # rpn params
        # min_size = cfg_.RPN_MIN_SIZE
        nms_thresh = cfg_.RPN_NMS_THRESH
        pre_nms_topN = cfg_.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg_.RPN_POST_NMS_TOP_N

        im_info = bottom[0].data[0, :]
        score_tl = bottom[1].data[0, :].transpose((1, 2, 0))
        score_tr = bottom[2].data[0, :].transpose((1, 2, 0))
        score_br = bottom[3].data[0, :].transpose((1, 2, 0))
        score_bl = bottom[4].data[0, :].transpose((1, 2, 0))
        scores = np.concatenate([
            score_tl[:, :, :, np.newaxis], score_tr[:, :, :, np.newaxis],
            score_br[:, :, :, np.newaxis], score_bl[:, :, :, np.newaxis]
        ],
                                axis=3)

        map_info = scores.shape[:2]
        # 1. sample corner candidates from prob maps
        tl, tr, br, bl = _corner_sampling(scores, pt_thres, pt_max_num,
                                          pt_nms_range, pt_nms_thres)
        # 2. assemble corner candidates into proposals
        proposals = _proposal_sampling(tl, tr, br, bl, map_info, ld_interval,
                                       ld_um_thres)
        # 3. filter out degenerate quadrilaterals
        proposals = filter_quads(proposals)
        scores = proposals[:, 8]
        proposals = proposals[:, :8]
        # 4. rescale quads into raw image space
        proposals = proposals * self._feat_stride
        # 5. quadrilateral non-maximum suppression
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        keep = nms(
            np.hstack((proposals, scores[:, np.newaxis])).astype(np.float32,
                                                                 copy=False),
            nms_thresh)
        proposals = proposals[keep, :]
        scores = scores[keep]
        if post_nms_topN > 0:
            proposals = proposals[:post_nms_topN, :]
            scores = scores[:post_nms_topN]
        if proposals.shape[0] == 0:
            # add whole image to avoid error
            print 'NO PROPOSALS!'
            proposals = np.array(
                [[0, 0, im_info[1], 0, im_info[1], im_info[0], 0, im_info[0]]])
            scores = np.array([0.0])

        # output
        # top[0]: quads(x1, y1, x2, y2, x3, y3, x4, y4)
        # top[1]: rois(xmin, ymin, xmax, ymax, theta)
        # top[2]: scores
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        top[0].reshape(*blob.shape)
        top[0].data[...] = blob
        if len(top) > 1:
            if cfg.DUAL_ROI:
                rois = quad_2_obb(np.array(proposals, dtype=np.float32))
                rois = dual_roi(rois)
            else:
                rois = quad_2_obb(np.array(proposals, dtype=np.float32))
            batch_inds = np.zeros((rois.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, rois.astype(np.float32, copy=False)))
            top[1].reshape(*blob.shape)
            top[1].data[...] = blob
        if len(top) > 2:
            scores = np.vstack((scores, scores)).transpose()
            top[2].reshape(*scores.shape)
            top[2].data[...] = scores
Code example #55
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)  # all test image names the imdb subclass (e.g. pascal_voc.py) read from its txt list
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [
        [[] for _ in xrange(num_images)]  # structure: [num_classes, num_images, (N, 5)]
        for _ in xrange(imdb.num_classes)
    ]

    output_dir = get_output_dir(imdb, net)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb
    '''------- process each image in turn -------'''
    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select those the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        '''------- returns: scores (R, cls_num), boxes (R, 4 * cls_num) -------'''
        scores, boxes = im_detect(net, im, box_proposals)  # -- the key detection step --
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)  # apply non-maximum suppression (NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            # store an (n, 5) array whose rows are (x1, y1, x2, y2, score): the detections of class j in image i
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')  # detections.pkl appears unused downstream; only all_boxes is consumed
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)  # persist the raw detections

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
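The max_per_image block above caps detections across all classes of one image by thresholding on the max_per_image-th best score. A compact sketch of the same logic (cap_detections is a hypothetical helper):

import numpy as np

def cap_detections(dets_per_class, max_per_image):
    """dets_per_class: list of (N_j, 5) arrays whose last column is the score."""
    score_lists = [d[:, -1] for d in dets_per_class if len(d)]
    if not score_lists:
        return dets_per_class
    image_scores = np.hstack(score_lists)
    if len(image_scores) <= max_per_image:
        return dets_per_class
    image_thresh = np.sort(image_scores)[-max_per_image]
    return [d[d[:, -1] >= image_thresh] if len(d) else d for d in dets_per_class]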
def demo(image_name, image_no, net):
    colors = [
        "blue", "green", "red", "cyan", "magenta", "yellow", "black", "white",
        "darkblue", "orchid", "springgreen", "lime", "deepskyblue",
        "mediumvioletred", "maroon", "orangered", "navy", "olive", "orange",
        "orangered", "orchid", "pink", "plum", "purple", "salmon", "sienna",
        "silver", "tan", "teal", "tomato", "violet", "wheat", "yellow",
        "yellowgreen", "lavender", "palevioletred"
    ]

    conf_thresh = 0.3
    min_boxes = 36
    max_boxes = 36
    indexes = []
    cfg.TEST.NMS = 0.6

    im = cv2.imread(
        os.path.join(
            "/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/Analysis/targeted_attacks_ep16/adv_images",
            image_name))
    cls_append = []
    scores, boxes, attr_scores, rel_scores = im_detect(net, im)
    print(image_no)
    # Keep the original boxes, don't worry about the regression bbox outputs
    rois = net.blobs['rois'].data.copy()
    # unscale back to raw image space
    blobs, im_scales = _get_blobs(im, None)

    cls_boxes = rois[:, 1:5] / im_scales[0]

    cls_prob = net.blobs['cls_prob'].data
    attr_prob = net.blobs['attr_prob'].data
    pool5 = net.blobs['pool5_flat'].data

    # Keep only the best detections
    max_conf = np.zeros((rois.shape[0]))
    for cls_ind in range(1, cls_prob.shape[1]):
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = np.array(nms(dets, cfg.TEST.NMS))

        max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                  cls_scores[keep], max_conf[keep])

    keep_boxes = np.where(max_conf >= conf_thresh)[0]

    if len(keep_boxes) < min_boxes:
        keep_boxes = np.argsort(max_conf)[::-1][:min_boxes]
    elif len(keep_boxes) > max_boxes:
        keep_boxes = np.argsort(max_conf)[::-1][:max_boxes]
    att_unique = np.unique(att_names[image_no * scale:(image_no * scale + scale)])
    att_unique_adv = np.unique(att_names_adv[image_no * scale:(image_no * scale + scale)])
    cls_unique = np.unique(att_cls[image_no * scale:(image_no * scale + scale)])
    cls_unique_adv = np.unique(att_cls_adv[image_no * scale:(image_no * scale + scale)])
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    sizes = np.shape(im)
    height = float(sizes[0])
    width = float(sizes[1])
    fig = plt.figure()
    fig.set_size_inches(width / height, 1, forward=False)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    plt.imshow(im)
    #colors=["blue","green","red","cyan","magenta","yellow","black","white","darkblue","orchid","springgreen","lime","deepskyblue","mediumvioletred","maroon","orangered"]
    boxes = cls_boxes[keep_boxes]
    objects = np.argmax(cls_prob[keep_boxes][:, 1:], axis=1)
    attr_thresh = 0.1
    attr = np.argmax(attr_prob[keep_boxes][:, 1:], axis=1)
    attr_conf = np.max(attr_prob[keep_boxes][:, 1:], axis=1)
    count_box = 0
    for i in range(len(keep_boxes)):
        bbox = boxes[i]
        if bbox[0] == 0:
            bbox[0] = 1
        if bbox[1] == 0:
            bbox[1] = 1
        #cls = classes[objects[i]+1]
        if attr_conf[i] > attr_thresh:

            if attributes[attr[i] + 1] in att_unique_adv:
                if classes[objects[i] + 1] in cls_unique_adv:
                    cls = attributes[attr[i] + 1] + " " + classes[objects[i] + 1]
                    cls_append.append(cls)

                    count = cls_append.count(cls)
                    if count == 1:

                        count_box = count_box + 1
                        print(cls)
                        plt.gca().add_patch(
                            plt.Rectangle((bbox[0], bbox[1]),
                                          bbox[2] - bbox[0],
                                          bbox[3] - bbox[1],
                                          fill=False,
                                          edgecolor=colors[i],
                                          linewidth=0.3,
                                          alpha=0.5))
                        plt.gca().text(bbox[0],
                                       bbox[1] - 2,
                                       '%s' % (cls),
                                       bbox=dict(facecolor='blue',
                                                 alpha=0,
                                                 linewidth=0.2),
                                       fontsize=2.5,
                                       color=colors[i])

    #plt.suptitle((correct_cls[int(image_no)])+ " "+(wrong_cls[int(image_no)]),fontsize=2)
    plt.savefig(
        '/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/Analysis/targeted_attacks_ep16/adv_bb1/adv_bb{}.jpg'
        .format(image_no),
        dpi=1500)
    #plt.savefig('/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/Analysis/clean_bb/clean_bb{}_50.jpg'.format(image_no), dpi = 1500)
    #plt.tight_layout()

    plt.close()
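The fixed-budget selection above keeps every box whose best per-class, NMS-surviving score clears conf_thresh, then forces the count into [min_boxes, max_boxes] by ranking on that score. A hedged sketch of just the selection step (select_boxes is a hypothetical helper):

import numpy as np

def select_boxes(max_conf, conf_thresh=0.3, min_boxes=36, max_boxes=36):
    """max_conf: (R,) best surviving score per RoI -> indices of kept boxes."""
    keep_boxes = np.where(max_conf >= conf_thresh)[0]
    if len(keep_boxes) < min_boxes:
        keep_boxes = np.argsort(max_conf)[::-1][:min_boxes]
    elif len(keep_boxes) > max_boxes:
        keep_boxes = np.argsort(max_conf)[::-1][:max_boxes]
    return keep_boxes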
Code example #57
def bag_demo_double(net, image_name, cat_ids, bboxes):
    """Detect object classes in an image using pre-computed object proposals."""
    im = cv2.imread(image_name)
    # im = url_to_image(image_name)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    #######
    fig = plt.figure()
    ax1 = fig.add_subplot(1, 2, 1)
    #######
    colors = plt.cm.hsv(np.linspace(0, 1, len(CLASSES))).tolist()
    ax1.imshow(im)
    currentAxis = plt.gca()
    CONF_THRESH = 0.6
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)

        dets = dets[keep, :]
        keep_final = np.where(dets[:, 4] > CONF_THRESH)[0]
        for i in keep_final:
            xmin = dets[i, 0]
            ymin = dets[i, 1]
            xmax = dets[i, 2]
            ymax = dets[i, 3]
            score = dets[i, 4]
            label_name = cls
            display_txt = '%s: %.2f' % (label_name, score)
            coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1
            color = colors[cls_ind]
            currentAxis.add_patch(
                plt.Rectangle(*coords,
                              fill=False,
                              edgecolor=color,
                              linewidth=2))
            currentAxis.text(xmin,
                             ymin,
                             display_txt,
                             bbox={
                                 'facecolor': color,
                                 'alpha': 0.5
                             })
    ax2 = fig.add_subplot(1, 2, 2)
    ax2.imshow(im)
    currentAxis = plt.gca()
    for idx, cls_ind in enumerate(cat_ids):
        cls = CLASSES[cls_ind]
        xmin = bboxes[idx, 0]
        ymin = bboxes[idx, 1]
        xmax = xmin + bboxes[idx, 2] - 1
        ymax = ymin + bboxes[idx, 3] - 1
        label_name = cls
        display_txt = '%s: %.2f' % (label_name, 1)
        coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1
        color = colors[cls_ind]
        currentAxis.add_patch(
            plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
        currentAxis.text(xmin,
                         ymin,
                         display_txt,
                         bbox={
                             'facecolor': color,
                             'alpha': 0.5
                         })
    plt.show()
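The ground-truth panel above converts dataset-style (x, y, width, height) boxes into the (xmin, ymin, xmax, ymax) corners that the detection panel draws. A one-line sketch of that conversion (xywh_to_corners is a hypothetical helper), using inclusive pixel coordinates as the loop above does:

def xywh_to_corners(box):
    x, y, w, h = box
    return x, y, x + w - 1, y + h - 1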
Code example #58
File: jnbdemo.py Project: tinytai/caffe-fast-rcnn
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals').format(timer.total_time, boxes.shape[0])
    print boxes.shape
    print scores.shape

    # Visualize detections for each class

    timer = Timer()
    timer.tic()

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        class_name = cls
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        if len(inds) == 0:
            continue
        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]

            ax.add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1],
                              fill=False,
                              edgecolor='red',
                              linewidth=3.5))
            ax.text(
                bbox[0],
                bbox[1] - 2,
                #'{:s} {:.3f}'.format(class_name, score),
                class_name,
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14,
                color='white',
                fontproperties=myfont)

    #ax.set_title(('{} detections with '
    #              'p({} | box) >= {:.1f}').format(class_name, class_name,
    #                                              thresh),
    #              fontsize=14)
        plt.axis('off')
        plt.tight_layout()
        plt.draw()
    plt.savefig(image_name, dpi=400, bbox_inches="tight")

    timer.toc()
    print('draw time {:.3f}s for '
          '{:d} object proposals').format(timer.total_time, boxes.shape[0])
Code example #59
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key,
                   _feat_stride=[
                       16,
                   ],
                   anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(
        scales=np.array(anchor_scales))  # return 9*4 anchors coordinates
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(
        rpn_cls_prob_reshape,
        [0, 3, 1, 2])  #2 score values after softmax------(1,18,14,14)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred,
                                 [0, 3, 1, 2])  #------(1,36,14,14)
    im_info = im_info[0]  # [max_length, max_width, im_scale]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # 12000: boxes kept before NMS
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # 2000: boxes kept after NMS
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH         # 0.7: NMS threshold on RPN proposals
    min_size = cfg[cfg_key].RPN_MIN_SIZE             # 16: min proposal side length (at original image scale)

    # the first set of _num_anchors channels are bg probs(background)
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]  #-------(1,9,14,14)
    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]  #14; 14

    if DEBUG:  # false
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride   # e.g. [0, 16, 32, ..., 208] for width 14
    shift_y = np.arange(0, height) * _feat_stride  # e.g. [0, 16, 32, ..., 208] for height 14
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    """
    return[196,9,4]
    array([[[ -83.,  -39.,  100.,   56.],
        [-175.,  -87.,  192.,  104.],
        [-359., -183.,  376.,  200.],
        ...,
        [ -35.,  -79.,   52.,   96.],
        [ -79., -167.,   96.,  184.],
        [-167., -343.,  184.,  360.]],
       [[ -67.,  -39.,  116.,   56.],
        [-159.,  -87.,  208.,  104.],
        [-343., -183.,  392.,  200.],
        ...,
        [ -19.,  -79.,   68.,   96.],
        [ -63., -167.,  112.,  184.],
        [-151., -343.,  200.,  360.]],
       [[ -51.,  -39.,  132.,   56.],
        [-143.,  -87.,  224.,  104.],
        [-327., -183.,  408.,  200.],
        ...,
        [  -3.,  -79.,   84.,   96.],
        [ -47., -167.,  128.,  184.],
        [-135., -343.,  216.,  360.]],
       ...,
       [[  93.,  169.,  276.,  264.],
        [   1.,  121.,  368.,  312.],
        [-183.,   25.,  552.,  408.],
        ...,
        [ 141.,  129.,  228.,  304.],
        [  97.,   41.,  272.,  392.],
        [   9., -135.,  360.,  568.]],
       [[ 109.,  169.,  292.,  264.],
        [  17.,  121.,  384.,  312.],
        [-167.,   25.,  568.,  408.],
        ...,
        [ 157.,  129.,  244.,  304.],
        [ 113.,   41.,  288.,  392.],
        [  25., -135.,  376.,  568.]],
       [[ 125.,  169.,  308.,  264.],
        [  33.,  121.,  400.,  312.],
        [-151.,   25.,  584.,  408.],
        ...,
        [ 173.,  129.,  260.,  304.],
        [ 129.,   41.,  304.,  392.],
        [  41., -135.,  392.,  568.]]])"""
    anchors = anchors.reshape((K * A, 4))  #(14*14*9, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) (1,36,14,14)format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape(
        (-1, 4))  # (1,36,14,14)-----(1,14,14,36)------(14*14*9,4)

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape(
        (-1, 1))  # (1,9,14,14)-----(1,14,14,9)-----(14*14*9,1)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    # generates the predicted boxes (x1, y1, x2, y2) mapped into the original
    # image, shape (14*14*9, 4): each bbox_delta (dx, dy, dw, dh) is an offset
    # relative to its corresponding anchor (x1, y1, x2, y2)

    # 2. clip predicted boxes to image boundaries
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    # min_size: 16, scaled by im_info[2] (the image scale); _filter_boxes
    # removes boxes with any side smaller than min_size and returns the kept indices
    proposals = proposals[keep, :]  #len(keep)*4
    scores = scores[keep]  #len(keep)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        #12000
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]  #len(order)*4
    scores = scores[order]  #len(order)

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    # nms_thresh: 0.7
    if post_nms_topN > 0:
        #2000
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]  #<=2000
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1),
                          dtype=np.float32)  #(2000, 1)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
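The shifted-anchor construction above broadcasts the A base anchors over all K = H*W feature-map cells. A minimal sketch of that step alone (shift_anchors is a hypothetical helper; generate_anchors supplies the base anchors in the original):

import numpy as np

def shift_anchors(base_anchors, height, width, feat_stride=16):
    """base_anchors: (A, 4) -> (H*W*A, 4) anchors in input-image coordinates."""
    shift_x = np.arange(width) * feat_stride
    shift_y = np.arange(height) * feat_stride
    sx, sy = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel())).transpose()
    A, K = base_anchors.shape[0], shifts.shape[0]
    anchors = base_anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    return anchors.reshape((K * A, 4))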
Code example #60
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        #assert bottom[0].data.shape[0] == 1, \
        #    'Only single item batches are supported'

        cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE
        try:
            num_imgs = cfg[cfg_key].IMS_PER_BATCH
        except (AttributeError, KeyError):
            num_imgs = 1


        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data
        #im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0][0], im_info[0][1])
            print 'scale: {}'.format(im_info[0][2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        blob_all = []

        # process each image in the batch
        for im_i in range(num_imgs):
	        # Transpose and reshape predicted bbox transformations to get them
	        # into the same order as the anchors:
	        #
	        # bbox deltas will be (1, 4 * A, H, W) format
	        # transpose to (1, H, W, 4 * A)
	        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
	        # in slowest to fastest order
                bbox_deltas_i = bbox_deltas[im_i]
                bbox_deltas_i = bbox_deltas_i.reshape(1, bbox_deltas_i.shape[0], bbox_deltas_i.shape[1], bbox_deltas_i.shape[2])
                bbox_deltas_i = bbox_deltas_i.transpose((0, 2, 3, 1)).reshape((-1, 4))
	
	        # Same story for the scores:
	        #
	        # scores are (1, A, H, W) format
	        # transpose to (1, H, W, A)
	        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
                scores_i = scores[im_i]
                scores_i = scores_i.reshape(1, scores_i.shape[0], scores_i.shape[1], scores_i.shape[2])
                scores_i = scores_i.transpose((0, 2, 3, 1)).reshape((-1, 1))
	
	        # Convert anchors into proposals via bbox transformations
	        proposals = bbox_transform_inv(anchors, bbox_deltas_i)
	
	        # 2. clip predicted boxes to image
	        proposals = clip_boxes(proposals, im_info[im_i][:2])
	
	        # 3. remove predicted boxes with either height or width < threshold
	        # (NOTE: convert min_size to input image scale stored in im_info[2])
	        keep = _filter_boxes(proposals, min_size * im_info[im_i][2])
	        proposals = proposals[keep, :]
	        scores_i = scores_i[keep]
	
	        # 4. sort all (proposal, score) pairs by score from highest to lowest
	        # 5. take top pre_nms_topN (e.g. 6000)
	        order = scores_i.ravel().argsort()[::-1]
	        if pre_nms_topN > 0:
	            order = order[:pre_nms_topN]
	        proposals = proposals[order, :]
	        scores_i = scores_i[order]
	
	        # 6. apply nms (e.g. threshold = 0.7)
	        # 7. take after_nms_topN (e.g. 300)
	        # 8. return the top proposals (-> RoIs top)
	        keep = nms(np.hstack((proposals, scores_i)), nms_thresh)
	        if post_nms_topN > 0:
	            keep = keep[:post_nms_topN]
	        proposals = proposals[keep, :]
	        scores_i = scores_i[keep]
	
	        # Output rois blob
	        # Our RPN implementation only supports a single input image, so all
	        # batch inds are 0
                batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
                batch_inds[:] = im_i
                blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

                # accumulate this image's blob; len() avoids the ambiguous
                # ndarray == [] comparisons the original relied on
                if len(blob_all) == 0:
                    blob_all = blob.copy()
                elif len(blob) > 0:
                    blob_all = np.vstack((blob_all, blob))
        top[0].reshape(*(blob_all.shape))
        top[0].data[...] = blob_all

        # [Optional] output scores blob (note: only the last image's scores
        # survive here; the loop never stacks scores across images)
        if len(top) > 1:
            top[1].reshape(*(scores_i.shape))
            top[1].data[...] = scores_i
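A hedged sketch of the multi-image RoI blob assembly performed by the loop above: prefix each image's proposals with its batch index and stack everything into one (N, 5) blob (stack_proposal_blobs is a hypothetical helper):

import numpy as np

def stack_proposal_blobs(proposals_per_image):
    """proposals_per_image: list of (N_i, 4) arrays -> (sum N_i, 5) blob."""
    blobs = []
    for im_i, proposals in enumerate(proposals_per_image):
        if proposals.shape[0] == 0:
            continue
        batch_inds = np.full((proposals.shape[0], 1), im_i, dtype=np.float32)
        blobs.append(np.hstack((batch_inds, proposals.astype(np.float32))))
    return np.vstack(blobs) if blobs else np.zeros((0, 5), dtype=np.float32)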