Example no. 1
 def load_dpm_dets_for_image(cls,
                             image,
                             dataset,
                             suffix='dets_all_may25_DP'):
     """
     Loads multi-class array of detections for an image from .mat format.
     """
     t = time.time()
     name = os.path.splitext(image.name)[0]
     # TODO: figure out how to deal with different types of detections
     dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_may25_DP'
     filename = os.path.join(dets_dir, '%s_dets_all_may25_DP.mat' % name)
     if not os.path.exists(filename):
         dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_jun1_DP_trainval'
         filename = os.path.join(dets_dir,
                                 '%s_dets_all_jun1_DP_trainval.mat' % name)
         if not os.path.exists(filename):
             filename = os.path.join(config.test_support_dir,
                                     'dets/%s_dets_all_may25_DP.mat' % name)
             if not os.path.exists(filename):
                 print("File does not exist!")
                 return None
     mat = scipy.io.loadmat(filename)
     dets = mat['dets_mc']
     times = mat['times_mc']
     # feat_time = times[0,0]
     dets_seq = []
     cols = [
         'x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy', 'dummy',
         'score', 'time'
     ]
     for cls_ind, cls in enumerate(config.pascal_classes):
         cls_dets = dets[cls_ind][0]
         if cls_dets.shape[0] > 0:
             det_time = times[cls_ind, 1]
             # all detections get the final time
             cls_dets = ut.append_index_column(cls_dets, det_time)
             cls_dets = ut.append_index_column(cls_dets, cls_ind)
             # subtract 1 pixel and convert from corners!
             cls_dets[:, :4] -= 1
             cls_dets[:, :4] = BoundingBox.convert_arr_from_corners(
                 cls_dets[:, :4])
             dets_seq.append(cls_dets)
     cols = [
         'x', 'y', 'w', 'h', 'dummy', 'dummy', 'dummy', 'dummy', 'score',
         'time', 'cls_ind'
     ]
     # NMS detections per class individually
     dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols': cols})
     dets_mc[:, :4] = BoundingBox.clipboxes_arr(
         dets_mc[:, :4], (0, 0, image.size[0] - 1, image.size[1] - 1))
     time_elapsed = time.time() - t
     print("On image %s, took %.3f s" % (image.name, time_elapsed))
     return dets_mc
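A sketch of what BoundingBox.convert_arr_from_corners plausibly does, inferred from the two column lists above: it maps [x1, y1, x2, y2] rows to [x, y, w, h] rows. The exact +1 pixel convention is an assumption.

    import numpy as np

    def convert_arr_from_corners(arr):
        # Assumed behavior: [x1, y1, x2, y2] -> [x, y, w, h] per row.
        # The +1 follows the usual PASCAL convention for inclusive corners,
        # but the original helper's exact offset is not shown here.
        out = arr.astype(float).copy()
        out[:, 2] = arr[:, 2] - arr[:, 0] + 1  # w = x2 - x1 + 1
        out[:, 3] = arr[:, 3] - arr[:, 1] + 1  # h = y2 - y1 + 1
        return out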
Example no. 2
 def load_csc_dpm_dets_for_image(cls, image, dataset):
     """
     Loads HOS's cascaded dets.
     """
     t = time.time()
     name = os.path.splitext(image.name)[0]
     # if test dataset, use HOS's detections; if not, need to output my own
     if re.search('test', dataset.name):
         dirname = config.get_dets_test_wholeset_dir()
         filename = os.path.join(
             dirname,
             '%s_dets_all_test_original_cascade_wholeset.mat' % name)
     else:
         dirname = config.get_dets_nov19()
         filename = os.path.join(dirname, '%s_dets_all_nov19.mat' % name)
     print(filename)
     if not os.path.exists(filename):
         raise RuntimeError("File %s does not exist!" % filename)
     mat = scipy.io.loadmat(filename)
     dets = mat['dets_mc']
     times = mat['times_mc']
     # feat_time = times[0,0]
     dets_seq = []
     cols = [
         'x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy', 'dummy',
         'dummy', 'dummy', 'score'
     ]
     for cls_ind, cls in enumerate(dataset.classes):
         cls_dets = dets[cls_ind][0]
         if cls_dets.shape[0] > 0:
             good_ind = [0, 1, 2, 3, 10]
             cls_dets = cls_dets[:, good_ind]
             det_time = times[cls_ind, 1]
             # all detections get the final time
             cls_dets = ut.append_index_column(cls_dets, det_time)
             cls_dets = ut.append_index_column(cls_dets, cls_ind)
             # convert from corners!
             cls_dets[:, :4] = BoundingBox.convert_arr_from_corners(
                 cls_dets[:, :4])
             cls_dets[:, :4] = BoundingBox.clipboxes_arr(
                 cls_dets[:, :4], (0, 0, image.size[0], image.size[1]))
             dets_seq.append(cls_dets)
     cols = ['x', 'y', 'w', 'h', 'score', 'time', 'cls_ind']
     dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols': cols})
     time_elapsed = time.time() - t
     print("On image %s, took %.3f s" % (image.name, time_elapsed))
     return dets_mc
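Both loaders tag every row with a scalar (a per-class time, then the class index) via ut.append_index_column. A minimal sketch of the assumed semantics:

    import numpy as np

    def append_index_column(arr, value):
        # Assumed behavior: append one column, filled with the scalar
        # value, to the right of a (n_rows, n_cols) array.
        col = value * np.ones((arr.shape[0], 1))
        return np.hstack((arr, col))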
Example no. 3
    def test_load_dpm_detections(self):
        conf = dict(self.config)
        conf['detectors'] = ['dpm']
        policy = DatasetPolicy(self.dataset, self.train_dataset, **conf)
        assert (policy.detectors == ['dpm'])
        dets = policy.load_ext_detections(self.dataset,
                                          'dpm_may25',
                                          force=True)
        dets = dets.with_column_omitted('time')

        # load the ground truth dets, processed in Matlab
        # (timely/data/test_support/concat_dets.m)
        filename = os.path.join(config.test_support_dir, 'val_dets.mat')
        dets_correct = Table(
            scipy.io.loadmat(filename)['dets'], [
                'x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy', 'dummy',
                'score', 'cls_ind', 'img_ind'
            ], 'dets_correct')
        dets_correct = dets_correct.subset(
            ['x1', 'y1', 'x2', 'y2', 'score', 'cls_ind', 'img_ind'])
        dets_correct.arr[:, :4] -= 1
        dets_correct.arr[:, :4] = BoundingBox.convert_arr_from_corners(
            dets_correct.arr[:, :4])
        dets_correct.cols = ['x', 'y', 'w', 'h', 'score', 'cls_ind', 'img_ind']

        print('----mine:')
        print(dets)
        print('----correct:')
        print(dets_correct)
        assert (dets_correct == dets)
Example no. 4
    def test_load_dpm_detections(self):
        conf = dict(self.config)
        conf["detectors"] = ["dpm"]
        policy = DatasetPolicy(self.dataset, self.train_dataset, **conf)
        assert policy.detectors == ["dpm"]
        dets = policy.load_ext_detections(self.dataset, "dpm_may25", force=True)
        dets = dets.with_column_omitted("time")

        # load the ground truth dets, processed in Matlab
        # (timely/data/test_support/concat_dets.m)
        filename = os.path.join(config.test_support_dir, "val_dets.mat")
        dets_correct = Table(
            scipy.io.loadmat(filename)["dets"],
            ["x1", "y1", "x2", "y2", "dummy", "dummy", "dummy", "dummy", "score", "cls_ind", "img_ind"],
            "dets_correct",
        )
        dets_correct = dets_correct.subset(["x1", "y1", "x2", "y2", "score", "cls_ind", "img_ind"])
        dets_correct.arr[:, :4] -= 1
        dets_correct.arr[:, :4] = BoundingBox.convert_arr_from_corners(dets_correct.arr[:, :4])
        dets_correct.cols = ["x", "y", "w", "h", "score", "cls_ind", "img_ind"]

        print ("----mine:")
        print (dets)
        print ("----correct:")
        print (dets_correct)
        assert dets_correct == dets
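The test leans on a few Table conveniences (with_column_omitted, subset) whose behavior can be inferred from usage. A rough, simplified sketch under those assumptions (the real class lives in skpyutils and certainly does more):

    import numpy as np

    class Table:
        def __init__(self, arr, cols, name=None):
            self.arr = arr
            self.cols = list(cols)
            self.name = name

        def with_column_omitted(self, col):
            # Assumed behavior: drop one named column and its data.
            keep = [i for i, c in enumerate(self.cols) if c != col]
            return Table(self.arr[:, keep], [self.cols[i] for i in keep])

        def subset(self, cols):
            # Assumed behavior: keep only the named columns, in order.
            inds = [self.cols.index(c) for c in cols]
            return Table(self.arr[:, inds], cols)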
Example no. 5
    def get_neg_windows(self, num, cls=None, window_params=None, max_overlap=0,
                        max_num_images=250):
        """
        Return array of num windows that can be generated with window_params
        that do not overlap with ground truth by more than max_overlap.
        * If cls is not given, returns ground truth for all classes.
        * If max_num_images is given, samples from at most that many images.
        """
        sw = SlidingWindows(self, self)
        if not window_params:
            window_params = sw.get_default_window_params(cls)
        all_windows = []
        image_inds = self.get_pos_samples_for_class(cls)

        max_num = len(image_inds)
        inds = image_inds
        if max_num_images:
            inds = skutil.random_subset(image_inds, max_num_images)
        num_per_image = round(1. * num / max_num)
        for ind in inds:
            image = self.images[ind]
            windows = image.get_windows(window_params)
            gts = image.get_ground_truth(cls)
            for gt in gts.arr:
                overlaps = BoundingBox.get_overlap(windows[:, :4], gt[:4])
                windows = windows[overlaps <= max_overlap, :]
            if windows.shape[0] == 0:
                continue
            ind_to_take = skutil.random_subset_up_to_N(
                windows.shape[0], num_per_image)
            all_windows.append(np.hstack(
                (windows[ind_to_take, :], np.tile(ind, (ind_to_take.shape[0], 1)))))
        all_windows = np.concatenate(all_windows, 0)
        return all_windows[:num, :]
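BoundingBox.get_overlap, used in the loop above (and in later examples), compares many [x, y, w, h] rows against one box at once. A sketch of the presumed intersection-over-union computation:

    import numpy as np

    def get_overlap(windows, gt):
        # Assumed behavior: IoU of each [x, y, w, h] row in windows
        # against a single [x, y, w, h] box gt.
        x1 = np.maximum(windows[:, 0], gt[0])
        y1 = np.maximum(windows[:, 1], gt[1])
        x2 = np.minimum(windows[:, 0] + windows[:, 2], gt[0] + gt[2])
        y2 = np.minimum(windows[:, 1] + windows[:, 3], gt[1] + gt[3])
        inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
        union = windows[:, 2] * windows[:, 3] + gt[2] * gt[3] - inter
        return inter / np.maximum(union, 1e-12)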
Example no. 6
 def load_dpm_dets_for_image(cls, image, dataset, suffix='dets_all_may25_DP'):
     """
     Loads multi-class array of detections for an image from .mat format.
     """
     t = time.time()
     name = os.path.splitext(image.name)[0]
     # TODO: figure out how to deal with different types of detections
     dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_may25_DP'
     filename = os.path.join(dets_dir, '%s_dets_all_may25_DP.mat' % name)
     if not os.path.exists(filename):
         dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_jun1_DP_trainval'
         filename = os.path.join(
             dets_dir, '%s_dets_all_jun1_DP_trainval.mat' % name)
         if not os.path.exists(filename):
             filename = os.path.join(
                 config.test_support_dir, 'dets/%s_dets_all_may25_DP.mat' % name)
             if not os.path.exists(filename):
                 print("File does not exist!")
                 return None
     mat = scipy.io.loadmat(filename)
     dets = mat['dets_mc']
     times = mat['times_mc']
     # feat_time = times[0,0]
     dets_seq = []
     cols = ['x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy',
             'dummy', 'score', 'time']
     for cls_ind, cls in enumerate(config.pascal_classes):
         cls_dets = dets[cls_ind][0]
         if cls_dets.shape[0] > 0:
             det_time = times[cls_ind, 1]
             # all detections get the final time
             cls_dets = ut.append_index_column(cls_dets, det_time)
             cls_dets = ut.append_index_column(cls_dets, cls_ind)
             # subtract 1 pixel and convert from corners!
             cls_dets[:, :4] -= 1
             cls_dets[:, :4] = BoundingBox.convert_arr_from_corners(
                 cls_dets[:, :4])
             dets_seq.append(cls_dets)
     cols = ['x', 'y', 'w', 'h', 'dummy', 'dummy', 'dummy',
             'dummy', 'score', 'time', 'cls_ind']
     # NMS detections per class individually
     dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols': cols})
     dets_mc[:, :4] = BoundingBox.clipboxes_arr(
         dets_mc[:, :4], (0, 0, image.size[0] - 1, image.size[1] - 1))
     time_elapsed = time.time() - t
     print("On image %s, took %.3f s" % (image.name, time_elapsed))
     return dets_mc
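ut.collect apparently applies Detector.nms_detections per class and stacks the results; the suppression step itself is not shown anywhere in these examples. A minimal greedy non-maximum suppression sketch, with an assumed signature, reusing numpy and the get_overlap IoU helper sketched at the end of Example no. 5:

    import numpy as np

    def nms_detections(dets, cols, overlap_thresh=0.5):
        # Assumed behavior: greedy NMS. Keep the highest-scoring box,
        # drop boxes overlapping a kept box by more than overlap_thresh.
        score_ind = cols.index('score')
        order = np.argsort(-dets[:, score_ind])
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            ious = get_overlap(dets[order[1:], :4], dets[i, :4])
            order = order[1:][ious <= overlap_thresh]
        return dets[keep, :]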
Example no. 7
    def load_from_pascal_xml_filename(cls, classes, filename, images_dir):
        "Load image info from a file in the PASCAL VOC XML format."

        def get_data_from_tag(node, tag):
            if tag == "bndbox":
                x1 = int(node.getElementsByTagName(
                    tag)[0].childNodes[1].childNodes[0].data)
                y1 = int(node.getElementsByTagName(
                    tag)[0].childNodes[3].childNodes[0].data)
                x2 = int(node.getElementsByTagName(
                    tag)[0].childNodes[5].childNodes[0].data)
                y2 = int(node.getElementsByTagName(
                    tag)[0].childNodes[7].childNodes[0].data)
                return (x1, y1, x2, y2)
            else:
                return node.getElementsByTagName(tag)[0].childNodes[0].data

        with open(filename) as f:
            data = minidom.parseString(f.read())

        # image info
        name = get_data_from_tag(data, "filename")
        filename = os.path.join(images_dir, name)
        size = data.getElementsByTagName("size")[0]
        im_width = int(get_data_from_tag(size, "width"))
        im_height = int(get_data_from_tag(size, "height"))
        img = Image(im_width, im_height, classes, name)

        # per-object info
        objects = []
        for obj in data.getElementsByTagName("object"):
            clas = str(get_data_from_tag(obj, "name")).lower().strip()
            diff = int(get_data_from_tag(obj, "difficult"))
            trun = int(get_data_from_tag(obj, "truncated"))
            rect = get_data_from_tag(obj, "bndbox")
            bbox = BoundingBox(rect, format='corners')
            cls_ind = classes.index(clas)
            objects.append(np.hstack((bbox.get_arr(), cls_ind, diff, trun)))
        if len(objects) > 0:
            img.objects_table = Table(np.array(objects), cls.columns)
        else:
            img.objects_table = Table(None, cls.columns)
        return img
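The odd childNodes indices in get_data_from_tag assume each child element is separated by a whitespace text node, i.e. pretty-printed XML. A minimal annotation this parser would accept (values hypothetical):

    from xml.dom import minidom

    xml = """<annotation>
        <filename>000001.jpg</filename>
        <size>
            <width>353</width>
            <height>500</height>
        </size>
        <object>
            <name>dog</name>
            <difficult>0</difficult>
            <truncated>1</truncated>
            <bndbox>
                <xmin>48</xmin>
                <ymin>240</ymin>
                <xmax>195</xmax>
                <ymax>371</ymax>
            </bndbox>
        </object>
    </annotation>"""
    data = minidom.parseString(xml)
    # childNodes of <bndbox> alternate text/element nodes, so the parser
    # reads xmin, ymin, xmax, ymax at indices 1, 3, 5, and 7.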
Example no. 8
 def load_csc_dpm_dets_for_image(cls, image, dataset):
     """
     Loads HOS's cascaded dets.
     """
     t = time.time()
     name = os.path.splitext(image.name)[0]
     # if test dataset, use HOS's detections; if not, need to output my own
     if re.search('test', dataset.name):
         dirname = config.get_dets_test_wholeset_dir()
         filename = os.path.join(
             dirname, '%s_dets_all_test_original_cascade_wholeset.mat' % name)
     else:
         dirname = config.get_dets_nov19()
         filename = os.path.join(dirname, '%s_dets_all_nov19.mat' % name)
     print(filename)
     if not os.path.exists(filename):
         raise RuntimeError("File %s does not exist!" % filename)
     mat = scipy.io.loadmat(filename)
     dets = mat['dets_mc']
     times = mat['times_mc']
     # feat_time = times[0,0]
     dets_seq = []
     cols = ['x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy',
             'dummy', 'dummy', 'dummy', 'score']
     for cls_ind, cls in enumerate(dataset.classes):
         cls_dets = dets[cls_ind][0]
         if cls_dets.shape[0] > 0:
             good_ind = [0, 1, 2, 3, 10]
             cls_dets = cls_dets[:, good_ind]
             det_time = times[cls_ind, 1]
             # all detections get the final time
             cls_dets = ut.append_index_column(cls_dets, det_time)
             cls_dets = ut.append_index_column(cls_dets, cls_ind)
             # convert from corners!
             cls_dets[:, :4] = BoundingBox.convert_arr_from_corners(
                 cls_dets[:, :4])
             cls_dets[:, :4] = BoundingBox.clipboxes_arr(
                 cls_dets[:, :4], (0, 0, image.size[0], image.size[1]))
             dets_seq.append(cls_dets)
     cols = ['x', 'y', 'w', 'h', 'score', 'time', 'cls_ind']
     dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols': cols})
     time_elapsed = time.time() - t
     print("On image %s, took %.3f s" % (image.name, time_elapsed))
     return dets_mc
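BoundingBox.clipboxes_arr, used by several of these loaders, keeps boxes inside the image. A sketch of the assumed behavior for [x, y, w, h] rows and a (min_x, min_y, max_x, max_y) bound:

    import numpy as np

    def clipboxes_arr(arr, bounds):
        # Assumed behavior: clip each [x, y, w, h] row so the box lies
        # within (min_x, min_y, max_x, max_y).
        min_x, min_y, max_x, max_y = bounds
        out = arr.copy()
        x2 = np.minimum(out[:, 0] + out[:, 2], max_x)
        y2 = np.minimum(out[:, 1] + out[:, 3], max_y)
        out[:, 0] = np.maximum(out[:, 0], min_x)
        out[:, 1] = np.maximum(out[:, 1], min_y)
        out[:, 2] = x2 - out[:, 0]
        out[:, 3] = y2 - out[:, 1]
        return out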
Example no. 9
 def load_from_json(cls, classes, data):
     "Return an Image instantiated from a JSON representation."
     name = data['name']
     width = data['size'][0]
     height = data['size'][1]
     img = Image(width, height, classes, name)
     objects = []
     for obj in data['objects']:
         bbox = BoundingBox(obj['bbox'])
         cls_name = obj['class']
         cls_ind = classes.index(cls_name)
         diff = obj['diff']
         trun = obj['trun']
         objects.append(np.hstack((bbox.get_arr(), cls_ind, diff, trun)))
     if len(objects) > 0:
         img.objects_table = Table(np.array(objects), cls.columns)
     else:
         img.objects_table = Table(None, cls.columns)
     return img
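The expected JSON shape follows directly from the reads above; a hypothetical input (the bbox format depends on the BoundingBox default, which is not shown):

    data = {
        'name': '000001.jpg',
        'size': [353, 500],  # [width, height]
        'objects': [
            {'bbox': [48, 240, 148, 132],
             'class': 'dog', 'diff': 0, 'trun': 1},
        ],
    }
    img = Image.load_from_json(classes, data)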
Example no. 10
 def get_windows(self, image, cls=None, window_params=None, with_time=False):
     """
     Return all windows that can be generated with window_params.
     If with_time=True, return tuple of (windows, time_elapsed).
     """
     assert cls or window_params
     if not window_params:
         window_params = self.get_default_window_params(cls)
     t = time.time()
     stride = window_params.stride
     min_width = window_params.min_width
     actual_xs = []
     actual_ys = []
     actual_ws = []
     actual_hs = []
     num_windows = 0
     # we want to be able to capture objects that extend past the image
     # we always iterate over locations in native space, and convert to
     # actual image space when we record the window
     w_pad = int(1.0 * min_width / 2)
     x_min = -w_pad
     for scale in window_params.scales:
         x_max = int(image.width * scale) - w_pad
         if w_pad > 0:
             x_max += stride
         actual_w = int(min_width / scale) + 1
         for ratio in window_params.aspect_ratios:
             h_pad = int(1.0 * min_width * ratio / 2)
             y_min = -h_pad
             y_max = int(image.height * scale) - h_pad
             if h_pad > 0:
                 y_max += stride
             actual_h = int(min_width / scale * ratio) + 1
             for y in range(y_min, y_max, stride):
                 for x in range(x_min, x_max, stride):
                     actual_ws.append(actual_w)
                     actual_hs.append(actual_h)
                     actual_xs.append(int(x / scale))
                     actual_ys.append(int(y / scale))
     windows = np.array([actual_xs, actual_ys, actual_ws, actual_hs]).T
     windows = BoundingBox.clipboxes_arr(windows, (0, 0, image.width, image.height))
     if with_time:
         time_elapsed = time.time() - t
         return (windows, time_elapsed)
     else:
         return windows
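Only a handful of attributes of window_params are read here. A hypothetical stand-in that would drive this generator (names inferred from the code; defaults invented for illustration):

    class WindowParams:
        # Hypothetical container; get_windows() reads exactly these fields.
        def __init__(self, stride=8, min_width=40, scales=(0.5, 1.0, 2.0),
                     aspect_ratios=(0.5, 1.0, 2.0)):
            self.stride = stride
            self.min_width = min_width
            self.scales = scales
            self.aspect_ratios = aspect_ratios

    # windows = image.get_windows(WindowParams())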
Example no. 11
 def get_pos_windows(self, cls=None, window_params=None, min_overlap=0.7):
     """
     Return array of all ground truth windows for the class, plus windows
     that can be generated with window_params that overlap with it by more
     than min_overlap.
     * If cls not given, return positive windows for all classes.
     * If window_params not given, use default for the class.
     * Adjust min_overlap to fetch fewer windows.
     """
     sw = SlidingWindows(self, self)
     if not window_params:
         window_params = sw.get_default_window_params(cls)
     overlapping_windows = []
     image_inds = self.get_pos_samples_for_class(cls)
     times = []
     window_nums = []
     for i in image_inds:
         image = self.images[i]
         gts = image.get_ground_truth(cls)
         if gts.arr.shape[0] > 0:
             overlap_wins = gts.arr[:, :4]
             overlap_wins = np.hstack(
                 (overlap_wins, np.tile(i, (overlap_wins.shape[0], 1))))
             overlapping_windows.append(overlap_wins.astype(int))
             windows, time_elapsed = image.get_windows(
                 window_params, with_time=True)
             window_nums.append(windows.shape[0])
             times.append(time_elapsed)
             for gt in gts.arr:
                 overlaps = BoundingBox.get_overlap(windows[:, :4], gt[:4])
                 overlap_wins = windows[overlaps >= min_overlap, :]
                 overlap_wins = np.hstack((
                     overlap_wins, np.tile(i, (overlap_wins.shape[0], 1))))
                 overlapping_windows.append(overlap_wins.astype(int))
                 windows = windows[overlaps < min_overlap, :]
     overlapping_windows = np.concatenate(overlapping_windows, 0)
     print("Windows generated per image: %d +/- %.3f, in %.3f +/- %.3f sec" % (
         np.mean(window_nums), np.std(window_nums),
         np.mean(times), np.std(times)))
     return overlapping_windows
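The inner loop partitions the proposals per ground-truth box: rows with overlap >= min_overlap are collected, the rest stay in the pool for the next box. A toy demonstration of that boolean-mask pattern:

    import numpy as np

    windows = np.array([[0, 0, 10, 10], [100, 100, 10, 10]])
    overlaps = np.array([0.8, 0.1])       # pretend IoU against one gt box
    kept = windows[overlaps >= 0.7, :]    # appended to overlapping_windows
    windows = windows[overlaps < 0.7, :]  # remains for the next gt box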
Example no. 12
def compute_det_pr_and_hard_neg(dets, gt, min_overlap=0.5):
    """
    Compute the precision-recall curve and find hard negatives of the given
    detections against the ground truth.

    Args:
        dets (skpyutils.Table): detections.
            !NOTE: the first four columns must be the bounding box coordinates!

        gt (skpyutils.Table): detection ground truth.
            Can be for a single image or a whole dataset, and can contain either all
            classes or a single class. The 'cls_ind' column must be present in
            either case.

            Note that depending on these choices, the meaning of the PR evaluation
            is different. In particular, if gt is for a single class but detections
            are for multiple classes, there will be a lot of false positives!

        min_overlap (float): minimum required area of union of area of
            intersection overlap for a true positive.

    Returns:
        (ap, recall, precision, hard_negatives, sorted_dets): tuple of
        (float, list, list, list, ndarray), where the lists are 0/1 masks
        onto the sorted dets.
    """
    tt = TicToc().tic()

    # if dets or gt are empty, return 0's
    nd = dets.arr.shape[0]
    if nd < 1 or gt.shape[0] < 1:
        ap = 0
        rec = np.array([0])
        prec = np.array([0])
        hard_negs = np.array([0])
        return (ap, rec, prec, hard_negs, dets)

    # augment gt with a column keeping track of matches
    cols = list(gt.cols) + ["matched"]
    arr = np.zeros((gt.arr.shape[0], gt.arr.shape[1] + 1))
    arr[:, :-1] = gt.arr.copy()
    gt = Table(arr, cols)

    # sort detections by confidence
    dets = dets.copy()
    dets.sort_by_column("score", descending=True)

    # match detections to ground truth objects
    npos = gt.filter_on_column("diff", 0).shape[0]
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    hard_neg = np.zeros(nd)
    for d in range(nd):
        if tt.qtoc() > 15:
            print("... on %d/%d dets" % (d, nd))
            tt.tic()

        det = dets.arr[d, :]

        # find ground truth for this image
        if "img_ind" in gt.cols:
            img_ind = det[dets.ind("img_ind")]
            inds = gt.arr[:, gt.ind("img_ind")] == img_ind
            gt_for_image = gt.arr[inds, :]
        else:
            gt_for_image = gt.arr

        if gt_for_image.shape[0] < 1:
            # false positive due to a det in image that does not contain the class
            # NOTE: this can happen if we're passing ground truth for a class
            fp[d] = 1
            hard_neg[d] = 1
            continue

        # find the maximally overlapping ground truth element for this
        # detection
        overlaps = BoundingBox.get_overlap(gt_for_image[:, :4], det[:4])
        jmax = overlaps.argmax()
        ovmax = overlaps[jmax]

        # assign detection as true positive/don't care/false positive
        if ovmax >= min_overlap:
            if gt_for_image[jmax, gt.ind("diff")]:
                # not a false positive because object is difficult
                pass
            else:
                if gt_for_image[jmax, gt.ind("matched")] == 0:
                    if gt_for_image[jmax, gt.ind("cls_ind")] == det[dets.ind("cls_ind")]:
                        # true positive
                        tp[d] = 1
                        gt_for_image[jmax, gt.ind("matched")] = 1
                    else:
                        # false positive due to wrong class
                        fp[d] = 1
                        hard_neg[d] = 1
                else:
                    # false positive due to multiple detection
                    # this is still a correct answer, so not a hard negative
                    fp[d] = 1
        else:
            # false positive due to not matching any ground truth object
            fp[d] = 1
            hard_neg[d] = 1
        # NOTE: must do this for gt.arr to get the changes we made to
        # gt_for_image
        if "img_ind" in gt.cols:
            gt.arr[inds, :] = gt_for_image

    ap, rec, prec = compute_rec_prec_ap(tp, fp, npos)
    return (ap, rec, prec, hard_neg, dets)
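compute_rec_prec_ap is not shown in these examples. From the cumulative true/false positives, the standard VOC-style computation would look roughly as follows (the exact AP interpolation the original helper uses is an assumption):

    import numpy as np

    def compute_rec_prec_ap(tp, fp, npos):
        # Cumulative counts over detections sorted by descending score.
        tp_cum = np.cumsum(tp)
        fp_cum = np.cumsum(fp)
        rec = tp_cum / max(npos, 1)
        prec = tp_cum / np.maximum(tp_cum + fp_cum, 1e-12)
        # Assumed AP: area under the PR curve after enforcing a
        # monotonically non-increasing precision envelope.
        mrec = np.concatenate(([0.0], rec, [1.0]))
        mpre = np.concatenate(([0.0], prec, [0.0]))
        for i in range(mpre.size - 2, -1, -1):
            mpre[i] = max(mpre[i], mpre[i + 1])
        idx = np.where(mrec[1:] != mrec[:-1])[0]
        ap = np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])
        return ap, rec, prec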
Example no. 13
  def compute_det_pr_and_hard_neg(cls, dets, gt):
    """
    Take Table of detections and Table of ground truth.
    Ground truth can be for a single image or a whole dataset
    and can contain either all classes or just one class (but the cls_ind col
    must be present in either case).
    Depending on these decisions, the meaning of the PR evaluation is
    different.
    In particular, if gt is for a single class but dets are for multiple
    classes, there will be a lot of false positives!
    NOTE: modifies dets in-place (sorts by score)
    Return ap, recall, and precision vectors as tuple.
    """
    gt = gt.copy()

    # if dets or gt are empty, return 0's
    nd = dets.arr.shape[0]
    if nd < 1 or gt.shape[0] < 1:
      ap = 0
      rec = np.array([0])
      prec = np.array([0])
      return (ap,rec,prec)
    tt = TicToc().tic()

    # augment gt with a column keeping track of matches
    cols = list(gt.cols) + ['matched']
    arr = np.zeros((gt.arr.shape[0],gt.arr.shape[1]+1))
    arr[:,:-1] = gt.arr
    gt = Table(arr,cols)

    # sort detections by confidence
    dets.sort_by_column('score',descending=True)

    # match detections to ground truth objects
    npos = gt.filter_on_column('diff',0).shape[0]
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    hard_neg = np.zeros(nd)
    for d in range(nd):
      if tt.qtoc() > 15:
        print("... on %d/%d dets"%(d,nd))
        tt.tic()

      det = dets.arr[d,:]

      # find ground truth for this image
      if 'img_ind' in gt.cols:
        img_ind = det[dets.ind('img_ind')]
        inds = gt.arr[:,gt.ind('img_ind')] == img_ind
        gt_for_image = gt.arr[inds,:]
      else:
        gt_for_image = gt.arr
      
      if gt_for_image.shape[0] < 1:
        # this can happen if we're passing ground truth for a class
        # false positive due to detection in image that does not contain the class
        fp[d] = 1 
        hard_neg[d] = 1
        continue

      # find the maximally overlapping ground truth element for this detection
      overlaps = BoundingBox.get_overlap(gt_for_image[:,:4],det[:4])
      jmax = overlaps.argmax()
      ovmax = overlaps[jmax]

      # assign detection as true positive/don't care/false positive
      if ovmax >= cls.MIN_OVERLAP:
        if not gt_for_image[jmax,gt.ind('diff')]:
          is_matched = gt_for_image[jmax,gt.ind('matched')]
          if is_matched == 0:
            if gt_for_image[jmax,gt.ind('cls_ind')] == det[dets.ind('cls_ind')]:
              # true positive
              tp[d] = 1
              gt_for_image[jmax,gt.ind('matched')] = 1
            else:
              # false positive due to wrong class
              fp[d] = 1
              hard_neg[d] = 1
          else:
            # false positive due to multiple detection
            # this is still a correct answer, so not a hard negative
            fp[d] = 1
        else:
          # NOT a false positive because object is difficult!
          pass
      else:
        # false positive due to not matching any ground truth object
        fp[d] = 1
        hard_neg[d] = 1
      # NOTE: this is very important: otherwise, gt.arr does not get the
      # changes we make to gt_for_image
      if 'img_ind' in gt.cols:
        gt.arr[inds,:] = gt_for_image

    ap,rec,prec = cls.compute_rec_prec_ap(tp,fp,npos)
    return (ap,rec,prec,hard_neg)
Example no. 14
    def compute_det_pr_and_hard_neg(cls, dets, gt):
        """
    Take Table of detections and Table of ground truth.
    Ground truth can be for a single image or a whole dataset
    and can contain either all classes or just one class (but the cls_ind col
    must be present in either case).
    Depending on these decisions, the meaning of the PR evaluation is
    different.
    In particular, if gt is for a single class but dets are for multiple
    classes, there will be a lot of false positives!
    NOTE: modifies dets in-place (sorts by score)
    Return ap, recall, and precision vectors as tuple.
    """
        gt = gt.copy()

        # if dets or gt are empty, return 0's
        nd = dets.arr.shape[0]
        if nd < 1 or gt.shape[0] < 1:
            ap = 0
            rec = np.array([0])
            prec = np.array([0])
            return (ap, rec, prec)
        tt = TicToc().tic()

        # augment gt with a column keeping track of matches
        cols = list(gt.cols) + ['matched']
        arr = np.zeros((gt.arr.shape[0], gt.arr.shape[1] + 1))
        arr[:, :-1] = gt.arr
        gt = Table(arr, cols)

        # sort detections by confidence
        dets.sort_by_column('score', descending=True)

        # match detections to ground truth objects
        npos = gt.filter_on_column('diff', 0).shape[0]
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        hard_neg = np.zeros(nd)
        for d in range(nd):
            if tt.qtoc() > 15:
                print("... on %d/%d dets" % (d, nd))
                tt.tic()

            det = dets.arr[d, :]

            # find ground truth for this image
            if 'img_ind' in gt.cols:
                img_ind = det[dets.ind('img_ind')]
                inds = gt.arr[:, gt.ind('img_ind')] == img_ind
                gt_for_image = gt.arr[inds, :]
            else:
                gt_for_image = gt.arr

            if gt_for_image.shape[0] < 1:
                # this can happen if we're passing ground truth for a class
                # false positive due to detection in image that does not contain the class
                fp[d] = 1
                hard_neg[d] = 1
                continue

            # find the maximally overlapping ground truth element for this detection
            overlaps = BoundingBox.get_overlap(gt_for_image[:, :4], det[:4])
            jmax = overlaps.argmax()
            ovmax = overlaps[jmax]

            # assign detection as true positive/don't care/false positive
            if ovmax >= cls.MIN_OVERLAP:
                if not gt_for_image[jmax, gt.ind('diff')]:
                    is_matched = gt_for_image[jmax, gt.ind('matched')]
                    if is_matched == 0:
                        if gt_for_image[jmax, gt.ind('cls_ind')] == det[
                                dets.ind('cls_ind')]:
                            # true positive
                            tp[d] = 1
                            gt_for_image[jmax, gt.ind('matched')] = 1
                        else:
                            # false positive due to wrong class
                            fp[d] = 1
                            hard_neg[d] = 1
                    else:
                        # false positive due to multiple detection
                        # this is still a correct answer, so not a hard negative
                        fp[d] = 1
                else:
                    # NOT a false positive because object is difficult!
                    pass
            else:
                # false positive due to not matching any ground truth object
                fp[d] = 1
                hard_neg[d] = 1
            # NOTE: this is very important: otherwise, gt.arr does not get the
            # changes we make to gt_for_image
            if 'img_ind' in gt.cols:
                gt.arr[inds, :] = gt_for_image

        ap, rec, prec = cls.compute_rec_prec_ap(tp, fp, npos)
        return (ap, rec, prec, hard_neg)
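A hypothetical toy call, assuming a Table constructor as in Example no. 3, a host class (here called Evaluation) that defines MIN_OVERLAP, and one image with one ground-truth object:

    import numpy as np

    gt = Table(np.array([[10., 10., 20., 20., 0., 0.]]),
               ['x', 'y', 'w', 'h', 'cls_ind', 'diff'])
    dets = Table(np.array([[11., 11., 20., 20., 0.9, 0.],
                           [200., 200., 5., 5., 0.4, 0.]]),
                 ['x', 'y', 'w', 'h', 'score', 'cls_ind'])
    ap, rec, prec, hard_neg = Evaluation.compute_det_pr_and_hard_neg(dets, gt)
    # The first det matches the gt box (true positive); the second matches
    # nothing and comes back flagged as a hard negative.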
Example no. 15
    def get_windows_new(self, image, cls, metaparams=None, with_time=False, at_most=200000, force=False):
        """
    Generate windows by using ground truth window stats and metaparams.
    metaparams must contain keys 'samples_per_500px', 'num_scales', 'num_ratios', 'mode'
    metaparams['mode'] can be 'linear' or 'importance' and refers to the method
    of sampling intervals per window parameter.
    If with_time=True, return tuple of (windows, time_elapsed).
    """
        if not metaparams:
            metaparams = {
                "samples_per_500px": 83,
                "num_scales": 12,
                "num_ratios": 6,
                "mode": "importance",
                "priority": 0,
            }

        t = time.time()
        x_samples = int(image.width / 500.0 * metaparams["samples_per_500px"])
        y_samples = int(image.height / 500.0 * metaparams["samples_per_500px"])

        # check for cached windows and return if found
        dirname = config.get_sliding_windows_cached_dir(self.train_name)
        filename = "%s_%d_%d_%s_%s_%d_%d_%d.npy" % (
            cls,
            metaparams["samples_per_500px"],
            metaparams["num_scales"],
            metaparams["num_ratios"],
            metaparams["mode"],
            metaparams["priority"],
            x_samples,
            y_samples,
        )
        filename = os.path.join(dirname, filename)
        if os.path.exists(filename) and not force:
            windows = np.load(filename)
        else:
            # fine, we'll figure out the windows again
            # load the kde for x_scaled,y_scaled,scale,log_ratio
            stats = self.get_stats()
            kde = stats["%s_kde" % cls]
            x_frac = kde.dataset[0, :]
            y_frac = kde.dataset[1, :]
            scale = kde.dataset[2, :]
            log_ratio = kde.dataset[3, :]

            # given the metaparameters, sample points to generate the complete list of
            # parameter combinations
            if metaparams["mode"] == "linear":
                x_points = np.linspace(x_frac.min(), x_frac.max(), x_samples)
                y_points = np.linspace(y_frac.min(), y_frac.max(), y_samples)
                scale_points = np.linspace(scale.min(), scale.max(), metaparams["num_scales"])
                ratio_points = np.linspace(log_ratio.min(), log_ratio.max(), metaparams["num_ratios"])
            elif metaparams["mode"] == "importance":
                x_points = ut.importance_sample(x_frac, x_samples, stats["%s_%s_kde" % (cls, "x_frac")])
                y_points = ut.importance_sample(y_frac, y_samples, stats["%s_%s_kde" % (cls, "y_frac")])
                scale_points = ut.importance_sample(
                    scale, metaparams["num_scales"], stats["%s_%s_kde" % (cls, "scale")]
                )
                ratio_points = ut.importance_sample(
                    log_ratio, metaparams["num_ratios"], stats["%s_%s_kde" % (cls, "log_ratio")]
                )
            else:
                raise RuntimeError("Invalid mode")

            combinations = [x for x in itertools.product(x_points, y_points, scale_points, ratio_points)]
            combinations = np.array(combinations).T

            # only take the top-scoring detections
            if metaparams["priority"]:
                t22 = time.time()
                scores = kde(combinations)  # (so slow!)
                print("kde took %.3f s" % (time.time() - t22))
                sorted_inds = np.argsort(-scores)
                max_num = min(at_most, sorted_inds.size)
                combinations = combinations[:, sorted_inds[:max_num]]

            # convert to x,y,scale,ratio,w,h
            scale = combinations[2, :]
            # x = x_frac * image width
            x = combinations[0, :] * image.width
            # ratio = exp(log_ratio)
            ratio = np.exp(combinations[3, :])
            # y = y_frac * image height
            y = combinations[1, :] * image.height
            # w = scale*min_width
            w = scale * SlidingWindows.MIN_WIDTH
            # h = w*ratio
            h = w * ratio

            combinations[0, :] = x
            combinations[1, :] = y
            combinations[2, :] = w
            combinations[3, :] = h
            windows = combinations.T
            windows = BoundingBox.clipboxes_arr(windows, (0, 0, image.width, image.height))
            np.save(filename, windows)  # does not take more than 0.5 sec even for 10**6 windows

        time_elapsed = time.time() - t
        print("get_windows_new() got %d windows in %.3fs" % (windows.shape[0], time_elapsed))
        if with_time:
            return (windows, time_elapsed)
        else:
            return windows
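ut.importance_sample is assumed here to pick n representative values for one window parameter, weighted by its 1-D KDE. A plausible sketch via the inverse CDF evaluated at evenly spaced quantiles:

    import numpy as np

    def importance_sample(data, n, kde):
        # Assumed behavior: denser sampling where the KDE puts more mass.
        grid = np.linspace(data.min(), data.max(), 1000)
        density = kde(grid)
        cdf = np.cumsum(density)
        cdf /= cdf[-1]
        quantiles = np.linspace(0, 1, n)
        return np.interp(quantiles, cdf, grid)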
Example no. 16
    def get_recalls(self, cls, metaparams, mode, window_intervals, min_overlaps):
        """
    Return nparray of num_intervals x num_overlaps, with each entry specifying
    the recall for that combination of window_interval and min_overlap.
    window_intervals must begin with 0.
    mode must be in ['sw','jw']
    """
        assert window_intervals[0] == 0
        num_overlaps = len(min_overlaps)
        num_intervals = len(window_intervals)
        times = [0]
        window_nums = [0]
        image_inds = self.dataset.get_pos_samples_for_class(cls)
        num_images = len(image_inds)
        # we are building up a num_images x num_intervals+1 x num_overlaps array
        array = np.zeros((num_images, num_intervals + 1, num_overlaps))
        for i in range(num_images):
            ind = image_inds[i]
            image = self.dataset.images[ind]
            # the first interval is 0, so there aren't any window proposals
            array[i, 0, :] = 0
            gts = image.get_ground_truth(cls)
            num_gt = gts.shape[0]
            # the last row of the matrix is the number of ground truth
            array[i, num_intervals, :] = num_gt
            # now get the windows and append the statistics information
            # windows,time_elapsed = window_generator.get_windows(image,cls,with_time=True)

            if mode == "sw":
                windows, time_elapsed = self.get_windows_new(
                    image, cls, metaparams, with_time=True, at_most=max(window_intervals)
                )
            elif mode == "jw":
                windows, time_elapsed = self.jw.get_windows(image, cls, K=10000)
            else:
                raise RuntimeError("impossible mode")

            # shuffle the windows if we want to take them in random order
            if mode == "sw" and not metaparams["priority"]:
                rand_ind = np.random.permutation(windows.shape[0])
                windows = windows[rand_ind, :]

            window_nums.append(windows.shape[0])
            times.append(time_elapsed)
            # go through each interval and count how many ground truth are matched
            for j in range(1, len(window_intervals)):
                max_ind = window_intervals[j]
                # if we are going to ask for more windows that are available,
                # the recall is going to be the same as before, so just add that
                if max_ind > windows.shape[0]:
                    array[i, j, :] = array[i, j - 1, :]
                    continue
                # otherwise, count the number of ground truths that are overlapped
                # NOTE: a single window can overlap multiple ground truth in this
                # scheme
                for gt in gts.arr:
                    overlaps = BoundingBox.get_overlap(windows[:max_ind, :4], gt[:4])
                    for k, min_overlap in enumerate(min_overlaps):
                        if np.any(overlaps >= min_overlap):
                            array[i, j, k] += 1
        print(
            "Windows generated per image: %d +/- %.3f, in %.3f +/- %.3f sec"
            % (np.mean(window_nums), np.std(window_nums), np.mean(times), np.std(times))
        )
        # reduce to num_intervals+1 x num_overlaps
        sum_array = np.sum(array, axis=0)
        # reduce to num_intervals x num_overlaps
        recalls = sum_array[:-1, :] / sum_array[-1, :]
        return recalls
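A hypothetical call, with sw a SlidingWindows instance as in the earlier examples:

    metaparams = {"samples_per_500px": 83, "num_scales": 12,
                  "num_ratios": 6, "mode": "importance", "priority": 1}
    recalls = sw.get_recalls("dog", metaparams, "sw",
                             window_intervals=[0, 100, 1000, 10000],
                             min_overlaps=[0.5, 0.7])
    # recalls[j, k]: fraction of ground truth recovered by the first
    # window_intervals[j] proposals at IoU threshold min_overlaps[k].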