def get_neg_windows(self, num, cls=None, window_params=None, max_overlap=0,
                    max_num_images=250):
    """
    Return up to `num` negative windows for `cls`.

    Windows are generated per image with `window_params`; every window whose
    overlap with any ground-truth box of that image (as computed by
    BoundingBox.get_overlap) is greater than `max_overlap` is discarded.
    An equal share of the surviving windows is sampled from each image.

    Args:
        num: total number of windows requested.
        cls: class passed through to the dataset/image helpers; None is
            forwarded unchanged (presumably meaning "all classes" - confirm
            against get_ground_truth).
        window_params: window generation parameters; if falsy, the default
            parameters for `cls` are used.
        max_overlap: largest overlap with ground truth that a negative
            window may have.
        max_num_images: if truthy, images are subsampled with
            skutil.random_subset(image_inds, max_num_images).

    Returns:
        ndarray with at most `num` rows. Each row is a window row as
        returned by image.get_windows with the image index appended as the
        last column.

    Raises:
        ValueError: if there is no image to sample from, or no window
            survives the overlap filter.
    """
    sw = SlidingWindows(self, self)
    if not window_params:
        window_params = sw.get_default_window_params(cls)

    image_inds = self.get_pos_samples_for_class(cls)
    inds = image_inds
    if max_num_images:
        inds = skutil.random_subset(image_inds, max_num_images)

    num_images = len(inds)
    if num_images == 0:
        raise ValueError("no images available to sample negative windows from")

    # The quota must be based on the images actually visited (after
    # subsampling), and rounded up so that the quotas add up to at least num.
    num_per_image = int(np.ceil(float(num) / num_images))

    all_windows = []
    for ind in inds:
        image = self.images[ind]
        windows = image.get_windows(window_params)
        gts = image.get_ground_truth(cls)
        # Successively drop the windows that overlap each ground-truth box.
        for gt in gts.arr:
            if windows.shape[0] == 0:
                break
            overlaps = BoundingBox.get_overlap(windows[:, :4], gt[:4])
            windows = windows[overlaps <= max_overlap, :]
        if windows.shape[0] == 0:
            continue
        ind_to_take = skutil.random_subset_up_to_N(
            windows.shape[0], num_per_image)
        # Tag every sampled window with the index of its image.
        image_col = np.tile(ind, (ind_to_take.shape[0], 1))
        all_windows.append(np.hstack((windows[ind_to_take, :], image_col)))

    if not all_windows:
        raise ValueError("no negative windows satisfy max_overlap=%s"
                         % (max_overlap,))
    all_windows = np.concatenate(all_windows, 0)
    return all_windows[:num, :]
def get_pos_windows(self, cls=None, window_params=None, min_overlap=0.7):
    """
    Return all positive windows for `cls`.

    For every image returned by get_pos_samples_for_class(cls), the result
    contains the ground-truth boxes themselves plus every generated window
    whose overlap with a ground-truth box (as computed by
    BoundingBox.get_overlap) is at least `min_overlap`. A generated window
    is emitted at most once: it is removed from the candidate pool as soon
    as it matches a ground-truth box.

    Args:
        cls: class passed through to the dataset/image helpers; None is
            forwarded unchanged (presumably meaning "all classes" - confirm
            against get_ground_truth).
        window_params: window generation parameters; if falsy, the default
            parameters for `cls` are used.
        min_overlap: minimum overlap with a ground-truth box; raise it to
            fetch fewer windows.

    Returns:
        Integer ndarray with 5 columns: the first four columns of the
        ground-truth/window row followed by the image index. Has zero rows
        when no image has ground truth.

    Side effects:
        Prints window-count and timing statistics when at least one image
        was processed.
    """
    sw = SlidingWindows(self, self)
    if not window_params:
        window_params = sw.get_default_window_params(cls)

    positives = []
    times = []
    window_nums = []
    for i in self.get_pos_samples_for_class(cls):
        image = self.images[i]
        gts = image.get_ground_truth(cls)
        num_gt = gts.arr.shape[0]
        if num_gt == 0:
            # Nothing can be positive in this image; skip window generation.
            continue

        # The ground-truth boxes are positives themselves.
        gt_rows = np.hstack((gts.arr[:, :4], np.tile(i, (num_gt, 1))))
        positives.append(gt_rows.astype(int))

        windows, time_elapsed = image.get_windows(
            window_params, with_time=True)
        window_nums.append(windows.shape[0])
        times.append(time_elapsed)

        for gt in gts.arr:
            overlaps = BoundingBox.get_overlap(windows[:, :4], gt[:4])
            matched = windows[overlaps >= min_overlap, :]
            matched = np.hstack(
                (matched, np.tile(i, (matched.shape[0], 1))))
            positives.append(matched.astype(int))
            # Keep only the unmatched windows for the remaining boxes so a
            # window is never reported twice.
            windows = windows[overlaps < min_overlap, :]

    if not positives:
        # np.concatenate rejects an empty list; return an empty table with
        # the same layout (4 box columns + image index) instead.
        return np.zeros((0, 5), dtype=int)

    overlapping_windows = np.concatenate(positives, 0)
    print("Windows generated per image: %d +/- %.3f, in %.3f +/- %.3f sec" % (
        np.mean(window_nums), np.std(window_nums),
        np.mean(times), np.std(times)))
    return overlapping_windows
def compute_det_pr_and_hard_neg(dets, gt, min_overlap=0.5):
    """
    Compute Precision-Recall and find hard negatives of the given detections
    for the ground truth.

    Args:
        dets (skpyutils.Table): detections. Must have a 'score' column, and
            the first four columns must be the bounding box coordinates.
            The input table is not modified; a sorted copy is returned.
        gt (skpyutils.Table): detection ground truth. Can be for a single
            image or a whole dataset, and can contain either all classes or
            a single class. The 'cls_ind' and 'diff' columns must be
            present. Note that depending on these choices, the meaning of
            the PR evaluation is different. In particular, if gt is for a
            single class but detections are for multiple classes, there
            will be a lot of false positives!
        min_overlap (float): minimum overlap between a detection and a
            ground-truth box, as computed by BoundingBox.get_overlap
            (presumably intersection over union - confirm), for the
            detection to count as a match.

    Returns:
        (ap, recall, precision, hard_negatives, sorted_dets): the
        hard_negatives array is a 0/1 mask onto the rows of sorted_dets.
        When dets or gt is empty, ap is 0 and recall, precision and
        hard_negatives are each np.array([0]); the tuple still has five
        elements.
    """
    nd = dets.arr.shape[0]
    if nd < 1 or gt.shape[0] < 1:
        # Nothing to evaluate. Still return the documented 5-tuple so that
        # callers can unpack the result unconditionally.
        if nd > 0:
            dets = dets.copy()
            dets.sort_by_column("score", descending=True)
        return (0, np.array([0]), np.array([0]), np.array([0]), dets)

    tt = TicToc().tic()

    # Augment gt with a column keeping track of matches. The data is copied
    # into a fresh array, so the caller's table is never modified.
    cols = list(gt.cols) + ["matched"]
    arr = np.zeros((gt.arr.shape[0], gt.arr.shape[1] + 1))
    arr[:, :-1] = gt.arr
    gt = Table(arr, cols)

    # Sort detections by confidence (on a copy).
    dets = dets.copy()
    dets.sort_by_column("score", descending=True)

    npos = gt.filter_on_column("diff", 0).shape[0]
    per_image = "img_ind" in gt.cols
    diff_col = gt.ind("diff")
    matched_col = gt.ind("matched")

    tp = np.zeros(nd)
    fp = np.zeros(nd)
    hard_neg = np.zeros(nd)
    for d in range(nd):
        if tt.qtoc() > 15:
            print("... on %d/%d dets" % (d, nd))
            tt.tic()

        det = dets.arr[d, :]

        # Find ground truth for this image.
        if per_image:
            img_ind = det[dets.ind("img_ind")]
            inds = gt.arr[:, gt.ind("img_ind")] == img_ind
            # Boolean indexing yields a copy; it is written back below.
            gt_for_image = gt.arr[inds, :]
        else:
            gt_for_image = gt.arr

        if gt_for_image.shape[0] < 1:
            # False positive due to a det in an image that does not contain
            # the class. This can happen if we're passing ground truth for
            # a single class.
            fp[d] = 1
            hard_neg[d] = 1
            continue

        # Find the maximally overlapping ground truth element.
        overlaps = BoundingBox.get_overlap(gt_for_image[:, :4], det[:4])
        jmax = overlaps.argmax()
        ovmax = overlaps[jmax]

        # Assign detection as true positive / don't care / false positive.
        if ovmax >= min_overlap:
            if gt_for_image[jmax, diff_col]:
                # Don't care: the object is marked difficult, so the
                # detection is neither a true nor a false positive.
                pass
            elif gt_for_image[jmax, matched_col] == 0:
                if (gt_for_image[jmax, gt.ind("cls_ind")]
                        == det[dets.ind("cls_ind")]):
                    # True positive: claim this ground truth object.
                    tp[d] = 1
                    gt_for_image[jmax, matched_col] = 1
                else:
                    # False positive due to wrong class.
                    fp[d] = 1
                    hard_neg[d] = 1
            else:
                # False positive due to multiple detection. This is still
                # a correct answer, so not a hard negative.
                fp[d] = 1
        else:
            # False positive due to not matching any ground truth object.
            fp[d] = 1
            hard_neg[d] = 1

        # NOTE: must write back so that gt.arr gets the changes made to the
        # per-image copy.
        if per_image:
            gt.arr[inds, :] = gt_for_image

    ap, rec, prec = compute_rec_prec_ap(tp, fp, npos)
    return (ap, rec, prec, hard_neg, dets)
def compute_det_pr_and_hard_neg(cls, dets, gt):
    """
    Compute Precision-Recall and hard negatives for a Table of detections
    against a Table of ground truth.

    Ground truth can be for a single image or a whole dataset and can
    contain either all classes or just one class (but the 'cls_ind' and
    'diff' columns must be present in either case). Depending on these
    decisions, the meaning of the PR evaluation is different. In
    particular, if gt is for a single class but dets are for multiple
    classes, there will be a lot of false positives!

    A detection matches a ground-truth box when their overlap, as computed
    by BoundingBox.get_overlap, is at least cls.MIN_OVERLAP.

    NOTE: modifies dets in-place (sorts by score), except when dets or gt
    is empty. The gt table is not modified.

    Returns:
        (ap, recall, precision, hard_neg): hard_neg is a 0/1 mask onto the
        score-sorted detections. When dets or gt is empty, ap is 0 and the
        other three are each np.array([0]); the tuple still has four
        elements.
    """
    nd = dets.arr.shape[0]
    if nd < 1 or gt.shape[0] < 1:
        # Nothing to evaluate. Keep the same tuple arity as the normal path
        # so callers can unpack the result unconditionally.
        return (0, np.array([0]), np.array([0]), np.array([0]))

    tt = TicToc().tic()

    # Augment gt with a column keeping track of matches. The data is copied
    # into a fresh array, so the caller's table is never modified and no
    # separate gt.copy() is needed.
    cols = list(gt.cols) + ['matched']
    arr = np.zeros((gt.arr.shape[0], gt.arr.shape[1] + 1))
    arr[:, :-1] = gt.arr
    gt = Table(arr, cols)

    # Sort detections by confidence (in place, see NOTE above).
    dets.sort_by_column('score', descending=True)

    npos = gt.filter_on_column('diff', 0).shape[0]
    per_image = 'img_ind' in gt.cols
    diff_col = gt.ind('diff')
    matched_col = gt.ind('matched')

    tp = np.zeros(nd)
    fp = np.zeros(nd)
    hard_neg = np.zeros(nd)
    for d in range(nd):
        if tt.qtoc() > 15:
            print("... on %d/%d dets" % (d, nd))
            tt.tic()

        det = dets.arr[d, :]

        # Find ground truth for this image.
        if per_image:
            img_ind = det[dets.ind('img_ind')]
            inds = gt.arr[:, gt.ind('img_ind')] == img_ind
            # Boolean indexing yields a copy; it is written back below.
            gt_for_image = gt.arr[inds, :]
        else:
            gt_for_image = gt.arr

        if gt_for_image.shape[0] < 1:
            # False positive due to detection in an image that does not
            # contain the class. This can happen if we're passing ground
            # truth for a single class.
            fp[d] = 1
            hard_neg[d] = 1
            continue

        # Find the maximally overlapping ground truth element.
        overlaps = BoundingBox.get_overlap(gt_for_image[:, :4], det[:4])
        jmax = overlaps.argmax()
        ovmax = overlaps[jmax]

        # Assign detection as true positive / don't care / false positive.
        if ovmax >= cls.MIN_OVERLAP:
            if gt_for_image[jmax, diff_col]:
                # NOT a false positive because the object is difficult.
                pass
            elif gt_for_image[jmax, matched_col] == 0:
                if (gt_for_image[jmax, gt.ind('cls_ind')]
                        == det[dets.ind('cls_ind')]):
                    # True positive: claim this ground truth object.
                    tp[d] = 1
                    gt_for_image[jmax, matched_col] = 1
                else:
                    # False positive due to wrong class.
                    fp[d] = 1
                    hard_neg[d] = 1
            else:
                # False positive due to multiple detection. This is still
                # a correct answer, so not a hard negative.
                fp[d] = 1
        else:
            # False positive due to not matching any ground truth object.
            fp[d] = 1
            hard_neg[d] = 1

        # NOTE: this is very important: otherwise, gt.arr does not get the
        # changes we make to gt_for_image.
        if per_image:
            gt.arr[inds, :] = gt_for_image

    ap, rec, prec = cls.compute_rec_prec_ap(tp, fp, npos)
    return (ap, rec, prec, hard_neg)
def get_recalls(self, cls, metaparams, mode, window_intervals, min_overlaps):
    """
    Return an ndarray of num_intervals x num_overlaps, with each entry
    specifying the recall for that combination of window_interval and
    min_overlap.

    For every image returned by dataset.get_pos_samples_for_class(cls) and
    every interval N, a ground-truth box counts as recalled at overlap t
    if any of the first N windows has overlap >= t with it (as computed by
    BoundingBox.get_overlap). If an image has fewer than N windows, all of
    its windows are used. A single window can recall several ground-truth
    boxes.

    Args:
        cls: class to evaluate.
        metaparams: parameters for get_windows_new (mode 'sw'); its
            'priority' entry decides whether windows are kept in the
            returned order (truthy) or shuffled (falsy).
        mode: 'sw' (self.get_windows_new) or 'jw' (self.jw.get_windows).
        window_intervals: numbers of windows to evaluate at; must begin
            with 0.
        min_overlaps: overlap thresholds to evaluate at.

    Returns:
        ndarray of shape (len(window_intervals), len(min_overlaps)). The
        row for interval 0 is all zeros. Entries are nan when there is no
        ground truth at all (0/0).

    Raises:
        RuntimeError: if mode is neither 'sw' nor 'jw' (raised when the
            first image is processed).

    Side effects:
        Prints window-count and timing statistics.
    """
    assert window_intervals[0] == 0
    num_overlaps = len(min_overlaps)
    num_intervals = len(window_intervals)
    times = []
    window_nums = []
    image_inds = self.dataset.get_pos_samples_for_class(cls)
    num_images = len(image_inds)

    # counts[i, j, k]: ground truths of image i recalled by its first
    # window_intervals[j] windows at threshold min_overlaps[k].
    # The extra last row along axis 1 holds the number of ground truths.
    counts = np.zeros((num_images, num_intervals + 1, num_overlaps))
    for i, ind in enumerate(image_inds):
        image = self.dataset.images[ind]
        gts = image.get_ground_truth(cls)
        counts[i, num_intervals, :] = gts.shape[0]

        if mode == "sw":
            windows, time_elapsed = self.get_windows_new(
                image, cls, metaparams, with_time=True,
                at_most=max(window_intervals))
        elif mode == "jw":
            windows, time_elapsed = self.jw.get_windows(image, cls, K=10000)
        else:
            raise RuntimeError("impossible mode")

        # Shuffle the windows if we want to take them in random order.
        if mode == "sw" and not metaparams["priority"]:
            windows = windows[np.random.permutation(windows.shape[0]), :]
        window_nums.append(windows.shape[0])
        times.append(time_elapsed)

        # Row 0 (interval 0, no proposals) stays at zero.
        for j in range(1, num_intervals):
            # Slicing clamps to the available rows, so an interval larger
            # than the number of windows evaluates every window instead of
            # silently reusing the counts of the previous interval.
            candidates = windows[:window_intervals[j], :4]
            if candidates.shape[0] == 0:
                continue
            for gt in gts.arr:
                overlaps = BoundingBox.get_overlap(candidates, gt[:4])
                for k, min_overlap in enumerate(min_overlaps):
                    if np.any(overlaps >= min_overlap):
                        counts[i, j, k] += 1

    # Fall back to a single 0 only when no image was processed, so that the
    # statistics of real runs are not biased by a dummy entry.
    num_stats = window_nums or [0]
    time_stats = times or [0]
    print(
        "Windows generated per image: %d +/- %.3f, in %.3f +/- %.3f sec"
        % (np.mean(num_stats), np.std(num_stats),
           np.mean(time_stats), np.std(time_stats))
    )

    # Reduce to num_intervals+1 x num_overlaps.
    sum_array = np.sum(counts, axis=0)
    # Reduce to num_intervals x num_overlaps: recalled / total ground truth.
    recalls = sum_array[:-1, :] / sum_array[-1, :]
    return recalls