def load_dpm_dets_for_image(cls, image, dataset, suffix='dets_all_may25_DP'):
    """
    Load multi-class array of detections for an image from .mat format.
    """
    t = time.time()
    name = os.path.splitext(image.name)[0]
    # TODO: figure out how to deal with different types of detections
    dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_may25_DP'
    filename = os.path.join(dets_dir, '%s_dets_all_may25_DP.mat' % name)
    if not os.path.exists(filename):
        dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_jun1_DP_trainval'
        filename = os.path.join(
            dets_dir, '%s_dets_all_jun1_DP_trainval.mat' % name)
        if not os.path.exists(filename):
            filename = os.path.join(
                config.test_support_dir,
                'dets/%s_dets_all_may25_DP.mat' % name)
            if not os.path.exists(filename):
                print("File does not exist!")
                return None
    mat = scipy.io.loadmat(filename)
    dets = mat['dets_mc']
    times = mat['times_mc']
    # feat_time = times[0, 0]
    dets_seq = []
    cols = ['x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy', 'dummy',
            'score', 'time']
    for cls_ind, cls in enumerate(config.pascal_classes):
        cls_dets = dets[cls_ind][0]
        if cls_dets.shape[0] > 0:
            det_time = times[cls_ind, 1]
            # all detections get the final time
            cls_dets = ut.append_index_column(cls_dets, det_time)
            cls_dets = ut.append_index_column(cls_dets, cls_ind)
            # subtract 1 pixel and convert from corners!
            cls_dets[:, :4] -= 1
            cls_dets[:, :4] = BoundingBox.convert_arr_from_corners(
                cls_dets[:, :4])
            dets_seq.append(cls_dets)
    cols = ['x', 'y', 'w', 'h', 'dummy', 'dummy', 'dummy', 'dummy',
            'score', 'time', 'cls_ind']
    # NMS detections per class individually
    dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols': cols})
    dets_mc[:, :4] = BoundingBox.clipboxes_arr(
        dets_mc[:, :4], (0, 0, image.size[0] - 1, image.size[1] - 1))
    time_elapsed = time.time() - t
    print("On image %s, took %.3f s" % (image.name, time_elapsed))
    return dets_mc
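
# The ut.append_index_column helper used above is not defined in this
# section. From its call sites (appending a detection time and then a class
# index to every row), it appears to append a constant-valued column to a
# 2-D array. A minimal sketch of that assumed behavior:
import numpy as np

def append_index_column_sketch(arr, value):
    # Hypothetical reimplementation: append a column filled with `value`
    # to the right of a (num_rows x num_cols) array.
    col = np.tile(value, (arr.shape[0], 1))
    return np.hstack((arr, col))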
def load_csc_dpm_dets_for_image(cls, image, dataset):
    """
    Load HOS's cascaded dets.
    """
    t = time.time()
    name = os.path.splitext(image.name)[0]
    # if test dataset, use HOS's detections; if not, need to output my own
    if re.search('test', dataset.name):
        dirname = config.get_dets_test_wholeset_dir()
        filename = os.path.join(
            dirname, '%s_dets_all_test_original_cascade_wholeset.mat' % name)
    else:
        dirname = config.get_dets_nov19()
        filename = os.path.join(dirname, '%s_dets_all_nov19.mat' % name)
    print(filename)
    if not os.path.exists(filename):
        raise RuntimeError("File %s does not exist!" % filename)
    mat = scipy.io.loadmat(filename)
    dets = mat['dets_mc']
    times = mat['times_mc']
    # feat_time = times[0, 0]
    dets_seq = []
    cols = ['x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy', 'dummy',
            'dummy', 'dummy', 'score']
    for cls_ind, cls in enumerate(dataset.classes):
        cls_dets = dets[cls_ind][0]
        if cls_dets.shape[0] > 0:
            good_ind = [0, 1, 2, 3, 10]
            cls_dets = cls_dets[:, good_ind]
            det_time = times[cls_ind, 1]
            # all detections get the final time
            cls_dets = ut.append_index_column(cls_dets, det_time)
            cls_dets = ut.append_index_column(cls_dets, cls_ind)
            # convert from corners!
            cls_dets[:, :4] = BoundingBox.convert_arr_from_corners(
                cls_dets[:, :4])
            cls_dets[:, :4] = BoundingBox.clipboxes_arr(
                cls_dets[:, :4], (0, 0, image.size[0], image.size[1]))
            dets_seq.append(cls_dets)
    cols = ['x', 'y', 'w', 'h', 'score', 'time', 'cls_ind']
    dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols': cols})
    time_elapsed = time.time() - t
    print("On image %s, took %.3f s" % (image.name, time_elapsed))
    return dets_mc
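
# Detector.nms_detections is referenced above but not shown. A standard
# greedy non-maximum suppression over (x, y, w, h, score) rows would look
# roughly like this sketch; the 0.5 overlap threshold and the exact column
# layout are assumptions, not the codebase's actual settings.
import numpy as np

def nms_detections_sketch(dets, overlap_thresh=0.5):
    # Sort by descending score; greedily keep each box and suppress the
    # remaining boxes whose intersection-over-union with it is too high.
    order = np.argsort(-dets[:, 4])
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top box with all remaining boxes
        x1 = np.maximum(dets[i, 0], dets[order[1:], 0])
        y1 = np.maximum(dets[i, 1], dets[order[1:], 1])
        x2 = np.minimum(dets[i, 0] + dets[i, 2],
                        dets[order[1:], 0] + dets[order[1:], 2])
        y2 = np.minimum(dets[i, 1] + dets[i, 3],
                        dets[order[1:], 1] + dets[order[1:], 3])
        inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
        union = dets[i, 2] * dets[i, 3] + \
            dets[order[1:], 2] * dets[order[1:], 3] - inter
        order = order[1:][inter / union <= overlap_thresh]
    return dets[keep, :]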
def test_load_dpm_detections(self):
    conf = dict(self.config)
    conf['detectors'] = ['dpm']
    policy = DatasetPolicy(self.dataset, self.train_dataset, **conf)
    assert policy.detectors == ['dpm']
    dets = policy.load_ext_detections(self.dataset, 'dpm_may25', force=True)
    dets = dets.with_column_omitted('time')

    # load the ground truth dets, processed in Matlab
    # (timely/data/test_support/concat_dets.m)
    filename = os.path.join(config.test_support_dir, 'val_dets.mat')
    dets_correct = Table(
        scipy.io.loadmat(filename)['dets'],
        ['x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy', 'dummy',
         'score', 'cls_ind', 'img_ind'],
        'dets_correct')
    dets_correct = dets_correct.subset(
        ['x1', 'y1', 'x2', 'y2', 'score', 'cls_ind', 'img_ind'])
    dets_correct.arr[:, :4] -= 1
    dets_correct.arr[:, :4] = BoundingBox.convert_arr_from_corners(
        dets_correct.arr[:, :4])
    dets_correct.cols = ['x', 'y', 'w', 'h', 'score', 'cls_ind', 'img_ind']

    print('----mine:')
    print(dets)
    print('----correct:')
    print(dets_correct)
    assert dets_correct == dets
def get_neg_windows(self, num, cls=None, window_params=None,
                    max_overlap=0, max_num_images=250):
    """
    Return array of num windows that can be generated with window_params
    that do not overlap with ground truth by more than max_overlap.
    * If cls is not given, returns ground truth for all classes.
    * If max_num_images is given, samples from at most that many images.
    """
    sw = SlidingWindows(self, self)
    if not window_params:
        window_params = sw.get_default_window_params(cls)
    all_windows = []
    image_inds = self.get_pos_samples_for_class(cls)

    max_num = len(image_inds)
    inds = image_inds
    if max_num_images:
        inds = skutil.random_subset(image_inds, max_num_images)
    num_per_image = round(1. * num / max_num)
    for ind in inds:
        image = self.images[ind]
        windows = image.get_windows(window_params)
        gts = image.get_ground_truth(cls)
        for gt in gts.arr:
            overlaps = BoundingBox.get_overlap(windows[:, :4], gt[:4])
            windows = windows[overlaps <= max_overlap, :]
        if windows.shape[0] == 0:
            continue
        ind_to_take = skutil.random_subset_up_to_N(
            windows.shape[0], num_per_image)
        all_windows.append(np.hstack(
            (windows[ind_to_take, :],
             np.tile(ind, (ind_to_take.shape[0], 1)))))
    all_windows = np.concatenate(all_windows, 0)
    return all_windows[:num, :]
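
# BoundingBox.get_overlap is used throughout this section but defined
# elsewhere. Its call sites pass an array of (x, y, w, h) windows and a
# single ground truth box, and compare the result to overlap thresholds, so
# it presumably computes intersection-over-union. A sketch under those
# assumptions:
import numpy as np

def get_overlap_sketch(windows, box):
    # Element-wise IoU of each (x, y, w, h) row in `windows` with `box`.
    ix = np.maximum(0,
                    np.minimum(windows[:, 0] + windows[:, 2],
                               box[0] + box[2]) -
                    np.maximum(windows[:, 0], box[0]))
    iy = np.maximum(0,
                    np.minimum(windows[:, 1] + windows[:, 3],
                               box[1] + box[3]) -
                    np.maximum(windows[:, 1], box[1]))
    inter = ix * iy
    union = windows[:, 2] * windows[:, 3] + box[2] * box[3] - inter
    return inter / union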
def load_from_pascal_xml_filename(cls, classes, filename, images_dir):
    "Load image info from a file in the PASCAL VOC XML format."

    def get_data_from_tag(node, tag):
        if tag == "bndbox":
            x1 = int(node.getElementsByTagName(
                tag)[0].childNodes[1].childNodes[0].data)
            y1 = int(node.getElementsByTagName(
                tag)[0].childNodes[3].childNodes[0].data)
            x2 = int(node.getElementsByTagName(
                tag)[0].childNodes[5].childNodes[0].data)
            y2 = int(node.getElementsByTagName(
                tag)[0].childNodes[7].childNodes[0].data)
            return (x1, y1, x2, y2)
        else:
            return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())

    # image info
    name = get_data_from_tag(data, "filename")
    filename = os.path.join(images_dir, name)
    size = data.getElementsByTagName("size")[0]
    width = int(get_data_from_tag(size, "width"))
    height = int(get_data_from_tag(size, "height"))
    img = Image(width, height, classes, name)

    # per-object info
    objects = []
    for obj in data.getElementsByTagName("object"):
        clas = str(get_data_from_tag(obj, "name")).lower().strip()
        diff = int(get_data_from_tag(obj, "difficult"))
        trun = int(get_data_from_tag(obj, "truncated"))
        rect = get_data_from_tag(obj, "bndbox")
        bbox = BoundingBox(rect, format='corners')
        cls_ind = classes.index(clas)
        objects.append(np.hstack((bbox.get_arr(), cls_ind, diff, trun)))
    if len(objects) > 0:
        img.objects_table = Table(np.array(objects), cls.columns)
    else:
        img.objects_table = Table(None, cls.columns)
    return img
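
# BoundingBox.convert_arr_from_corners converts (x1, y1, x2, y2) corner
# boxes into the (x, y, w, h) layout the rest of this code expects. A
# sketch, assuming the PASCAL convention that corners are inclusive, so
# width = x2 - x1 + 1 (the +1 is an assumption about this codebase):
import numpy as np

def convert_arr_from_corners_sketch(arr):
    # Convert an N x 4 corners array to (x, y, w, h) without mutating input.
    out = arr.copy()
    out[:, 2] = arr[:, 2] - arr[:, 0] + 1  # w
    out[:, 3] = arr[:, 3] - arr[:, 1] + 1  # h
    return out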
def load_from_json(cls, classes, data):
    "Return an Image instantiated from a JSON representation."
    name = data['name']
    width = data['size'][0]
    height = data['size'][1]
    img = Image(width, height, classes, name)
    objects = []
    for obj in data['objects']:
        bbox = BoundingBox(obj['bbox'])
        cls_name = obj['class']
        cls_ind = classes.index(cls_name)
        diff = obj['diff']
        trun = obj['trun']
        objects.append(np.hstack((bbox.get_arr(), cls_ind, diff, trun)))
    if len(objects) > 0:
        img.objects_table = Table(np.array(objects), cls.columns)
    else:
        img.objects_table = Table(None, cls.columns)
    return img
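
# For reference, the JSON shape that load_from_json expects, reconstructed
# from the keys it reads above (names and values here are illustrative only):
example_image_json = {
    'name': '000001.jpg',
    'size': [353, 500],                # [width, height]
    'objects': [
        {'bbox': [48, 240, 195, 371],  # passed straight to BoundingBox()
         'class': 'dog',
         'diff': 0,                    # PASCAL 'difficult' flag
         'trun': 1}                    # PASCAL 'truncated' flag
    ]
}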
def get_windows(self, image, cls=None, window_params=None, with_time=False):
    """
    Return all windows that can be generated with window_params.
    If with_time=True, return tuple of (windows, time_elapsed).
    """
    assert cls or window_params
    if not window_params:
        window_params = self.get_default_window_params(cls)
    t = time.time()
    stride = window_params.stride
    min_width = window_params.min_width
    actual_xs = []
    actual_ys = []
    actual_ws = []
    actual_hs = []
    # we want to be able to capture objects that extend past the image, so
    # we always iterate over locations in native space, and convert to
    # actual image space when we record the window
    w_pad = int(1. * min_width / 2)
    x_min = -w_pad
    for scale in window_params.scales:
        x_max = int(image.width * scale) - w_pad
        if w_pad > 0:
            x_max += stride
        actual_w = int(min_width / scale) + 1
        for ratio in window_params.aspect_ratios:
            h_pad = int(1. * min_width * ratio / 2)
            y_min = -h_pad
            y_max = int(image.height * scale) - h_pad
            if h_pad > 0:
                y_max += stride
            actual_h = int(min_width / scale * ratio) + 1
            for y in range(y_min, y_max, stride):
                for x in range(x_min, x_max, stride):
                    actual_ws.append(actual_w)
                    actual_hs.append(actual_h)
                    actual_xs.append(int(x / scale))
                    actual_ys.append(int(y / scale))
    windows = np.array([actual_xs, actual_ys, actual_ws, actual_hs]).T
    windows = BoundingBox.clipboxes_arr(
        windows, (0, 0, image.width, image.height))
    if with_time:
        time_elapsed = time.time() - t
        return (windows, time_elapsed)
    return windows
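
# BoundingBox.clipboxes_arr is called here with (x, y, w, h) windows and a
# (min_x, min_y, max_x, max_y) bounds tuple. A sketch of that assumed
# clipping behavior:
import numpy as np

def clipboxes_arr_sketch(arr, bounds):
    # Clip each (x, y, w, h) row so it lies within the given bounds,
    # leaving any extra columns untouched.
    min_x, min_y, max_x, max_y = bounds
    x2 = np.minimum(arr[:, 0] + arr[:, 2], max_x)
    y2 = np.minimum(arr[:, 1] + arr[:, 3], max_y)
    arr[:, 0] = np.maximum(arr[:, 0], min_x)
    arr[:, 1] = np.maximum(arr[:, 1], min_y)
    arr[:, 2] = x2 - arr[:, 0]
    arr[:, 3] = y2 - arr[:, 1]
    return arr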
def get_pos_windows(self, cls=None, window_params=None, min_overlap=0.7):
    """
    Return array of all ground truth windows for the class, plus windows
    that can be generated with window_params that overlap with it by more
    than min_overlap.
    * If cls not given, return positive windows for all classes.
    * If window_params not given, use default for the class.
    * Adjust min_overlap to fetch fewer windows.
    """
    sw = SlidingWindows(self, self)
    if not window_params:
        window_params = sw.get_default_window_params(cls)
    overlapping_windows = []
    image_inds = self.get_pos_samples_for_class(cls)
    times = []
    window_nums = []
    for i in image_inds:
        image = self.images[i]
        gts = image.get_ground_truth(cls)
        if gts.arr.shape[0] > 0:
            overlap_wins = gts.arr[:, :4]
            overlap_wins = np.hstack(
                (overlap_wins, np.tile(i, (overlap_wins.shape[0], 1))))
            overlapping_windows.append(overlap_wins.astype(int))
            windows, time_elapsed = image.get_windows(
                window_params, with_time=True)
            window_nums.append(windows.shape[0])
            times.append(time_elapsed)
            for gt in gts.arr:
                overlaps = BoundingBox.get_overlap(windows[:, :4], gt[:4])
                overlap_wins = windows[overlaps >= min_overlap, :]
                overlap_wins = np.hstack(
                    (overlap_wins, np.tile(i, (overlap_wins.shape[0], 1))))
                overlapping_windows.append(overlap_wins.astype(int))
                windows = windows[overlaps < min_overlap, :]
    overlapping_windows = np.concatenate(overlapping_windows, 0)
    print("Windows generated per image: %d +/- %.3f, in %.3f +/- %.3f sec" % (
        np.mean(window_nums), np.std(window_nums),
        np.mean(times), np.std(times)))
    return overlapping_windows
def compute_det_pr_and_hard_neg(dets, gt, min_overlap=0.5):
    """
    Compute the Precision-Recall curve and find hard negatives of the given
    detections for the ground truth.

    Args:
        dets (skpyutils.Table): detections.
            NOTE: the first four columns must be the bounding box coordinates!

        gt (skpyutils.Table): detection ground truth
            Can be for a single image or a whole dataset, and can contain
            either all classes or a single class. The 'cls_ind' column must
            be present in either case.

            Note that depending on these choices, the meaning of the PR
            evaluation is different. In particular, if gt is for a single
            class but detections are for multiple classes, there will be a
            lot of false positives!

        min_overlap (float): minimum required ratio of area of intersection
            to area of union for a true positive.

    Returns:
        (ap, recall, precision, hard_negatives, sorted_dets): tuple of
        (float, list, list, list, ndarray), where the lists are 0/1 masks
        onto the sorted dets.
    """
    tt = TicToc().tic()

    # if dets or gt are empty, return 0's
    nd = dets.arr.shape[0]
    if nd < 1 or gt.shape[0] < 1:
        ap = 0
        rec = np.array([0])
        prec = np.array([0])
        hard_negs = np.array([0])
        return (ap, rec, prec, hard_negs, dets)

    # augment gt with a column keeping track of matches
    cols = list(gt.cols) + ['matched']
    arr = np.zeros((gt.arr.shape[0], gt.arr.shape[1] + 1))
    arr[:, :-1] = gt.arr.copy()
    gt = Table(arr, cols)

    # sort detections by confidence
    dets = dets.copy()
    dets.sort_by_column('score', descending=True)

    # match detections to ground truth objects
    npos = gt.filter_on_column('diff', 0).shape[0]
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    hard_neg = np.zeros(nd)
    for d in range(nd):
        if tt.qtoc() > 15:
            print("... on %d/%d dets" % (d, nd))
            tt.tic()

        det = dets.arr[d, :]

        # find ground truth for this image
        if 'img_ind' in gt.cols:
            img_ind = det[dets.ind('img_ind')]
            inds = gt.arr[:, gt.ind('img_ind')] == img_ind
            gt_for_image = gt.arr[inds, :]
        else:
            gt_for_image = gt.arr

        if gt_for_image.shape[0] < 1:
            # false positive due to a det in an image that does not contain
            # the class
            # NOTE: this can happen if we're passing ground truth for a class
            fp[d] = 1
            hard_neg[d] = 1
            continue

        # find the maximally overlapping ground truth element for this
        # detection
        overlaps = BoundingBox.get_overlap(gt_for_image[:, :4], det[:4])
        jmax = overlaps.argmax()
        ovmax = overlaps[jmax]

        # assign detection as true positive/don't care/false positive
        if ovmax >= min_overlap:
            if gt_for_image[jmax, gt.ind('diff')]:
                # not a false positive because object is difficult
                pass
            else:
                if gt_for_image[jmax, gt.ind('matched')] == 0:
                    if gt_for_image[jmax, gt.ind('cls_ind')] == \
                            det[dets.ind('cls_ind')]:
                        # true positive
                        tp[d] = 1
                        gt_for_image[jmax, gt.ind('matched')] = 1
                    else:
                        # false positive due to wrong class
                        fp[d] = 1
                        hard_neg[d] = 1
                else:
                    # false positive due to multiple detection
                    # this is still a correct answer, so not a hard negative
                    fp[d] = 1
        else:
            # false positive due to not matching any ground truth object
            fp[d] = 1
            hard_neg[d] = 1

        # NOTE: must do this for gt.arr to get the changes we made to
        # gt_for_image
        if 'img_ind' in gt.cols:
            gt.arr[inds, :] = gt_for_image

    ap, rec, prec = compute_rec_prec_ap(tp, fp, npos)
    return (ap, rec, prec, hard_neg, dets)
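
# compute_rec_prec_ap is called above but not shown. Given per-detection
# true/false positive masks (sorted by descending score) and the number of
# non-difficult positives, the standard VOC-style computation is roughly
# the following sketch; the "every point" area-under-curve form used here
# (rather than VOC's 11-point interpolation) is an assumption:
import numpy as np

def compute_rec_prec_ap_sketch(tp, fp, npos):
    # Running totals over detections sorted by descending score.
    tp_cum = np.cumsum(tp)
    fp_cum = np.cumsum(fp)
    rec = tp_cum / npos
    prec = tp_cum / (tp_cum + fp_cum)
    # AP as the area under the precision-recall curve.
    ap = 0.0
    prev_r = 0.0
    for r, p in zip(rec, prec):
        ap += (r - prev_r) * p
        prev_r = r
    return (ap, rec, prec)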
def compute_det_pr_and_hard_neg(cls, dets, gt):
    """
    Take Table of detections and Table of ground truth.
    Ground truth can be for a single image or a whole dataset and can
    contain either all classes or just one class (but the cls_ind col must
    be present in either case).
    Depending on these decisions, the meaning of the PR evaluation is
    different. In particular, if gt is for a single class but dets are for
    multiple classes, there will be a lot of false positives!
    NOTE: modifies dets in-place (sorts by score)
    Return ap, recall, and precision vectors as tuple.
    """
    gt = gt.copy()

    # if dets or gt are empty, return 0's
    nd = dets.arr.shape[0]
    if nd < 1 or gt.shape[0] < 1:
        ap = 0
        rec = np.array([0])
        prec = np.array([0])
        return (ap, rec, prec)
    tt = TicToc().tic()

    # augment gt with a column keeping track of matches
    cols = list(gt.cols) + ['matched']
    arr = np.zeros((gt.arr.shape[0], gt.arr.shape[1] + 1))
    arr[:, :-1] = gt.arr
    gt = Table(arr, cols)

    # sort detections by confidence
    dets.sort_by_column('score', descending=True)

    # match detections to ground truth objects
    npos = gt.filter_on_column('diff', 0).shape[0]
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    hard_neg = np.zeros(nd)
    for d in range(nd):
        if tt.qtoc() > 15:
            print("... on %d/%d dets" % (d, nd))
            tt.tic()

        det = dets.arr[d, :]

        # find ground truth for this image
        if 'img_ind' in gt.cols:
            img_ind = det[dets.ind('img_ind')]
            inds = gt.arr[:, gt.ind('img_ind')] == img_ind
            gt_for_image = gt.arr[inds, :]
        else:
            gt_for_image = gt.arr

        if gt_for_image.shape[0] < 1:
            # this can happen if we're passing ground truth for a class:
            # false positive due to detection in image that does not
            # contain the class
            fp[d] = 1
            hard_neg[d] = 1
            continue

        # find the maximally overlapping ground truth element for this
        # detection
        overlaps = BoundingBox.get_overlap(gt_for_image[:, :4], det[:4])
        jmax = overlaps.argmax()
        ovmax = overlaps[jmax]

        # assign detection as true positive/don't care/false positive
        if ovmax >= cls.MIN_OVERLAP:
            if not gt_for_image[jmax, gt.ind('diff')]:
                is_matched = gt_for_image[jmax, gt.ind('matched')]
                if is_matched == 0:
                    if gt_for_image[jmax, gt.ind('cls_ind')] == \
                            det[dets.ind('cls_ind')]:
                        # true positive
                        tp[d] = 1
                        gt_for_image[jmax, gt.ind('matched')] = 1
                    else:
                        # false positive due to wrong class
                        fp[d] = 1
                        hard_neg[d] = 1
                else:
                    # false positive due to multiple detection
                    # this is still a correct answer, so not a hard negative
                    fp[d] = 1
            else:
                # NOT a false positive because object is difficult!
                pass
        else:
            # false positive due to not matching any ground truth object
            fp[d] = 1
            hard_neg[d] = 1

        # NOTE: this is very important: otherwise, gt.arr does not get the
        # changes we make to gt_for_image
        if 'img_ind' in gt.cols:
            gt.arr[inds, :] = gt_for_image

    ap, rec, prec = cls.compute_rec_prec_ap(tp, fp, npos)
    return (ap, rec, prec, hard_neg)
def get_windows_new(self, image, cls, metaparams=None, with_time=False,
                    at_most=200000, force=False):
    """
    Generate windows by using ground truth window stats and metaparams.
    metaparams must contain keys 'samples_per_500px', 'num_scales',
    'num_ratios', 'mode', and 'priority'.
    metaparams['mode'] can be 'linear' or 'importance' and refers to the
    method of sampling intervals per window parameter.
    If with_time=True, return tuple of (windows, time_elapsed).
    """
    if not metaparams:
        metaparams = {
            'samples_per_500px': 83,
            'num_scales': 12,
            'num_ratios': 6,
            'mode': 'importance',
            'priority': 0}
    t = time.time()
    x_samples = int(image.width / 500. * metaparams['samples_per_500px'])
    y_samples = int(image.height / 500. * metaparams['samples_per_500px'])

    # check for cached windows and return if found
    dirname = config.get_sliding_windows_cached_dir(self.train_name)
    filename = '%s_%d_%d_%d_%s_%d_%d_%d.npy' % (
        cls,
        metaparams['samples_per_500px'],
        metaparams['num_scales'],
        metaparams['num_ratios'],
        metaparams['mode'],
        metaparams['priority'],
        x_samples, y_samples)
    filename = os.path.join(dirname, filename)
    if os.path.exists(filename) and not force:
        windows = np.load(filename)
    else:
        # fine, we'll figure out the windows again
        # load the kde for x_scaled, y_scaled, scale, log_ratio
        stats = self.get_stats()
        kde = stats['%s_kde' % cls]
        x_frac = kde.dataset[0, :]
        y_frac = kde.dataset[1, :]
        scale = kde.dataset[2, :]
        log_ratio = kde.dataset[3, :]

        # given the metaparameters, sample points to generate the complete
        # list of parameter combinations
        if metaparams['mode'] == 'linear':
            x_points = np.linspace(x_frac.min(), x_frac.max(), x_samples)
            y_points = np.linspace(y_frac.min(), y_frac.max(), y_samples)
            scale_points = np.linspace(
                scale.min(), scale.max(), metaparams['num_scales'])
            ratio_points = np.linspace(
                log_ratio.min(), log_ratio.max(), metaparams['num_ratios'])
        elif metaparams['mode'] == 'importance':
            x_points = ut.importance_sample(
                x_frac, x_samples, stats['%s_%s_kde' % (cls, 'x_frac')])
            y_points = ut.importance_sample(
                y_frac, y_samples, stats['%s_%s_kde' % (cls, 'y_frac')])
            scale_points = ut.importance_sample(
                scale, metaparams['num_scales'],
                stats['%s_%s_kde' % (cls, 'scale')])
            ratio_points = ut.importance_sample(
                log_ratio, metaparams['num_ratios'],
                stats['%s_%s_kde' % (cls, 'log_ratio')])
        else:
            raise RuntimeError("Invalid mode")
        combinations = [x for x in itertools.product(
            x_points, y_points, scale_points, ratio_points)]
        combinations = np.array(combinations).T

        # only take the top-scoring detections
        if metaparams['priority']:
            t22 = time.time()
            scores = kde(combinations)  # (so slow!)
            print("kde took %.3f s" % (time.time() - t22))
            sorted_inds = np.argsort(-scores)
            max_num = min(at_most, sorted_inds.size)
            combinations = combinations[:, sorted_inds[:max_num]]

        # convert from (x_frac, y_frac, scale, log_ratio) to (x, y, w, h)
        scale = combinations[2, :]
        x = combinations[0, :] * image.width
        ratio = np.exp(combinations[3, :])
        y = combinations[1, :] * image.height
        w = scale * SlidingWindows.MIN_WIDTH
        h = w * ratio
        combinations[0, :] = x
        combinations[1, :] = y
        combinations[2, :] = w
        combinations[3, :] = h
        windows = combinations.T
        windows = BoundingBox.clipboxes_arr(
            windows, (0, 0, image.width, image.height))
        # saving does not take more than 0.5 sec even for 10**6 windows
        np.save(filename, windows)

    time_elapsed = time.time() - t
    print("get_windows_new() got %d windows in %.3fs" % (
        windows.shape[0], time_elapsed))
    if with_time:
        return (windows, time_elapsed)
    return windows
def get_recalls(self, cls, metaparams, mode, window_intervals, min_overlaps):
    """
    Return nparray of num_intervals x num_overlaps, with each entry
    specifying the recall for that combination of window_interval and
    min_overlap.
    window_intervals must begin with 0.
    mode must be in ['sw', 'jw']
    """
    assert window_intervals[0] == 0
    num_overlaps = len(min_overlaps)
    num_intervals = len(window_intervals)
    times = [0]
    window_nums = [0]
    image_inds = self.dataset.get_pos_samples_for_class(cls)
    num_images = len(image_inds)
    # we are building up a num_images x num_intervals+1 x num_overlaps array
    array = np.zeros((num_images, num_intervals + 1, num_overlaps))
    for i in range(num_images):
        ind = image_inds[i]
        image = self.dataset.images[ind]

        # the first interval is 0, so there aren't any window proposals
        array[i, 0, :] = 0
        gts = image.get_ground_truth(cls)
        num_gt = gts.shape[0]
        # the last row of the matrix is the number of ground truth
        array[i, num_intervals, :] = num_gt

        # now get the windows and append the statistics information
        if mode == 'sw':
            windows, time_elapsed = self.get_windows_new(
                image, cls, metaparams, with_time=True,
                at_most=max(window_intervals))
        elif mode == 'jw':
            windows, time_elapsed = self.jw.get_windows(image, cls, K=10000)
        else:
            raise RuntimeError("impossible mode")

        # shuffle the windows if we want to take them in random order
        if mode == 'sw' and not metaparams['priority']:
            rand_ind = np.random.permutation(windows.shape[0])
            windows = windows[rand_ind, :]

        window_nums.append(windows.shape[0])
        times.append(time_elapsed)

        # go through each interval and count how many ground truth are
        # matched
        for j in range(1, len(window_intervals)):
            max_ind = window_intervals[j]
            # if we are going to ask for more windows than are available,
            # the recall is going to be the same as before, so just copy it
            if max_ind > windows.shape[0]:
                array[i, j, :] = array[i, j - 1, :]
                continue
            # otherwise, count the number of ground truths that are
            # overlapped
            # NOTE: a single window can overlap multiple ground truths in
            # this scheme
            for gt in gts.arr:
                overlaps = BoundingBox.get_overlap(
                    windows[:max_ind, :4], gt[:4])
                for k, min_overlap in enumerate(min_overlaps):
                    if np.any(overlaps >= min_overlap):
                        array[i, j, k] += 1
    print("Windows generated per image: %d +/- %.3f, in %.3f +/- %.3f sec" % (
        np.mean(window_nums), np.std(window_nums),
        np.mean(times), np.std(times)))
    # reduce to num_intervals+1 x num_overlaps
    sum_array = np.sum(array, axis=0)
    # reduce to num_intervals x num_overlaps
    recalls = sum_array[:-1, :] / sum_array[-1, :]
    return recalls
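
# Hypothetical usage of get_recalls: measure how recall of 'dog' ground
# truth grows with the window budget, at two overlap thresholds. With three
# intervals and two overlaps this returns a 3 x 2 array, whose first row is
# all zeros (a budget of 0 windows). The argument values are illustrative.
# recalls = sw.get_recalls(
#     'dog', metaparams, mode='sw',
#     window_intervals=[0, 1000, 10000], min_overlaps=[0.5, 0.7])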