def test_get_overlap():
    bbgt = np.array([139., 200., 69., 102.])
    bb = np.array([139., 200., 69., 102.])
    ov = BoundingBox.get_overlap(bb, bbgt)
    print(ov)
    assert(ov == 1)

    bb = np.array([139., 200., 69., 51.])
    ov = BoundingBox.get_overlap(bb, bbgt)
    print(ov)
    assert(ov == 0.5)

    bb = np.array([139., 200., 35., 51.])
    ov = BoundingBox.get_overlap(bb, bbgt)
    print(ov)
    assert((ov >= 0.24) and (ov <= 0.26))

    # switch order of arguments
    bb = np.array([139., 200., 35., 51.])
    ov = BoundingBox.get_overlap(bbgt, bb)
    print(ov)
    assert((ov >= 0.24) and (ov <= 0.26))

    bb = np.array([239., 300., 69., 51.])
    ov = BoundingBox.get_overlap(bb, bbgt)
    print(ov)
    assert(ov == 0)
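# The test above exercises BoundingBox.get_overlap, which computes the
# intersection-over-union (Jaccard) overlap of boxes in (x, y, w, h) format.
# A minimal sketch of that computation, assuming the continuous-coordinate
# convention that reproduces the asserted values (illustration only, not the
# project's actual implementation):
def iou_xywh(bb, bbgt):
    """Return intersection-over-union of two (x, y, w, h) boxes."""
    ix = max(0., min(bb[0] + bb[2], bbgt[0] + bbgt[2]) - max(bb[0], bbgt[0]))
    iy = max(0., min(bb[1] + bb[3], bbgt[1] + bbgt[3]) - max(bb[1], bbgt[1]))
    inter = ix * iy
    union = bb[2] * bb[3] + bbgt[2] * bbgt[3] - inter
    return inter / union if union > 0 else 0.

# e.g., iou_xywh([139., 200., 69., 51.], [139., 200., 69., 102.]) == 0.5,
# matching the second assertion in the test.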
def load_dpm_dets_for_image(cls, image, dataset, suffix='dets_all_may25_DP'):
    """Load a multi-class array of detections for an image from .mat format."""
    t = time.time()
    name = os.path.splitext(image.name)[0]
    # TODO: figure out how to deal with different types of detections
    dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_may25_DP'
    filename = opjoin(dets_dir, '%s_dets_all_may25_DP.mat' % name)
    if not opexists(filename):
        dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_jun1_DP_trainval'
        filename = opjoin(dets_dir, '%s_dets_all_jun1_DP_trainval.mat' % name)
        if not opexists(filename):
            filename = opjoin(config.test_support_dir,
                              'dets/%s_dets_all_may25_DP.mat' % name)
            if not opexists(filename):
                print("File %s does not exist!" % filename)
                return None
    mat = scipy.io.loadmat(filename)
    dets = mat['dets_mc']
    times = mat['times_mc']
    feat_time = times[0, 0]
    dets_seq = []
    # columns of the raw .mat detections
    cols = ['x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy', 'dummy',
            'score', 'time']
    for cls_ind, cls_name in enumerate(config.pascal_classes):
        cls_dets = dets[cls_ind][0]
        if cls_dets.shape[0] > 0:
            det_time = times[cls_ind, 1]  # all detections get the final time
            cls_dets = ut.append_index_column(cls_dets, det_time)
            cls_dets = ut.append_index_column(cls_dets, cls_ind)
            # subtract 1 pixel and convert from corners
            cls_dets[:, :4] -= 1
            cls_dets[:, :4] = BoundingBox.convert_arr_from_corners(cls_dets[:, :4])
            dets_seq.append(cls_dets)
    cols = ['x', 'y', 'w', 'h', 'dummy', 'dummy', 'dummy', 'dummy',
            'score', 'time', 'cls_ind']
    # NMS detections per class individually
    dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols': cols})
    dets_mc[:, :4] = BoundingBox.clipboxes_arr(
        dets_mc[:, :4], (0, 0, image.size[0] - 1, image.size[1] - 1))
    time_elapsed = time.time() - t
    print("On image %s, took %.3f s" % (image.name, time_elapsed))
    return dets_mc
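# Detector.nms_detections (applied per class via ut.collect above) performs
# non-maximum suppression. A minimal sketch of greedy NMS over rows whose
# first five columns are (x, y, w, h, score), assuming the standard greedy
# algorithm (illustration only, not the project's implementation; reuses the
# iou_xywh sketch from earlier):
def nms_xywh(dets, overlap_thresh=0.5):
    """Greedily keep highest-scoring boxes; drop any box whose IoU with an
    already-kept box exceeds overlap_thresh."""
    keep = []
    order = np.argsort(-dets[:, 4])  # indices sorted by descending score
    suppressed = np.zeros(len(dets), dtype=bool)
    for i in order:
        if suppressed[i]:
            continue
        keep.append(i)
        for j in order:
            if not suppressed[j] and j != i:
                if iou_xywh(dets[j, :4], dets[i, :4]) > overlap_thresh:
                    suppressed[j] = True
    return dets[keep]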
def load_from_pascal_xml_filename(cls, classes, filename):
    "Load image info from a file in the PASCAL VOC XML format."

    def get_data_from_tag(node, tag):
        if tag == "bndbox":
            bndbox = node.getElementsByTagName(tag)[0]
            x1 = int(bndbox.childNodes[1].childNodes[0].data)
            y1 = int(bndbox.childNodes[3].childNodes[0].data)
            x2 = int(bndbox.childNodes[5].childNodes[0].data)
            y2 = int(bndbox.childNodes[7].childNodes[0].data)
            return (x1, y1, x2, y2)
        else:
            return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())

    # image info
    name = get_data_from_tag(data, "filename")
    filename = opjoin(config.VOC_dir, 'JPEGImages', name)
    size = data.getElementsByTagName("size")[0]
    width = int(get_data_from_tag(size, "width"))
    height = int(get_data_from_tag(size, "height"))
    im_depth = int(get_data_from_tag(size, "depth"))
    img = Image(width, height, classes, name)

    # per-object info
    objects = []
    for obj in data.getElementsByTagName("object"):
        clas = str(get_data_from_tag(obj, "name")).lower().strip()
        diff = int(get_data_from_tag(obj, "difficult"))
        trun = int(get_data_from_tag(obj, "truncated"))
        rect = get_data_from_tag(obj, "bndbox")
        bbox = BoundingBox(rect, format='corners')
        cls_ind = classes.index(clas)
        objects.append(np.hstack((bbox.get_arr(), cls_ind, diff, trun)))
    if len(objects) > 0:
        img.objects_table = Table(np.array(objects), cls.columns)
    else:
        img.objects_table = Table(None, cls.columns)
    return img
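# For reference, the XML shape this parser expects (standard PASCAL VOC
# annotation layout; values are illustrative). The childNodes[1, 3, 5, 7]
# indexing above picks out the xmin/ymin/xmax/ymax children of <bndbox>:
#
# <annotation>
#   <filename>000001.jpg</filename>
#   <size><width>353</width><height>500</height><depth>3</depth></size>
#   <object>
#     <name>dog</name>
#     <difficult>0</difficult>
#     <truncated>1</truncated>
#     <bndbox><xmin>48</xmin><ymin>240</ymin><xmax>195</xmax><ymax>371</ymax></bndbox>
#   </object>
# </annotation>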
def load_csc_dpm_dets_for_image(cls, image, dataset):
    """Load HOS's cascaded dets."""
    t = time.time()
    name = os.path.splitext(image.name)[0]
    # if test dataset, use HOS's detections; if not, need to output my own
    if re.search('test', dataset.name):
        dirname = config.get_dets_test_wholeset_dir()
        filename = os.path.join(
            dirname, '%s_dets_all_test_original_cascade_wholeset.mat' % name)
    else:
        dirname = config.get_dets_nov19()
        filename = os.path.join(dirname, '%s_dets_all_nov19.mat' % name)
    print(filename)
    if not os.path.exists(filename):
        raise RuntimeError("File %s does not exist!" % filename)
    mat = scipy.io.loadmat(filename)
    dets = mat['dets_mc']
    times = mat['times_mc']
    feat_time = times[0, 0]
    dets_seq = []
    # columns of the raw .mat detections
    cols = ['x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy', 'dummy',
            'dummy', 'dummy', 'score']
    for cls_ind, cls_name in enumerate(dataset.classes):
        cls_dets = dets[cls_ind][0]
        if cls_dets.shape[0] > 0:
            # keep only the corner coordinates and the score column
            good_ind = [0, 1, 2, 3, 10]
            cls_dets = cls_dets[:, good_ind]
            det_time = times[cls_ind, 1]  # all detections get the final time
            cls_dets = ut.append_index_column(cls_dets, det_time)
            cls_dets = ut.append_index_column(cls_dets, cls_ind)
            # convert from corners
            cls_dets[:, :4] = BoundingBox.convert_arr_from_corners(cls_dets[:, :4])
            cls_dets[:, :4] = BoundingBox.clipboxes_arr(
                cls_dets[:, :4], (0, 0, image.size[0], image.size[1]))
            dets_seq.append(cls_dets)
    cols = ['x', 'y', 'w', 'h', 'score', 'time', 'cls_ind']
    dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols': cols})
    time_elapsed = time.time() - t
    print("On image %s, took %.3f s" % (image.name, time_elapsed))
    return dets_mc
def test_get_overlap_with_array():
    bbgt = np.array([139., 200., 69., 102.])
    bb1 = np.array([139., 200., 69., 102.])
    bb2 = np.array([139., 200., 69., 51.])
    bb3 = np.array([239., 300., 69., 51.])
    bb = np.vstack((bb1, bb2, bb3))
    numtimes = 100000
    bb = np.tile(bb, (numtimes, 1))
    tt = ut.TicToc().tic()
    ov = BoundingBox.get_overlap(bb, bbgt)
    tt.toc()
    # time a second call as well, to discount warm-up effects
    tt.tic()
    ov = BoundingBox.get_overlap(bb, bbgt)
    tt.toc()
    print(ov)
    assert(np.all(ov == np.tile(np.array([1, 0.5, 0]), numtimes)))
def test_convert_to_and_fro_with_array():
    bb1 = np.array([139., 200., 69., 102.])
    bb2 = np.array([139., 200., 69., 51.])
    bb3 = np.array([239., 300., 69., 51.])
    bb = np.vstack((bb1, bb2, bb3))
    bb = np.tile(bb, (100000, 1))
    print(bb.shape)
    tt = ut.TicToc().tic()
    bb_c = BoundingBox.convert_arr_to_corners(bb)
    tt.toc()
    print(bb_c.shape)
    tt.tic()
    bb2 = BoundingBox.convert_arr_from_corners(bb_c)
    tt.toc()
    print(bb2.shape)
    assert(np.all(bb == bb2))
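# The conversion tests rely on convert_arr_to_corners / convert_arr_from_corners
# round-tripping exactly. A minimal sketch of the pair, assuming the continuous
# (x, y, w, h) <-> (x1, y1, x2, y2) convention consistent with the overlap
# values above (illustration only, not the project's code):
def to_corners(arr):
    """(x, y, w, h) rows -> (x1, y1, x2, y2) rows."""
    out = np.atleast_2d(arr).astype(float).copy()
    out[:, 2] = out[:, 0] + out[:, 2]  # x2 = x + w
    out[:, 3] = out[:, 1] + out[:, 3]  # y2 = y + h
    return out

def from_corners(arr):
    """(x1, y1, x2, y2) rows -> (x, y, w, h) rows."""
    out = np.atleast_2d(arr).astype(float).copy()
    out[:, 2] = out[:, 2] - out[:, 0]  # w = x2 - x1
    out[:, 3] = out[:, 3] - out[:, 1]  # h = y2 - y1
    return out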
def load_from_json_data(cls, classes, data):
    "Return an Image instantiated from a JSON representation."
    name = data['name']
    width = data['size'][0]
    height = data['size'][1]
    img = Image(width, height, classes, name)
    objects = []
    for obj in data['objects']:
        bbox = BoundingBox(obj['bbox'])
        cls_name = obj['class']
        cls_ind = classes.index(cls_name)
        diff = obj['diff']
        trun = obj['trun']
        objects.append(np.hstack((bbox.get_arr(), cls_ind, diff, trun)))
    if len(objects) > 0:
        img.objects_table = Table(np.array(objects), cls.columns)
    else:
        img.objects_table = Table(None, cls.columns)
    return img
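# For reference, the dict shape this loader expects (field values are
# illustrative; 'bbox' is presumably in the default (x, y, w, h) format,
# since no format='corners' is passed to BoundingBox):
#
# data = {
#     'name': '000001.jpg',
#     'size': [353, 500],
#     'objects': [
#         {'class': 'dog', 'bbox': [48, 240, 147, 131], 'diff': 0, 'trun': 1},
#     ],
# }
# img = Image.load_from_json_data(classes, data)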
def get_windows(self, image, cls=None, window_params=None, with_time=False):
    """
    Return all windows that can be generated with window_params.
    If with_time=True, return tuple of (windows, time_elapsed).
    """
    assert(cls or window_params)
    if not window_params:
        window_params = self.get_default_window_params(cls)
    t = time.time()
    stride = window_params.stride
    min_width = window_params.min_width
    actual_xs = []
    actual_ys = []
    actual_ws = []
    actual_hs = []
    # we want to be able to capture objects that extend past the image:
    # we always iterate over locations in native space, and convert to
    # actual image space when we record the window
    w_pad = int(1. * min_width / 2)
    x_min = -w_pad
    for scale in window_params.scales:
        x_max = int(image.width * scale) - w_pad
        if w_pad > 0:
            x_max += stride
        actual_w = int(min_width / scale) + 1
        for ratio in window_params.aspect_ratios:
            h_pad = int(1. * min_width * ratio / 2)
            y_min = -h_pad
            y_max = int(image.height * scale) - h_pad
            if h_pad > 0:
                y_max += stride
            actual_h = int(min_width / scale * ratio) + 1
            for y in range(y_min, y_max, stride):
                for x in range(x_min, x_max, stride):
                    actual_ws.append(actual_w)
                    actual_hs.append(actual_h)
                    actual_xs.append(int(x / scale))
                    actual_ys.append(int(y / scale))
    windows = np.array([actual_xs, actual_ys, actual_ws, actual_hs]).T
    windows = BoundingBox.clipboxes_arr(windows,
                                        (0, 0, image.width, image.height))
    if with_time:
        time_elapsed = time.time() - t
        return (windows, time_elapsed)
    else:
        return windows
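# A hedged usage sketch of get_windows: window_params is assumed to expose
# .stride, .min_width, .scales, and .aspect_ratios, exactly the attributes
# read above. The instance name `sw` and the parameter values here are
# hypothetical, just to show the call shape:
#
# class WindowParams(object):
#     stride = 8
#     min_width = 48
#     scales = [0.5, 1.0, 2.0]
#     aspect_ratios = [0.5, 1.0, 2.0]
#
# windows, t = sw.get_windows(image, window_params=WindowParams(), with_time=True)
# # windows is an (N, 4) array of (x, y, w, h) rows, clipped to the image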
def test_get_whole_image_bbox(self):
    image = Image(20, 10, [], 'test_image')
    assert(image.get_whole_image_bbox() == BoundingBox((0, 0, 20, 10)))
    image = Image(2, 100, [], 'test_image')
    assert(image.get_whole_image_bbox() == BoundingBox((0, 0, 2, 100)))
def main():
    parser = argparse.ArgumentParser(
        description='Execute different functions of our system')
    parser.add_argument('--first_n', type=int,
                        help='only take the first N images in the datasets')
    parser.add_argument('--name', help='name for this run', default='default',
                        choices=['default', 'nolateral', 'nohal', 'halfsize'])
    parser.add_argument('--force', action='store_true', default=False,
                        help='force overwrite')
    args = parser.parse_args()
    print(args)

    # configuration class
    class config(object):
        pass

    cfg = config()
    cfg.testname = "../ctfdet/data/finalRL/%s2_test"  # object model
    cfg.bottomup = False    # use complete search
    cfg.resize = 1.0        # resize the input image
    cfg.hallucinate = True  # use HOGs up to 4 pixels
    cfg.initr = 1           # initial radius of the CtF search
    cfg.ratio = 1           # radius at the next levels
    cfg.deform = True       # use deformation
    cfg.usemrf = True       # use lateral constraints

    if args.name == 'default':
        pass  # sticking with the default params
    elif args.name == 'nolateral':
        cfg.usemrf = False
    elif args.name == 'nohal':
        cfg.hallucinate = False
    elif args.name == 'halfsize':
        cfg.resize = 0.5

    # run on all three dataset splits
    test_datasets = ['val', 'test', 'train']
    for test_dataset in test_datasets:
        # load the dataset
        dataset = Dataset('full_pascal_' + test_dataset)
        if args.first_n:
            dataset.images = dataset.images[:args.first_n]

        # create directory for storing cached detections
        dirname = './temp_data'
        if os.path.exists('/u/sergeyk'):
            dirname = '/u/vis/x1/sergeyk/object_detection'
        dirname = dirname + '/ctfdets/%s' % (args.name)
        ut.makedirs(dirname)

        num_images = len(dataset.images)
        for img_ind in range(comm_rank, num_images, comm_size):
            # check for existing detections
            image = dataset.images[img_ind]
            filename = os.path.join(dirname, image.name + '.npy')
            if os.path.exists(filename) and not args.force:
                # table = np.load(filename)[()]
                continue

            # read the image
            imname = dataset.get_image_filename(img_ind)
            img = util2.myimread(imname, resize=cfg.resize)
            # compute the HOG pyramid
            f = pyrHOG2.pyrHOG(img, interv=10, savedir="", notsave=True,
                               notload=True, hallucinate=cfg.hallucinate,
                               cformat=True)

            # for each class
            all_dets = []
            for ccls in dataset.classes:
                t = time.time()
                cls_ind = dataset.get_ind(ccls)
                print("%s Img %d/%d Class: %s" %
                      (test_dataset, img_ind + 1, num_images, ccls))
                # load the class model
                models = util2.load("%s%d.model" % (cfg.testname % ccls, 7))
                res = []
                t1 = time.time()
                # for each aspect, scan the image with left and right models
                for clm, m in enumerate(models):
                    res.append(pyrHOG2RL.detectflip(
                        f, m, None, hallucinate=cfg.hallucinate,
                        initr=cfg.initr, ratio=cfg.ratio, deform=cfg.deform,
                        bottomup=cfg.bottomup, usemrf=cfg.usemrf,
                        small=False, cl=clm))
                # fuse the detections
                fuse = []
                numhog = 0
                for mix in res:
                    tr = mix[0]
                    fuse += mix[1]
                    numhog += mix[3]
                rfuse = tr.rank(fuse, maxnum=300)
                nfuse = tr.cluster(rfuse, ovr=0.3, inclusion=False)
                # print("Number of computed HOGs:", numhog)
                time_elapsed = time.time() - t
                print("Elapsed time: %.3f s" % time_elapsed)

                bboxes = [nf['bbox'] for nf in nfuse]
                scores = [nf['scr'] for nf in nfuse]
                assert(len(bboxes) == len(scores))
                if len(bboxes) > 0:
                    arr = np.zeros((len(bboxes), 7))
                    arr[:, :4] = BoundingBox.convert_arr_from_corners(
                        np.array(bboxes))
                    arr[:, 4] = scores
                    arr[:, 5] = time_elapsed
                    arr[:, 6] = cls_ind
                    all_dets.append(arr)
            cols = ['x', 'y', 'w', 'h', 'score', 'time', 'cls_ind']
            if len(all_dets) > 0:
                all_dets = np.concatenate(all_dets, 0)
            else:
                all_dets = np.array([])
            table = Table(all_dets, cols)
            np.save(filename, table)
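# The img_ind loop in main() stripes images across parallel workers: worker
# comm_rank of comm_size handles indices comm_rank, comm_rank + comm_size,
# and so on. A tiny self-contained illustration of the same round-robin
# partition (the function name is hypothetical):
def round_robin_indices(num_items, rank, size):
    """Return the item indices handled by worker `rank` out of `size`."""
    return list(range(rank, num_items, size))

# e.g., round_robin_indices(10, 1, 4) == [1, 5, 9]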
def get_whole_image_bbox(self):
    "Return a BoundingBox with (0,0,width,height) of the image."
    return BoundingBox((0, 0, self.width, self.height))
def get_windows_new(self, image, cls, metaparams=None, with_time=False,
                    at_most=200000, force=False):
    """
    Generate windows by using ground truth window stats and metaparams.
    metaparams must contain keys 'samples_per_500px', 'num_scales',
    'num_ratios', 'mode'.
    metaparams['mode'] can be 'linear' or 'importance' and refers to the
    method of sampling intervals per window parameter.
    If with_time=True, return tuple of (windows, time_elapsed).
    """
    if not metaparams:
        metaparams = {
            'samples_per_500px': 83,
            'num_scales': 12,
            'num_ratios': 6,
            'mode': 'importance',
            'priority': 0}
    t = time.time()
    x_samples = int(image.width / 500. * metaparams['samples_per_500px'])
    y_samples = int(image.height / 500. * metaparams['samples_per_500px'])

    # check for cached windows and return if found
    dirname = config.get_sliding_windows_cached_dir(self.train_name)
    filename = '%s_%d_%d_%s_%s_%d_%d_%d.npy' % (
        cls,
        metaparams['samples_per_500px'],
        metaparams['num_scales'],
        metaparams['num_ratios'],
        metaparams['mode'],
        metaparams['priority'],
        x_samples, y_samples)
    filename = os.path.join(dirname, filename)
    if os.path.exists(filename) and not force:
        windows = np.load(filename)
    else:
        # fine, we'll figure out the windows again
        # load the kde for x_scaled, y_scaled, scale, log_ratio
        stats = self.get_stats()
        kde = stats['%s_kde' % cls]
        x_frac = kde.dataset[0, :]
        y_frac = kde.dataset[1, :]
        scale = kde.dataset[2, :]
        log_ratio = kde.dataset[3, :]

        # given the metaparameters, sample points to generate the complete
        # list of parameter combinations
        if metaparams['mode'] == 'linear':
            x_points = np.linspace(x_frac.min(), x_frac.max(), x_samples)
            y_points = np.linspace(y_frac.min(), y_frac.max(), y_samples)
            scale_points = np.linspace(scale.min(), scale.max(),
                                       metaparams['num_scales'])
            ratio_points = np.linspace(log_ratio.min(), log_ratio.max(),
                                       metaparams['num_ratios'])
        elif metaparams['mode'] == 'importance':
            x_points = ut.importance_sample(
                x_frac, x_samples, stats['%s_%s_kde' % (cls, 'x_frac')])
            y_points = ut.importance_sample(
                y_frac, y_samples, stats['%s_%s_kde' % (cls, 'y_frac')])
            scale_points = ut.importance_sample(
                scale, metaparams['num_scales'],
                stats['%s_%s_kde' % (cls, 'scale')])
            ratio_points = ut.importance_sample(
                log_ratio, metaparams['num_ratios'],
                stats['%s_%s_kde' % (cls, 'log_ratio')])
        else:
            raise RuntimeError("Invalid mode")
        combinations = [x for x in itertools.product(
            x_points, y_points, scale_points, ratio_points)]
        combinations = np.array(combinations).T

        # only take the top-scoring detections
        if metaparams['priority']:
            t22 = time.time()
            scores = kde(combinations)  # (so slow!)
            print("kde took %.3f s" % (time.time() - t22))
            sorted_inds = np.argsort(-scores)
            max_num = min(at_most, sorted_inds.size)
            combinations = combinations[:, sorted_inds[:max_num]]

        # convert from (x_frac, y_frac, scale, log_ratio) to (x, y, w, h)
        scale = combinations[2, :]
        x = combinations[0, :] * image.width   # x = x_frac * width
        ratio = np.exp(combinations[3, :])     # ratio = exp(log_ratio)
        y = combinations[1, :] * image.height  # y = y_frac * height
        w = scale * SlidingWindows.MIN_WIDTH   # w = scale * min_width
        h = w * ratio                          # h = w * ratio
        combinations[0, :] = x
        combinations[1, :] = y
        combinations[2, :] = w
        combinations[3, :] = h
        windows = combinations.T
        windows = BoundingBox.clipboxes_arr(
            windows, (0, 0, image.width, image.height))
        # saving does not take more than 0.5 s even for 10**6 windows
        np.save(filename, windows)

    time_elapsed = time.time() - t
    print("get_windows_new() got %d windows in %.3fs" %
          (windows.shape[0], time_elapsed))
    if with_time:
        return (windows, time_elapsed)
    else:
        return windows
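# A hedged usage sketch of get_windows_new (the instance name `sw` and the
# metaparams values are hypothetical; the keys match the docstring above):
#
# metaparams = {'samples_per_500px': 83, 'num_scales': 12, 'num_ratios': 6,
#               'mode': 'importance', 'priority': 1}
# windows, t_elapsed = sw.get_windows_new(image, 'dog', metaparams,
#                                         with_time=True, at_most=10000)
# # with priority=1, windows come back sorted by KDE likelihood, so
# # windows[:k] are the k most probable under the class's ground-truth stats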
def get_recalls(self, cls, metaparams, mode, window_intervals, min_overlaps):
    """
    Return nparray of num_intervals x num_overlaps, with each entry
    specifying the recall for that combination of window_interval and
    min_overlap.
    window_intervals must begin with 0.
    mode must be in ['sw','jw'].
    """
    assert(window_intervals[0] == 0)
    num_overlaps = len(min_overlaps)
    num_intervals = len(window_intervals)
    times = [0]
    window_nums = [0]
    image_inds = self.dataset.get_pos_samples_for_class(cls)
    num_images = len(image_inds)
    # we are building up a num_images x num_intervals+1 x num_overlaps array
    array = np.zeros((num_images, num_intervals + 1, num_overlaps))
    for i in range(num_images):
        ind = image_inds[i]
        image = self.dataset.images[ind]
        # the first interval is 0, so there aren't any window proposals
        array[i, 0, :] = 0
        gts = image.get_ground_truth(cls)
        num_gt = gts.shape[0]
        # the last row of the matrix is the number of ground truth
        array[i, num_intervals, :] = num_gt
        # now get the windows and append the statistics information
        # windows,time_elapsed = window_generator.get_windows(image,cls,with_time=True)
        if mode == 'sw':
            windows, time_elapsed = self.get_windows_new(
                image, cls, metaparams, with_time=True,
                at_most=max(window_intervals))
        elif mode == 'jw':
            windows, time_elapsed = self.jw.get_windows(image, cls, K=10000)
        else:
            raise RuntimeError('impossible mode')
        # shuffle the windows if we want to take them in random order
        if mode == 'sw' and not metaparams['priority']:
            rand_ind = np.random.permutation(windows.shape[0])
            windows = windows[rand_ind, :]
        window_nums.append(windows.shape[0])
        times.append(time_elapsed)
        # go through each interval and count how many ground truth are matched
        for j in range(1, len(window_intervals)):
            max_ind = window_intervals[j]
            # if we are asking for more windows than are available, the
            # recall stays the same as at the previous interval
            if max_ind > windows.shape[0]:
                array[i, j, :] = array[i, j - 1, :]
                continue
            # otherwise, count the number of ground truths that are
            # overlapped (NOTE: a single window can overlap multiple ground
            # truths in this scheme)
            for gt in gts.arr:
                overlaps = BoundingBox.get_overlap(windows[:max_ind, :4],
                                                   gt[:4])
                for k, min_overlap in enumerate(min_overlaps):
                    if np.any(overlaps >= min_overlap):
                        array[i, j, k] += 1
    print("Windows generated per image: %d +/- %.3f, in %.3f +/- %.3f sec" % (
        np.mean(window_nums), np.std(window_nums),
        np.mean(times), np.std(times)))
    # reduce to num_intervals+1 x num_overlaps
    sum_array = np.sum(array, axis=0)
    # reduce to num_intervals x num_overlaps
    recalls = sum_array[:-1, :] / sum_array[-1, :]
    return recalls
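# A hedged usage sketch of get_recalls (instance name `sw` and parameter
# values are hypothetical):
#
# window_intervals = [0, 100, 1000, 10000]
# min_overlaps = [0.25, 0.5]
# recalls = sw.get_recalls('dog', metaparams, 'sw',
#                          window_intervals, min_overlaps)
# # recalls[j, k] is the fraction of ground-truth 'dog' boxes matched by at
# # least one of the first window_intervals[j] proposals at overlap >=
# # min_overlaps[k]; row 0 corresponds to 0 proposals and is always 0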
def test_convert_to_and_fro():
    bb = np.array([139., 200., 69., 102.])
    bb_c = BoundingBox.convert_arr_to_corners(bb)
    bb2 = BoundingBox.convert_arr_from_corners(bb_c)
    assert(np.all(bb == bb2))