Example #1
def test_get_overlap():
  bbgt =  np.array([ 139.,  200.,   69.,  102.])
  bb =    np.array([ 139.,  200.,   69.,  102.])
  ov = BoundingBox.get_overlap(bb,bbgt)
  print(ov)
  assert(ov == 1)

  bb =    np.array([ 139.,  200.,   69.,  51.])
  ov = BoundingBox.get_overlap(bb,bbgt)
  print(ov)
  assert(ov == 0.5)

  bb =    np.array([ 139.,  200.,  35.,  51.])
  ov = BoundingBox.get_overlap(bb,bbgt)
  print(ov)
  assert((ov >= 0.24) and (ov <= 0.26))

  # switch order of arguments
  bb =    np.array([ 139.,  200.,  35.,  51.])
  ov = BoundingBox.get_overlap(bbgt,bb)
  print(ov)
  assert((ov >= 0.24) and (ov <= 0.26))

  bb =    np.array([ 239.,  300.,   69.,  51.])
  ov = BoundingBox.get_overlap(bb,bbgt)
  print(ov)
  assert(ov == 0)
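
The assertions above pin down the overlap semantics: boxes are [x, y, w, h] arrays and the score is intersection-over-union (1 for identical boxes, 0.5 when half the area is shared, 0 for disjoint boxes). For reference, a minimal sketch that would satisfy these tests, assuming corners are computed as x2 = x + w; this is an illustration, not the project's actual BoundingBox.get_overlap:

import numpy as np

def get_overlap(bb, bbgt):
  # Boxes are [x, y, w, h]; bb may be a single box or an N x 4 array.
  bb = np.atleast_2d(bb)
  # Intersection rectangle in corner coordinates (x2 = x + w, y2 = y + h).
  ix1 = np.maximum(bb[:, 0], bbgt[0])
  iy1 = np.maximum(bb[:, 1], bbgt[1])
  ix2 = np.minimum(bb[:, 0] + bb[:, 2], bbgt[0] + bbgt[2])
  iy2 = np.minimum(bb[:, 1] + bb[:, 3], bbgt[1] + bbgt[3])
  inter = np.maximum(ix2 - ix1, 0) * np.maximum(iy2 - iy1, 0)
  # Union = sum of the two areas minus the intersection.
  union = bb[:, 2] * bb[:, 3] + bbgt[2] * bbgt[3] - inter
  ov = inter / union
  return ov[0] if ov.size == 1 else ov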
Example #2
 def load_dpm_dets_for_image(cls,
                             image,
                             dataset,
                             suffix='dets_all_may25_DP'):
     """
 Loads multi-class array of detections for an image from .mat format.
 """
     t = time.time()
     name = os.path.splitext(image.name)[0]
     # TODO: figure out how to deal with different types of detections
     dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_may25_DP'
     filename = opjoin(dets_dir, '%s_dets_all_may25_DP.mat' % name)
     if not opexists(filename):
         dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_jun1_DP_trainval'
         filename = opjoin(dets_dir,
                           '%s_dets_all_jun1_DP_trainval.mat' % name)
         if not opexists(filename):
             filename = opjoin(config.test_support_dir,
                               'dets/%s_dets_all_may25_DP.mat' % name)
             if not opexists(filename):
                 print("File does not exist!")
                 return None
     mat = scipy.io.loadmat(filename)
     dets = mat['dets_mc']
     times = mat['times_mc']
     feat_time = times[0, 0]
     dets_seq = []
     cols = [
         'x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy', 'dummy',
         'score', 'time'
     ]
     for cls_ind, cls in enumerate(config.pascal_classes):
         cls_dets = dets[cls_ind][0]
         if cls_dets.shape[0] > 0:
             det_time = times[cls_ind, 1]
             # all detections get the final time
             cls_dets = ut.append_index_column(cls_dets, det_time)
             cls_dets = ut.append_index_column(cls_dets, cls_ind)
             # subtract 1 pixel and convert from corners!
             cls_dets[:, :4] -= 1
             cls_dets[:, :4] = BoundingBox.convert_arr_from_corners(
                 cls_dets[:, :4])
             dets_seq.append(cls_dets)
     cols = [
         'x', 'y', 'w', 'h', 'dummy', 'dummy', 'dummy', 'dummy', 'score',
         'time', 'cls_ind'
     ]
     # NMS detections per class individually
     dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols': cols})
     dets_mc[:, :4] = BoundingBox.clipboxes_arr(
         dets_mc[:, :4], (0, 0, image.size[0] - 1, image.size[1] - 1))
     time_elapsed = time.time() - t
     print("On image %s, took %.3f s" % (image.name, time_elapsed))
     return dets_mc
Example #3
    def load_from_pascal_xml_filename(cls, classes, filename):
        "Load image info from a file in the PASCAL VOC XML format."

        def get_data_from_tag(node, tag):
            if tag == "bndbox":
                x1 = int(
                    node.getElementsByTagName(tag)
                    [0].childNodes[1].childNodes[0].data)
                y1 = int(
                    node.getElementsByTagName(tag)
                    [0].childNodes[3].childNodes[0].data)
                x2 = int(
                    node.getElementsByTagName(tag)
                    [0].childNodes[5].childNodes[0].data)
                y2 = int(
                    node.getElementsByTagName(tag)
                    [0].childNodes[7].childNodes[0].data)
                return (x1, y1, x2, y2)
            else:
                return node.getElementsByTagName(tag)[0].childNodes[0].data

        with open(filename) as f:
            data = minidom.parseString(f.read())

        # image info
        name = get_data_from_tag(data, "filename")
        filename = opjoin(config.VOC_dir, 'JPEGImages', name)
        size = data.getElementsByTagName("size")[0]
        im_width = int(get_data_from_tag(size, "width"))
        im_height = int(get_data_from_tag(size, "height"))
        im_depth = int(get_data_from_tag(size, "depth"))
        width = im_width
        height = im_height
        img = Image(width, height, classes, name)

        # per-object info
        objects = []
        for obj in data.getElementsByTagName("object"):
            clas = str(get_data_from_tag(obj, "name")).lower().strip()
            diff = int(get_data_from_tag(obj, "difficult"))
            trun = int(get_data_from_tag(obj, "truncated"))
            rect = get_data_from_tag(obj, "bndbox")
            bbox = BoundingBox(rect, format='corners')
            cls_ind = classes.index(clas)
            objects.append(np.hstack((bbox.get_arr(), cls_ind, diff, trun)))
        if len(objects) > 0:
            img.objects_table = Table(np.array(objects), cls.columns)
        else:
            img.objects_table = Table(None, cls.columns)
        return img
Example #4
 def load_csc_dpm_dets_for_image(cls, image, dataset):
     """
 Loads HOS's cascaded dets.
 """
     t = time.time()
     name = os.path.splitext(image.name)[0]
     # if test dataset, use HOS's detections; if not, need to output my own
     if re.search('test', dataset.name):
         dirname = config.get_dets_test_wholeset_dir()
         filename = os.path.join(
             dirname,
             '%s_dets_all_test_original_cascade_wholeset.mat' % name)
     else:
         dirname = config.get_dets_nov19()
         filename = os.path.join(dirname, '%s_dets_all_nov19.mat' % name)
     print(filename)
     if not os.path.exists(filename):
         raise RuntimeError("File %s does not exist!" % filename)
     mat = scipy.io.loadmat(filename)
     dets = mat['dets_mc']
     times = mat['times_mc']
     feat_time = times[0, 0]
     dets_seq = []
     cols = [
         'x1', 'y1', 'x2', 'y2', 'dummy', 'dummy', 'dummy', 'dummy',
         'dummy', 'dummy', 'score'
     ]
     for cls_ind, cls in enumerate(dataset.classes):
         cls_dets = dets[cls_ind][0]
         if cls_dets.shape[0] > 0:
             good_ind = [0, 1, 2, 3, 10]
             cls_dets = cls_dets[:, good_ind]
             det_time = times[cls_ind, 1]
             # all detections get the final time
             cls_dets = ut.append_index_column(cls_dets, det_time)
             cls_dets = ut.append_index_column(cls_dets, cls_ind)
             # convert from corners!
             cls_dets[:, :4] = BoundingBox.convert_arr_from_corners(
                 cls_dets[:, :4])
             cls_dets[:, :4] = BoundingBox.clipboxes_arr(
                 cls_dets[:, :4], (0, 0, image.size[0], image.size[1]))
             dets_seq.append(cls_dets)
     cols = ['x', 'y', 'w', 'h', 'score', 'time', 'cls_ind']
     dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols': cols})
     time_elapsed = time.time() - t
     print("On image %s, took %.3f s" % (image.name, time_elapsed))
     return dets_mc
Example #5
def test_get_overlap_with_array():
  bbgt =  np.array([ 139.,  200.,   69.,  102.])
  bb1 =    np.array([ 139.,  200.,   69.,  102.])
  bb2 =    np.array([ 139.,  200.,   69.,  51.])
  bb3 =    np.array([ 239.,  300.,   69.,  51.])
  bb = np.vstack((bb1,bb2,bb3))
  numtimes = 100000
  bb = np.tile(bb, (numtimes,1))
  tt = ut.TicToc().tic()
  ov = BoundingBox.get_overlap(bb,bbgt)
  tt.toc()
  # time a second run; the first call warms everything up
  tt.tic()
  ov = BoundingBox.get_overlap(bb,bbgt)
  tt.toc()
  print(ov)
  assert(np.all(ov == np.tile(np.array([1,0.5,0]),numtimes)))
Example #6
def test_convert_to_and_fro_with_array():
  bb1 =    np.array([ 139.,  200.,   69.,  102.])
  bb2 =    np.array([ 139.,  200.,   69.,  51.])
  bb3 =    np.array([ 239.,  300.,   69.,  51.])
  bb = np.vstack((bb1,bb2,bb3))
  bb = np.tile(bb, (100000,1))
  print(bb.shape)
  tt = ut.TicToc().tic()
  bb_c = BoundingBox.convert_arr_to_corners(bb)
  tt.toc()
  print(bb_c.shape)
  tt.tic()
  bb2 = BoundingBox.convert_arr_from_corners(bb_c)
  tt.toc()
  print(bb2.shape)
  assert(np.all(bb == bb2))
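
The round-trip assertion only holds if the two conversions are exact inverses. A plausible pair, assuming the corner format is [x1, y1, x2, y2] with x2 = x + w (hypothetical; the real convert_arr_* functions may use a different corner convention, e.g. the 1-pixel offset handled in load_dpm_dets_for_image):

import numpy as np

def convert_arr_to_corners(arr):
  # [x, y, w, h] -> [x1, y1, x2, y2], assuming x2 = x + w and y2 = y + h.
  out = np.atleast_2d(arr).astype(float)
  out[:, 2] = out[:, 0] + out[:, 2]
  out[:, 3] = out[:, 1] + out[:, 3]
  return out

def convert_arr_from_corners(arr):
  # Exact inverse: [x1, y1, x2, y2] -> [x, y, w, h].
  out = np.atleast_2d(arr).astype(float)
  out[:, 2] = out[:, 2] - out[:, 0]
  out[:, 3] = out[:, 3] - out[:, 1]
  return out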
Example #7
 def load_from_json_data(cls, classes, data):
     "Return an Image instantiated from a JSON representation."
     name = data['name']
     width = data['size'][0]
     height = data['size'][1]
     img = Image(width, height, classes, name)
     objects = []
     for obj in data['objects']:
         bbox = BoundingBox(obj['bbox'])
         cls_name = obj['class']
         cls_ind = classes.index(cls_name)
         diff = obj['diff']
         trun = obj['trun']
         objects.append(np.hstack((bbox.get_arr(), cls_ind, diff, trun)))
     if len(objects) > 0:
         img.objects_table = Table(np.array(objects), cls.columns)
     else:
         img.objects_table = Table(None, cls.columns)
     return img
Example #8
 def load_from_json_data(cls, classes, data):
   "Return an Image instantiated from a JSON representation."
   name = data['name']
   width = data['size'][0]
   height = data['size'][1]
   img = Image(width,height,classes,name)
   objects = []
   for obj in data['objects']:
     bbox = BoundingBox(obj['bbox'])
     cls_name = obj['class']
     cls_ind = classes.index(cls_name)
     diff = obj['diff']
     trun = obj['trun']
     objects.append(np.hstack((bbox.get_arr(), cls_ind, diff, trun)))
   if len(objects)>0:
     img.objects_table = Table(np.array(objects), cls.columns)
   else:
     img.objects_table = Table(None, cls.columns)
   return img
Example #9
  def load_from_pascal_xml_filename(cls, classes, filename):
    "Load image info from a file in the PASCAL VOC XML format."

    def get_data_from_tag(node, tag):
      if tag == "bndbox":
        x1 = int(node.getElementsByTagName(tag)[0].childNodes[1].childNodes[0].data)
        y1 = int(node.getElementsByTagName(tag)[0].childNodes[3].childNodes[0].data)
        x2 = int(node.getElementsByTagName(tag)[0].childNodes[5].childNodes[0].data)
        y2 = int(node.getElementsByTagName(tag)[0].childNodes[7].childNodes[0].data)
        return (x1, y1, x2, y2)
      else:
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
      data = minidom.parseString(f.read())

    # image info
    name = get_data_from_tag(data, "filename")
    filename = opjoin(config.VOC_dir, 'JPEGImages', name)
    size = data.getElementsByTagName("size")[0]
    im_width = int(get_data_from_tag(size, "width"))
    im_height = int(get_data_from_tag(size, "height"))
    im_depth = int(get_data_from_tag(size, "depth"))
    width = im_width
    height = im_height
    img = Image(width,height,classes,name)

    # per-object info
    objects = []
    for obj in data.getElementsByTagName("object"):
      clas = str(get_data_from_tag(obj, "name")).lower().strip()
      diff = int(get_data_from_tag(obj, "difficult"))
      trun = int(get_data_from_tag(obj, "truncated"))
      rect = get_data_from_tag(obj, "bndbox")
      bbox = BoundingBox(rect, format='corners')
      cls_ind = classes.index(clas)
      objects.append(np.hstack((bbox.get_arr(), cls_ind, diff, trun)))
    if len(objects)>0:
      img.objects_table = Table(np.array(objects), cls.columns)
    else:
      img.objects_table = Table(None, cls.columns)
    return img
Example #10
 def load_dpm_dets_for_image(cls, image, dataset, suffix='dets_all_may25_DP'):
   """
   Loads multi-class array of detections for an image from .mat format.
   """
   t = time.time()
   name = os.path.splitext(image.name)[0]
   # TODO: figure out how to deal with different types of detections
   dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_may25_DP'
   filename = opjoin(dets_dir, '%s_dets_all_may25_DP.mat'%name)
   if not opexists(filename):
     dets_dir = '/u/vis/x1/sergeyk/rl_detection/voc-release4/2007/tmp/dets_jun1_DP_trainval'
     filename = opjoin(dets_dir, '%s_dets_all_jun1_DP_trainval.mat'%name)
     if not opexists(filename):
       filename = opjoin(config.test_support_dir,'dets/%s_dets_all_may25_DP.mat'%name)
       if not opexists(filename):
         print("File does not exist!")
         return None
   mat = scipy.io.loadmat(filename)
   dets = mat['dets_mc']
   times = mat['times_mc']
   feat_time = times[0,0]
   dets_seq = []
   cols = ['x1','y1','x2','y2','dummy','dummy','dummy','dummy','score','time'] 
   for cls_ind,cls in enumerate(config.pascal_classes):
     cls_dets = dets[cls_ind][0]
     if cls_dets.shape[0]>0:
       det_time = times[cls_ind,1]
       # all detections get the final time
       cls_dets = ut.append_index_column(cls_dets, det_time)
       cls_dets = ut.append_index_column(cls_dets, cls_ind)
       # subtract 1 pixel and convert from corners!
       cls_dets[:,:4] -= 1
       cls_dets[:,:4] = BoundingBox.convert_arr_from_corners(cls_dets[:,:4])
       dets_seq.append(cls_dets)
   cols = ['x','y','w','h','dummy','dummy','dummy','dummy','score','time','cls_ind'] 
   # NMS detections per class individually
   dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols':cols})
   dets_mc[:,:4] = BoundingBox.clipboxes_arr(dets_mc[:,:4],(0,0,image.size[0]-1,image.size[1]-1))
   time_elapsed = time.time()-t
   print("On image %s, took %.3f s"%(image.name, time_elapsed))
   return dets_mc
Example #11
 def load_csc_dpm_dets_for_image(cls, image, dataset):
   """
   Loads HOS's cascaded dets.
   """
   t = time.time()
   name = os.path.splitext(image.name)[0]
   # if test dataset, use HOS's detections; if not, need to output my own
   if re.search('test', dataset.name):
     dirname = config.get_dets_test_wholeset_dir()
     filename = os.path.join(dirname,'%s_dets_all_test_original_cascade_wholeset.mat'%name)
   else:
     dirname = config.get_dets_nov19()
     filename = os.path.join(dirname, '%s_dets_all_nov19.mat'%name)
   print(filename)
   if not os.path.exists(filename):
     raise RuntimeError("File %s does not exist!" % filename)
   mat = scipy.io.loadmat(filename)
   dets = mat['dets_mc']
   times = mat['times_mc']
   feat_time = times[0,0]
   dets_seq = []
   cols = ['x1','y1','x2','y2','dummy','dummy','dummy','dummy','dummy','dummy','score'] 
   for cls_ind,cls in enumerate(dataset.classes):
     cls_dets = dets[cls_ind][0]
     if cls_dets.shape[0]>0:
       good_ind = [0,1,2,3,10]
       cls_dets = cls_dets[:,good_ind]
       det_time = times[cls_ind,1]
       # all detections get the final time
       cls_dets = ut.append_index_column(cls_dets, det_time)
       cls_dets = ut.append_index_column(cls_dets, cls_ind)
       # convert from corners!
       cls_dets[:,:4] = BoundingBox.convert_arr_from_corners(cls_dets[:,:4])
       cls_dets[:,:4] = BoundingBox.clipboxes_arr(cls_dets[:,:4], (0,0,image.size[0],image.size[1]))
       dets_seq.append(cls_dets)
   cols = ['x','y','w','h','score','time','cls_ind'] 
   dets_mc = ut.collect(dets_seq, Detector.nms_detections, {'cols':cols})
   time_elapsed = time.time()-t
   print("On image %s, took %.3f s"%(image.name, time_elapsed))
   return dets_mc
Example #12
 def get_windows(self,
                 image,
                 cls=None,
                 window_params=None,
                 with_time=False):
     """
 Return all windows that can be generated with window_params.
 If with_time=True, return tuple of (windows, time_elapsed).
 """
     assert (cls or window_params)
     if not window_params:
         window_params = self.get_default_window_params(cls)
     t = time.time()
     stride = window_params.stride
     min_width = window_params.min_width
     actual_xs = []
     actual_ys = []
     actual_ws = []
     actual_hs = []
     num_windows = 0
     # we want to be able to capture objects that extend past the image
     # we always iterate over locations in native space, and convert to
     # actual image space when we record the window
     w_pad = int(1. * min_width / 2)
     x_min = -w_pad
     for scale in window_params.scales:
         x_max = int(image.width * scale) - w_pad
         if w_pad > 0:
             x_max += stride
         actual_w = int(min_width / scale) + 1
         for ratio in window_params.aspect_ratios:
             h_pad = int(1. * min_width * ratio / 2)
             y_min = -h_pad
             y_max = int(image.height * scale) - h_pad
             if h_pad > 0:
                 y_max += stride
             actual_h = int(min_width / scale * ratio) + 1
             for y in range(y_min, y_max, stride):
                 for x in range(x_min, x_max, stride):
                     actual_ws.append(actual_w)
                     actual_hs.append(actual_h)
                     actual_xs.append(int(x / scale))
                     actual_ys.append(int(y / scale))
     windows = np.array([actual_xs, actual_ys, actual_ws, actual_hs]).T
     windows = BoundingBox.clipboxes_arr(windows,
                                         (0, 0, image.width, image.height))
     if with_time:
         time_elapsed = time.time() - t
         return (windows, time_elapsed)
     else:
         return windows
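
The method only reads four attributes off window_params: stride, min_width, scales, and aspect_ratios. A hypothetical object satisfying that interface (the attribute names come from the code above; the values and the sliding_windows instance are purely illustrative):

from types import SimpleNamespace

window_params = SimpleNamespace(
    stride=8,                       # step size in scaled ("native") coordinates
    min_width=100,                  # window width in native space
    scales=[1.0, 0.5, 0.25],        # image scales to slide over
    aspect_ratios=[0.5, 1.0, 2.0])  # height/width ratios to try per scale
windows = sliding_windows.get_windows(image, window_params=window_params)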
Example #13
 def get_windows(self,image,cls=None,window_params=None,with_time=False):
   """
   Return all windows that can be generated with window_params.
   If with_time=True, return tuple of (windows, time_elapsed).
   """
   assert(cls or window_params)
   if not window_params:
     window_params = self.get_default_window_params(cls)
   t = time.time()
   stride = window_params.stride
   min_width = window_params.min_width
   actual_xs = []
   actual_ys = []
   actual_ws = []
   actual_hs = []
   num_windows = 0
   # we want to be able to capture objects that extend past the image
   # we always iterate over locations in native space, and convert to
   # actual image space when we record the window
   w_pad = int(1.*min_width/2)
   x_min = -w_pad
   for scale in window_params.scales:
     x_max = int(image.width*scale)-w_pad
     if w_pad > 0:
       x_max += stride
     actual_w = int(min_width/scale) + 1
     for ratio in window_params.aspect_ratios:
       h_pad = int(1.*min_width*ratio/2)
       y_min = -h_pad
       y_max = int(image.height*scale)-h_pad
       if h_pad > 0:
         y_max += stride
       actual_h = int(min_width/scale * ratio) + 1
       for y in range(y_min,y_max,stride):
         for x in range(x_min,x_max,stride):
           actual_ws.append(actual_w)
           actual_hs.append(actual_h)
           actual_xs.append(int(x/scale))
           actual_ys.append(int(y/scale))
   windows = np.array([actual_xs,actual_ys,actual_ws,actual_hs]).T
   windows = BoundingBox.clipboxes_arr(windows,(0,0,image.width,image.height))
   if with_time:
     time_elapsed = time.time()-t
     return (windows,time_elapsed)
   else:
     return windows
Example #14
 def test_get_whole_image_bbox(self):
     image = Image(20, 10, [], 'test_image')
     assert (image.get_whole_image_bbox() == BoundingBox((0, 0, 20, 10)))
     image = Image(2, 100, [], 'test_image')
     assert (image.get_whole_image_bbox() == BoundingBox((0, 0, 2, 100)))
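
These comparisons rely on BoundingBox implementing equality over its stored coordinates. A minimal sketch of a compatible class, assuming boxes are stored as a 4-element [x, y, w, h] array (an assumption for illustration; the real class has more methods, e.g. get_overlap and the array converters used elsewhere on this page):

import numpy as np

class BoundingBox(object):
    def __init__(self, seq, format='width'):
        # seq is (x, y, w, h) by default, or (x1, y1, x2, y2) if format='corners'.
        arr = np.array(seq, dtype=float)
        if format == 'corners':
            arr[2] -= arr[0]  # w = x2 - x1
            arr[3] -= arr[1]  # h = y2 - y1
        self.arr = arr

    def get_arr(self):
        return self.arr

    def __eq__(self, other):
        return np.all(self.arr == other.arr)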
Example #15
def main():
    parser = argparse.ArgumentParser(description="Execute different functions of our system")
    parser.add_argument("--first_n", type=int, help="only take the first N images in the datasets")
    parser.add_argument(
        "--name", help="name for this run", default="default", choices=["default", "nolateral", "nohal", "halfsize"]
    )
    parser.add_argument("--force", action="store_true", default=False, help="force overwrite")

    args = parser.parse_args()
    print(args)

    # configuration class
    class config(object):
        pass

    cfg = config()
    cfg.testname = "../ctfdet/data/finalRL/%s2_test"  # object model
    cfg.bottomup = False  # use complete search
    cfg.resize = 1.0  # resize the input image
    cfg.hallucinate = True  # use HOGs up to 4 pixels
    cfg.initr = 1  # initial radius of the CtF search
    cfg.ratio = 1  # radius at the next levels
    cfg.deform = True  # use deformation
    cfg.usemrf = True  # use lateral constraints

    if args.name == "default":
        cfg
        # sticking with the default params
    elif args.name == "nolateral":
        cfg.usemrf = False
    elif args.name == "nohal":
        cfg.hallucinate = False
    elif args.name == "halfsize":
        cfg.resize = 0.5

    # run on all three splits
    test_datasets = ["val", "test", "train"]
    for test_dataset in test_datasets:
        # Load the dataset
        dataset = Dataset("full_pascal_" + test_dataset)
        if args.first_n:
            dataset.images = dataset.images[: args.first_n]

        # create directory for storing cached detections
        dirname = "./temp_data"
        if os.path.exists("/u/sergeyk"):
            dirname = "/u/vis/x1/sergeyk/object_detection"
        dirname = dirname + "/ctfdets/%s" % (args.name)
        ut.makedirs(dirname)

        num_images = len(dataset.images)
        for img_ind in range(comm_rank, num_images, comm_size):
            # check for existing det
            image = dataset.images[img_ind]
            filename = os.path.join(dirname, image.name + ".npy")
            if os.path.exists(filename) and not args.force:
                # table = np.load(filename)[()]
                continue

            # read the image
            imname = dataset.get_image_filename(img_ind)
            img = util2.myimread(imname, resize=cfg.resize)
            # compute the hog pyramid
            f = pyrHOG2.pyrHOG(
                img, interv=10, savedir="", notsave=True, notload=True, hallucinate=cfg.hallucinate, cformat=True
            )

            # for each class
            all_dets = []
            for ccls in dataset.classes:
                t = time.time()
                cls_ind = dataset.get_ind(ccls)
                print "%s Img %d/%d Class: %s" % (test_dataset, img_ind + 1, num_images, ccls)
                # load the class model
                m = util2.load("%s%d.model" % (cfg.testname % ccls, 7))
                res = []
                t1 = time.time()
                # for each aspect
                for clm, m_mix in enumerate(m):
                    # scan the image with left and right models
                    res.append(
                        pyrHOG2RL.detectflip(
                            f,
                            m_mix,
                            None,
                            hallucinate=cfg.hallucinate,
                            initr=cfg.initr,
                            ratio=cfg.ratio,
                            deform=cfg.deform,
                            bottomup=cfg.bottomup,
                            usemrf=cfg.usemrf,
                            small=False,
                            cl=clm,
                        )
                    )
                fuse = []
                numhog = 0
                # fuse the detections
                for mix in res:
                    tr = mix[0]
                    fuse += mix[1]
                    numhog += mix[3]
                rfuse = tr.rank(fuse, maxnum=300)
                nfuse = tr.cluster(rfuse, ovr=0.3, inclusion=False)
                # print "Number of computed HOGs:",numhog
                time_elapsed = time.time() - t
                print "Elapsed time: %.3f s" % time_elapsed

                bboxes = [nf["bbox"] for nf in nfuse]
                scores = [nf["scr"] for nf in nfuse]
                assert len(bboxes) == len(scores)
                if len(bboxes) > 0:
                    arr = np.zeros((len(bboxes), 7))
                    arr[:, :4] = BoundingBox.convert_arr_from_corners(np.array(bboxes))
                    arr[:, 4] = scores
                    arr[:, 5] = time_elapsed
                    arr[:, 6] = cls_ind
                    all_dets.append(arr)
            cols = ["x", "y", "w", "h", "score", "time", "cls_ind"]
            if len(all_dets) > 0:
                all_dets = np.concatenate(all_dets, 0)
            else:
                all_dets = np.array([])
            table = Table(all_dets, cols)
            np.save(filename, table)
Example #16
 def get_whole_image_bbox(self):
     "Return a BoundingBox with (0,0,width,height) of the image."
     return BoundingBox((0, 0, self.width, self.height))
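
For reference, a sketch of what BoundingBox.clipboxes_arr could look like, given how it is called throughout these examples: boxes in [x, y, w, h] rows, clipped against bounds of (min_x, min_y, max_x, max_y). This is an assumption, not the project's code:

import numpy as np

def clipboxes_arr(arr, bounds):
    # arr: N x 4 array of [x, y, w, h] boxes; bounds: (min_x, min_y, max_x, max_y).
    min_x, min_y, max_x, max_y = bounds
    arr = np.atleast_2d(arr).astype(float)
    # Clip the corners, then convert back to widths and heights.
    x2 = np.minimum(arr[:, 0] + arr[:, 2], max_x)
    y2 = np.minimum(arr[:, 1] + arr[:, 3], max_y)
    arr[:, 0] = np.maximum(arr[:, 0], min_x)
    arr[:, 1] = np.maximum(arr[:, 1], min_y)
    arr[:, 2] = x2 - arr[:, 0]
    arr[:, 3] = y2 - arr[:, 1]
    return arr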
Example #17
def main():
    parser = argparse.ArgumentParser(
        description='Execute different functions of our system')
    parser.add_argument('--first_n',
                        type=int,
                        help='only take the first N images in the datasets')
    parser.add_argument('--name',
                        help='name for this run',
                        default='default',
                        choices=['default', 'nolateral', 'nohal', 'halfsize'])
    parser.add_argument('--force',
                        action='store_true',
                        default=False,
                        help='force overwrite')

    args = parser.parse_args()
    print(args)

    #configuration class
    class config(object):
        pass

    cfg = config()
    cfg.testname = "../ctfdet/data/finalRL/%s2_test"  #object model
    cfg.bottomup = False  #use complete search
    cfg.resize = 1.0  #resize the input image
    cfg.hallucinate = True  #use HOGs up to 4 pixels
    cfg.initr = 1  #initial radius of the CtF search
    cfg.ratio = 1  #radius at the next levels
    cfg.deform = True  #use deformation
    cfg.usemrf = True  #use lateral constraints

    if args.name == 'default':
        pass  # sticking with the default params
    elif args.name == 'nolateral':
        cfg.usemrf = False
    elif args.name == 'nohal':
        cfg.hallucinate = False
    elif args.name == 'halfsize':
        cfg.resize = 0.5

    # run on all three splits
    test_datasets = ['val', 'test', 'train']
    for test_dataset in test_datasets:
        # Load the dataset
        dataset = Dataset('full_pascal_' + test_dataset)
        if args.first_n:
            dataset.images = dataset.images[:args.first_n]

        # create directory for storing cached detections
        dirname = './temp_data'
        if os.path.exists('/u/sergeyk'):
            dirname = '/u/vis/x1/sergeyk/object_detection'
        dirname = dirname + '/ctfdets/%s' % (args.name)
        ut.makedirs(dirname)

        num_images = len(dataset.images)
        for img_ind in range(comm_rank, num_images, comm_size):
            # check for existing det
            image = dataset.images[img_ind]
            filename = os.path.join(dirname, image.name + '.npy')
            if os.path.exists(filename) and not args.force:
                #table = np.load(filename)[()]
                continue

            #read the image
            imname = dataset.get_image_filename(img_ind)
            img = util2.myimread(imname, resize=cfg.resize)
            #compute the hog pyramid
            f = pyrHOG2.pyrHOG(img,
                               interv=10,
                               savedir="",
                               notsave=True,
                               notload=True,
                               hallucinate=cfg.hallucinate,
                               cformat=True)

            #for each class
            all_dets = []
            for ccls in dataset.classes:
                t = time.time()
                cls_ind = dataset.get_ind(ccls)
                print "%s Img %d/%d Class: %s" % (test_dataset, img_ind + 1,
                                                  num_images, ccls)
                #load the class model
                m = util2.load("%s%d.model" % (cfg.testname % ccls, 7))
                res = []
                t1 = time.time()
                #for each aspect
                for clm, m_mix in enumerate(m):
                    #scan the image with left and right models
                    res.append(
                        pyrHOG2RL.detectflip(f,
                                             m_mix,
                                             None,
                                             hallucinate=cfg.hallucinate,
                                             initr=cfg.initr,
                                             ratio=cfg.ratio,
                                             deform=cfg.deform,
                                             bottomup=cfg.bottomup,
                                             usemrf=cfg.usemrf,
                                             small=False,
                                             cl=clm))
                fuse = []
                numhog = 0
                #fuse the detections
                for mix in res:
                    tr = mix[0]
                    fuse += mix[1]
                    numhog += mix[3]
                rfuse = tr.rank(fuse, maxnum=300)
                nfuse = tr.cluster(rfuse, ovr=0.3, inclusion=False)
                #print "Number of computed HOGs:",numhog
                time_elapsed = time.time() - t
                print "Elapsed time: %.3f s" % time_elapsed

                bboxes = [nf['bbox'] for nf in nfuse]
                scores = [nf['scr'] for nf in nfuse]
                assert (len(bboxes) == len(scores))
                if len(bboxes) > 0:
                    arr = np.zeros((len(bboxes), 7))
                    arr[:, :4] = BoundingBox.convert_arr_from_corners(
                        np.array(bboxes))
                    arr[:, 4] = scores
                    arr[:, 5] = time_elapsed
                    arr[:, 6] = cls_ind
                    all_dets.append(arr)
            cols = ['x', 'y', 'w', 'h', 'score', 'time', 'cls_ind']
            if len(all_dets) > 0:
                all_dets = np.concatenate(all_dets, 0)
            else:
                all_dets = np.array([])
            table = Table(all_dets, cols)
            np.save(filename, table)
Example #18
  def get_windows_new(self, image, cls, metaparams=None, with_time=False, at_most=200000, force=False):
    """
    Generate windows by using ground truth window stats and metaparams.
    metaparams must contain keys 'samples_per_500px', 'num_scales', 'num_ratios', 'mode'
    metaparams['mode'] can be 'linear' or 'importance' and refers to the method
    of sampling intervals per window parameter.
    If with_time=True, return tuple of (windows, time_elapsed).
    """
    if not metaparams:
      metaparams = {
        'samples_per_500px': 83,
        'num_scales': 12,
        'num_ratios': 6,
        'mode': 'importance',
        'priority': 0}

    t = time.time()
    x_samples = int(image.width/500. * metaparams['samples_per_500px'])
    y_samples = int(image.height/500. * metaparams['samples_per_500px'])

    # check for cached windows and return if found
    dirname = config.get_sliding_windows_cached_dir(self.train_name)
    filename = '%s_%d_%d_%s_%s_%d_%d_%d.npy'%(
        cls,
        metaparams['samples_per_500px'],
        metaparams['num_scales'],
        metaparams['num_ratios'],
        metaparams['mode'],
        metaparams['priority'],
        x_samples, y_samples)
    filename = os.path.join(dirname,filename)
    if os.path.exists(filename) and not force:
      windows = np.load(filename)
    else:
      # fine, we'll figure out the windows again
      # load the kde for x_scaled,y_scaled,scale,log_ratio
      stats = self.get_stats() 
      kde = stats['%s_kde'%cls]
      x_frac = kde.dataset[0,:]
      y_frac = kde.dataset[1,:]
      scale = kde.dataset[2,:]
      log_ratio = kde.dataset[3,:]

      # given the metaparameters, sample points to generate the complete list of
      # parameter combinations
      if metaparams['mode'] == 'linear':
        x_points = np.linspace(x_frac.min(),x_frac.max(),x_samples)
        y_points = np.linspace(y_frac.min(),y_frac.max(),y_samples)
        scale_points = np.linspace(scale.min(),scale.max(),metaparams['num_scales'])
        ratio_points = np.linspace(log_ratio.min(),log_ratio.max(),metaparams['num_ratios'])
      elif metaparams['mode'] == 'importance':
        x_points = ut.importance_sample(x_frac,x_samples,stats['%s_%s_kde'%(cls,'x_frac')])
        y_points = ut.importance_sample(y_frac,y_samples,stats['%s_%s_kde'%(cls,'y_frac')])
        scale_points = ut.importance_sample(scale,metaparams['num_scales'],stats['%s_%s_kde'%(cls,'scale')])
        ratio_points = ut.importance_sample(log_ratio,metaparams['num_ratios'],stats['%s_%s_kde'%(cls,'log_ratio')])
      else:
        raise RuntimeError("Invalid mode")

      combinations = [x for x in itertools.product(x_points,y_points,scale_points,ratio_points)]
      combinations = np.array(combinations).T
      
      # only take the top-scoring detections
      if metaparams['priority']:
        t22=time.time()
        scores = kde(combinations) # (so slow!)
        print("kde took %.3f s"%(time.time()-t22))
        sorted_inds = np.argsort(-scores)
        max_num = min(at_most,sorted_inds.size)
        combinations = combinations[:,sorted_inds[:max_num]]

      # convert to x,y,scale,ratio,w,h
      scale = combinations[2,:]
      # x = x_frac * image.width
      x = combinations[0,:]*image.width
      # ratio = exp(log_ratio)
      ratio = np.exp(combinations[3,:])
      # y = y_frac * image.height
      y = combinations[1,:]*image.height
      # w = scale*min_width
      w = scale*SlidingWindows.MIN_WIDTH
      # h = w*ratio
      h = w * ratio

      combinations[0,:] = x
      combinations[1,:] = y
      combinations[2,:] = w
      combinations[3,:] = h
      windows = combinations.T
      windows = BoundingBox.clipboxes_arr(windows,(0,0,image.width,image.height))
      np.save(filename,windows) # does not take more than 0.5 sec even for 10**6 windows

    time_elapsed = time.time()-t
    print("get_windows_new() got %d windows in %.3fs"%(windows.shape[0],time_elapsed))
    if with_time:
      return (windows,time_elapsed)
    else:
      return windows
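
In 'importance' mode the code leans on ut.importance_sample to place a fixed number of sample points where a 1-D KDE concentrates its mass. A rough sketch of one way that could work, assuming kde is callable on a grid of points (as scipy.stats.gaussian_kde objects are); the real ut.importance_sample may differ:

import numpy as np

def importance_sample(data, num_points, kde):
  # Evaluate the fitted 1-D density on a dense grid over the data range.
  grid = np.linspace(data.min(), data.max(), 1000)
  density = kde(grid)
  # Build the (normalized) CDF and invert it at evenly spaced quantiles,
  # so each returned point carries roughly equal probability mass.
  cdf = np.cumsum(density)
  cdf = cdf / cdf[-1]
  quantiles = np.linspace(0, 1, num_points)
  return np.interp(quantiles, cdf, grid)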
Example #19
    def get_recalls(self, cls, metaparams, mode, window_intervals,
                    min_overlaps):
        """
    Return nparray of num_intervals x num_overlaps, with each entry specifying
    the recall for that combination of window_interval and min_overlap.
    window_intervals must begin with 0.
    mode must be in ['sw','jw']
    """
        assert (window_intervals[0] == 0)
        num_overlaps = len(min_overlaps)
        num_intervals = len(window_intervals)
        times = [0]
        window_nums = [0]
        image_inds = self.dataset.get_pos_samples_for_class(cls)
        num_images = len(image_inds)
        # we are building up a num_images x num_intervals+1 x num_overlaps array
        array = np.zeros((num_images, num_intervals + 1, num_overlaps))
        for i in range(num_images):
            ind = image_inds[i]
            image = self.dataset.images[ind]
            # the first interval is 0, so there aren't any window proposals
            array[i, 0, :] = 0
            gts = image.get_ground_truth(cls)
            num_gt = gts.shape[0]
            # the last row of the matrix is the number of ground truth
            array[i, num_intervals, :] = num_gt
            # now get the windows and append the statistics information
            #windows,time_elapsed = window_generator.get_windows(image,cls,with_time=True)

            if mode == 'sw':
                windows, time_elapsed = self.get_windows_new(
                    image,
                    cls,
                    metaparams,
                    with_time=True,
                    at_most=max(window_intervals))
            elif mode == 'jw':
                windows, time_elapsed = self.jw.get_windows(image,
                                                            cls,
                                                            K=10000)
            else:
                raise RuntimeError('impossible mode')

            # shuffle the windows if we want to take them in random order
            if mode == 'sw' and not metaparams['priority']:
                rand_ind = np.random.permutation(windows.shape[0])
                windows = windows[rand_ind, :]

            window_nums.append(windows.shape[0])
            times.append(time_elapsed)
            # go through each interval and count how many ground truth are matched
            for j in range(1, len(window_intervals)):
                max_ind = window_intervals[j]
                # if we are going to ask for more windows than are available,
                # the recall is going to be the same as before, so just copy that
                if max_ind > windows.shape[0]:
                    array[i, j, :] = array[i, j - 1, :]
                    continue
                # otherwise, count the number of ground truths that are overlapped
                # NOTE: a single window can overlap multiple ground truth in this
                # scheme
                for gt in gts.arr:
                    overlaps = BoundingBox.get_overlap(windows[:max_ind, :4],
                                                       gt[:4])
                    for k, min_overlap in enumerate(min_overlaps):
                        if np.any(overlaps >= min_overlap):
                            array[i, j, k] += 1
        print(
            "Windows generated per image: %d +/- %.3f, in %.3f +/- %.3f sec" %
            (np.mean(window_nums), np.std(window_nums), np.mean(times),
             np.std(times)))
        # reduce to num_intervals+1 x num_overlaps
        sum_array = np.sum(array, axis=0)
        # reduce to num_intervals x num_overlaps
        recalls = sum_array[:-1, :] / sum_array[-1, :]
        return recalls
Example #20
    def get_windows_new(self,
                        image,
                        cls,
                        metaparams=None,
                        with_time=False,
                        at_most=200000,
                        force=False):
        """
    Generate windows by using ground truth window stats and metaparams.
    metaparams must contain keys 'samples_per_500px', 'num_scales', 'num_ratios', 'mode'
    metaparams['mode'] can be 'linear' or 'importance' and refers to the method
    of sampling intervals per window parameter.
    If with_time=True, return tuple of (windows, time_elapsed).
    """
        if not metaparams:
            metaparams = {
                'samples_per_500px': 83,
                'num_scales': 12,
                'num_ratios': 6,
                'mode': 'importance',
                'priority': 0
            }

        t = time.time()
        x_samples = int(image.width / 500. * metaparams['samples_per_500px'])
        y_samples = int(image.height / 500. * metaparams['samples_per_500px'])

        # check for cached windows and return if found
        dirname = config.get_sliding_windows_cached_dir(self.train_name)
        filename = '%s_%d_%d_%s_%s_%d_%d_%d.npy' % (
            cls, metaparams['samples_per_500px'], metaparams['num_scales'],
            metaparams['num_ratios'], metaparams['mode'],
            metaparams['priority'], x_samples, y_samples)
        filename = os.path.join(dirname, filename)
        if os.path.exists(filename) and not force:
            windows = np.load(filename)
        else:
            # fine, we'll figure out the windows again
            # load the kde for x_scaled,y_scaled,scale,log_ratio
            stats = self.get_stats()
            kde = stats['%s_kde' % cls]
            x_frac = kde.dataset[0, :]
            y_frac = kde.dataset[1, :]
            scale = kde.dataset[2, :]
            log_ratio = kde.dataset[3, :]

            # given the metaparameters, sample points to generate the complete list of
            # parameter combinations
            if metaparams['mode'] == 'linear':
                x_points = np.linspace(x_frac.min(), x_frac.max(), x_samples)
                y_points = np.linspace(y_frac.min(), y_frac.max(), y_samples)
                scale_points = np.linspace(scale.min(), scale.max(),
                                           metaparams['num_scales'])
                ratio_points = np.linspace(log_ratio.min(), log_ratio.max(),
                                           metaparams['num_ratios'])
            elif metaparams['mode'] == 'importance':
                x_points = ut.importance_sample(
                    x_frac, x_samples, stats['%s_%s_kde' % (cls, 'x_frac')])
                y_points = ut.importance_sample(
                    y_frac, y_samples, stats['%s_%s_kde' % (cls, 'y_frac')])
                scale_points = ut.importance_sample(
                    scale, metaparams['num_scales'],
                    stats['%s_%s_kde' % (cls, 'scale')])
                ratio_points = ut.importance_sample(
                    log_ratio, metaparams['num_ratios'],
                    stats['%s_%s_kde' % (cls, 'log_ratio')])
            else:
                raise RuntimeError("Invalid mode")

            combinations = [
                x for x in itertools.product(x_points, y_points, scale_points,
                                             ratio_points)
            ]
            combinations = np.array(combinations).T

            # only take the top-scoring detections
            if metaparams['priority']:
                t22 = time.time()
                scores = kde(combinations)  # (so slow!)
                print("kde took %.3f s" % (time.time() - t22))
                sorted_inds = np.argsort(-scores)
                max_num = min(at_most, sorted_inds.size)
                combinations = combinations[:, sorted_inds[:max_num]]

            # convert to x,y,scale,ratio,w,h
            scale = combinations[2, :]
            # x = x_frac * image.width
            x = combinations[0, :] * image.width
            # ratio = exp(log_ratio)
            ratio = np.exp(combinations[3, :])
            # y = y_frac * image.height
            y = combinations[1, :] * image.height
            # w = scale*min_width
            w = scale * SlidingWindows.MIN_WIDTH
            # h = w*ratio
            h = w * ratio

            combinations[0, :] = x
            combinations[1, :] = y
            combinations[2, :] = w
            combinations[3, :] = h
            windows = combinations.T
            windows = BoundingBox.clipboxes_arr(
                windows, (0, 0, image.width, image.height))
            # saving does not take more than 0.5 sec even for 10**6 windows
            np.save(filename, windows)

        time_elapsed = time.time() - t
        print("get_windows_new() got %d windows in %.3fs" %
              (windows.shape[0], time_elapsed))
        if with_time:
            return (windows, time_elapsed)
        else:
            return windows
Example #21
def test_convert_to_and_fro():
  bb = np.array([ 139.,  200.,   69.,  102.])
  bb_c = BoundingBox.convert_arr_to_corners(bb)
  bb2 = BoundingBox.convert_arr_from_corners(bb_c)
  assert(np.all(bb == bb2))
Example #22
  def get_recalls(self,cls,metaparams,mode,window_intervals,min_overlaps):
    """
    Return nparray of num_intervals x num_overlaps, with each entry specifying
    the recall for that combination of window_interval and min_overlap.
    window_intervals must begin with 0.
    mode must be in ['sw','jw']
    """
    assert(window_intervals[0] == 0)
    num_overlaps = len(min_overlaps)
    num_intervals = len(window_intervals)
    times = [0]
    window_nums = [0]
    image_inds = self.dataset.get_pos_samples_for_class(cls)
    num_images = len(image_inds)
    # we are building up a num_images x num_intervals+1 x num_overlaps array
    array = np.zeros((num_images,num_intervals+1,num_overlaps))
    for i in range(num_images):
      ind = image_inds[i]
      image = self.dataset.images[ind]
      # the first interval is 0, so there aren't any window proposals 
      array[i,0,:] = 0
      gts = image.get_ground_truth(cls)
      num_gt = gts.shape[0]
      # the last row of the matrix is the number of ground truth
      array[i,num_intervals,:] = num_gt
      # now get the windows and append the statistics information
      #windows,time_elapsed = window_generator.get_windows(image,cls,with_time=True)

      if mode=='sw': 
        windows,time_elapsed = self.get_windows_new(image,cls,metaparams,with_time=True,at_most=max(window_intervals))
      elif mode=='jw':
        windows,time_elapsed = self.jw.get_windows(image,cls,K=10000)
      else:
        raise RuntimeError('impossible mode')

      # shuffle the windows if we want to take them in random order
      if mode=='sw' and not metaparams['priority']:
        rand_ind = np.random.permutation(windows.shape[0])
        windows = windows[rand_ind,:]

      window_nums.append(windows.shape[0])
      times.append(time_elapsed)
      # go through each interval and count how many ground truth are matched
      for j in range(1,len(window_intervals)):
        max_ind = window_intervals[j]
        # if we are going to ask for more windows than are available,
        # the recall is going to be the same as before, so just copy that
        if max_ind>windows.shape[0]:
          array[i,j,:] = array[i,j-1,:]
          continue
        # otherwise, count the number of ground truths that are overlapped
        # NOTE: a single window can overlap multiple ground truth in this
        # scheme
        for gt in gts.arr:
          overlaps = BoundingBox.get_overlap(windows[:max_ind,:4],gt[:4])
          for k,min_overlap in enumerate(min_overlaps):
            if np.any(overlaps>=min_overlap):
              array[i,j,k] += 1
    print("Windows generated per image: %d +/- %.3f, in %.3f +/- %.3f sec"%(
      np.mean(window_nums),np.std(window_nums),
      np.mean(times),np.std(times)))
    # reduce to num_intervals+1 x num_overlaps
    sum_array = np.sum(array,axis=0)
    # reduce to num_intervals x num_overlaps
    recalls = sum_array[:-1,:]/sum_array[-1,:]
    return recalls