Example #1
import argparse
import os
import re
import shutil

import numpy as np

# Dataset, SlidingWindows, DatasetPolicy, JumpingWindowsDetector,
# JumpingWindowsDetectorGrid, Extractor, Table, config, ut, comm_rank,
# comm_size, train_with_hard_negatives, run_pedersoli, and
# evaluate_get_pos_windows come from the surrounding project (their imports
# are not shown in the original snippet).

def main():
  parser = argparse.ArgumentParser(description='Execute different functions of our system')
  parser.add_argument('mode',
    choices=[
      'window_stats', 'evaluate_metaparams', 'evaluate_jw',
      'evaluate_get_pos_windows', 'train_svm',
      'extract_sift','extract_assignments','extract_codebook',
      'evaluate_jw_grid', 'final_metaparams',
      'assemble_dpm_dets','ctfdet','assemble_ctf_dets'
      ])
  parser.add_argument('--test_dataset', choices=['val','test','train'],
      default='test', help='dataset to use for testing; the training dataset '
      'is automatically inferred (val->train and test->trainval)')
  parser.add_argument('--first_n', type=int,
      help='only take the first N images in the datasets')
  parser.add_argument('--bounds', type=str,
      help='the start_time and deadline_time for the ImagePolicy and corresponding evaluation. ex: (1,5)')
  parser.add_argument('--name', help='name for this run')
  parser.add_argument('--priors', default='random',
      help='comma-separated list of policies for selecting the next action; '
      'choose from random, oracle, fixed_order, no_smooth, backoff. '
      'ex: --priors=random,oracle,no_smooth')
  parser.add_argument('--compare_evals', action='store_true',
      default=False, help='plot all the given priors modes on the same plot')
  parser.add_argument('--detector', choices=['perfect','perfect_with_noise', 'dpm','ctf'],
      default='perfect', help='detector type')
  parser.add_argument('--force', action='store_true', 
      default=False, help='force overwrite')
  parser.add_argument('--gist', action='store_true', 
      default=False, help='use GIST as one of the actions')
  parser.add_argument('--clear_tmp', action='store_true',
      default=False, help='clear the cached windows folder before running')
  parser.add_argument('--feature_type', choices=['sift','dsift'],
      default='dsift', help='use this feature type')
  parser.add_argument('--kernel', choices=['chi2','rbf'],
      default='chi2', help='kernel to train svm on')

  args = parser.parse_args()
  if args.priors:
    args.priors = args.priors.split(',')
  if args.bounds:
    args.bounds = [float(x) for x in re.findall(r'\d+', args.bounds)]
    assert len(args.bounds) == 2
  print(args)

  # Load the dataset
  dataset = Dataset('full_pascal_'+args.test_dataset)
  if args.first_n:
    dataset.images = dataset.images[:args.first_n]

  # Infer train_dataset
  if args.test_dataset=='test':
    train_dataset = Dataset('full_pascal_trainval')
  elif args.test_dataset=='val':
    train_dataset = Dataset('full_pascal_train')
  else:
    # args.test_dataset == 'train'
    print("Setting train_dataset to the test dataset")
    train_dataset = dataset
  
  # Create window generator
  sw = SlidingWindows(dataset,train_dataset)

  if args.clear_tmp:
    dirname = config.get_sliding_windows_cached_dir(train_dataset.get_name())
    shutil.rmtree(dirname, ignore_errors=True)
    dirname = config.get_sliding_windows_cached_dir(dataset.get_name())
    shutil.rmtree(dirname, ignore_errors=True)

  if args.mode=='assemble_dpm_dets':
    policy = DatasetPolicy(dataset,train_dataset,sw)
    dets = policy.load_ext_detections(dataset,suffix='dpm_may25')

  if args.mode=='assemble_ctf_dets':
    policy = DatasetPolicy(dataset,train_dataset,sw)
    dets = policy.load_ext_detections(dataset,'ctf','ctf_default')
    dets = policy.load_ext_detections(dataset,'ctf','ctf_nohal')
    dets = policy.load_ext_detections(dataset,'ctf', 'ctf_halfsize')

  if args.mode=='evaluate_get_pos_windows':
    evaluate_get_pos_windows(train_dataset)
    return

  if args.mode=='window_stats':
    "Compute and plot the statistics of ground truth window parameters."
    results = SlidingWindows.get_dataset_window_stats(train_dataset,plot=True)

  if args.mode=='ctfdet':
    """Run Pedersoli's detector on the dataset and assemble into one Table."""
    run_pedersoli(dataset)

  if args.mode=='evaluate_jw':
    """
    Evaluate the jumping window approach by producing plots of recall vs.
    #windows.
    """
    # TODO hack: both sw and jw should subclass something like WindowGenerator
    jw = JumpingWindowsDetector(use_scale=True)
    sw.jw = jw
    #classes = dataset.classes
    classes = ['car']
#    classes = ['bicycle' ,'car','horse', 'sofa',\
#               'bird',  'chair',     'motorbike', 'train',\
#               'boat',  'cow',       'person',    'tvmonitor',\
#               'bottle','diningtable',  'pottedplant',\
#               'bus','dog'     ,'sheep']
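    # Stripe the classes across parallel workers: rank r handles classes
    # r, r+comm_size, r+2*comm_size, ...; comm_rank and comm_size presumably
    # come from an MPI setup in the surrounding project.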
    for cls_idx in range(comm_rank, len(classes), comm_size):
    #for cls in dataset.classes:
      cls = classes[cls_idx]
      dirname = config.get_jumping_windows_dir(dataset.get_name())
      filename = os.path.join(dirname,'%s'%cls)
      sw.evaluate_recall(cls, filename, metaparams=None, mode='jw', plot=True)
  
  if args.mode=='evaluate_jw_grid':
    """
    Evaluate the jumping window approach by producing plots of recall vs.
    #windows.
    """
    sw = SlidingWindows(dataset,train_dataset)
    jw = JumpingWindowsDetectorGrid()
    sw.jw = jw
    for cls in dataset.classes:
      dirname = config.get_jumping_windows_dir(dataset.get_name())
      filename = os.path.join(dirname,'%s'%cls)
      if os.path.isfile(config.data_dir + 'JumpingWindows/'+cls):
        sw.evaluate_recall(cls, filename, metaparams=None, mode='jw', plot=True)

  if args.mode=='train_svm':
    # machine-specific switch: use deterministic subsets on one dev machine
    randomize = not os.path.exists('/home/tobibaum')
    
    d = Dataset('full_pascal_train')
    dtest = Dataset('full_pascal_val')  
    e = Extractor()  
    classes = config.pascal_classes  
    num_words = 3000
    iters = 5
    feature_type = 'dsift'
    codebook_samples = 15
    num_pos = 'max'
    testsize = 'max'
    if args.first_n:
      num_pos = args.first_n
      testsize = 1.5*num_pos
     
    kernel = args.kernel
    
    if comm_rank == 0:
      ut.makedirs(config.data_dir + 'features/' + feature_type + '/times/')
      ut.makedirs(config.data_dir + 'features/' + feature_type + '/codebooks/times/')
      ut.makedirs(config.data_dir + 'features/' + feature_type + '/svms/train_times/')
      
    for cls_idx in range(comm_rank, len(classes), comm_size): 
    #for cls in classes:
      cls = classes[cls_idx]
      codebook = e.get_codebook(d, feature_type)
      pos_arr = d.get_pos_windows(cls)
      
      neg_arr = d.get_neg_windows(pos_arr.shape[0], cls, max_overlap=0)
      
      if num_pos != 'max':
        if not randomize:
          pos_arr = pos_arr[:num_pos]
          neg_arr = neg_arr[:num_pos]
        else:
          # np.random.random_integers is deprecated; randint's high bound is exclusive
          rand = np.random.randint(0, pos_arr.shape[0], size=num_pos)
          pos_arr = pos_arr[rand]
          rand = np.random.randint(0, neg_arr.shape[0], size=num_pos)
          neg_arr = neg_arr[rand]
      pos_table = Table(pos_arr, ['x','y','w','h','img_ind'])
      neg_table = Table(neg_arr, pos_table.cols)      
      train_with_hard_negatives(d, dtest,  num_words,codebook_samples,codebook,\
                                cls, pos_table, neg_table,feature_type, \
                                iterations=iters, kernel=kernel, L=2, \
                                testsize=testsize,randomize=randomize)

  if args.mode=='evaluate_metaparams':
    """
    Grid search over metaparams values for get_windows_new, with the AUC of
    recall vs. # windows evaluation.
    """
    sw.grid_search_over_metaparams()
    return

  if args.mode=='final_metaparams':
    dirname = config.get_sliding_windows_metaparams_dir(train_dataset.get_name())
    # currently these are the best auc/complexity params
    best_params_for_classes = [
        (62,15,12,'importance',0), #aeroplane
        (83,15,12,'importance',0), #bicycle
        (62,15,12,'importance',0), #bird
        (62,15,12,'importance',0), #boat
        (125,12,12,'importance',0), #bottle
        (83,12,9,'importance',0), #bus
        (125,15,9,'importance',0), #car
        (125,12,12,'linear',0), #cat
        (125,15,9,'importance',0), #chair
        (125,9,6,'importance',0), #cow
        (125,15,6,'linear',0), #diningtable
        (62,15,12,'importance',0), #dog
        (83,15,6,'importance',0), #horse
        (83,12,6,'importance',0), #motorbike
        (83,15,12,'importance',0), #person
        (83,15,6,'importance',0), #pottedplant
        (83,15,12,'importance',0), #sheep
        (83,9,6,'importance',0), #sofa
        (62,12,6,'importance',0), #train
        (62,12,12,'importance',0), #tvmonitor
        (125,9,12,'importance',0) #all
        ]
    # ACTUALLY THEY ARE ALL THE SAME!
    cheap_params = (62, 9, 6, 'importance', 0)
    for i in range(comm_rank,dataset.num_classes(),comm_size):
      cls = dataset.classes[i]
      best_params = best_params_for_classes[i]
      samples, num_scales, num_ratios, mode, priority = best_params
      #samples, num_scales, num_ratios, mode, priority = cheap_params

      metaparams = {
        'samples_per_500px': samples,
        'num_scales': num_scales,
        'num_ratios': num_ratios,
        'mode': mode,
        'priority': 0 }
      filename = '%s_%d_%d_%d_%s_%d'%(
          cls,
          metaparams['samples_per_500px'],
          metaparams['num_scales'],
          metaparams['num_ratios'],
          metaparams['mode'],
          metaparams['priority'])
      filename = os.path.join(dirname,filename)

      tables = sw.evaluate_recall(cls,filename,metaparams,'sw',plot=True,force=False)

      metaparams = {
        'samples_per_500px': samples,
        'num_scales': num_scales,
        'num_ratios': num_ratios,
        'mode': mode,
        'priority': 1 }
      filename = '%s_%d_%d_%d_%s_%d'%(
          cls,
          metaparams['samples_per_500px'],
          metaparams['num_scales'],
          metaparams['num_ratios'],
          metaparams['mode'],
          metaparams['priority'])
      filename = os.path.join(dirname,filename)

      tables = sw.evaluate_recall(cls,filename,metaparams,'sw',plot=True,force=False)
    return

  if args.mode=='extract_sift':
    e=Extractor()
    e.extract_all(['sift'], ['full_pascal_trainval','full_pascal_test'], 0, 0) 
    
  if args.mode=='extract_assignments':
    e=Extractor()
    feature_type = 'sift'
    for image_set in ['full_pascal_trainval','full_pascal_test']:
      d = Dataset(image_set)
      codebook = e.get_codebook(d, feature_type)  
      print('codebook loaded')
      
      for img_ind in range(comm_rank,len(d.images),comm_size):
        img = d.images[img_ind]
      #for img in d.images:
        e.get_assignments(np.array([0,0,img.size[0],img.size[1]]), feature_type, \
                          codebook, img)

  if args.mode=='extract_codebook':
    d = Dataset('full_pascal_trainval')
    e = Extractor()
    codebook = e.get_codebook(d, args.feature_type)
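
For reference, here is a minimal sketch of how the --priors and --bounds strings are parsed above (the literal flag values are illustrative, not taken from the original project):

import re

# --priors=random,oracle,no_smooth  ->  ['random', 'oracle', 'no_smooth']
priors = 'random,oracle,no_smooth'.split(',')

# --bounds='(1,5)'  ->  [1.0, 5.0]; the regex keeps only the digit runs,
# so the parentheses and comma in the flag value are ignored
bounds = [float(x) for x in re.findall(r'\d+', '(1,5)')]
assert len(bounds) == 2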
Example #2
    # Excerpted from SlidingWindows; time, os, itertools, numpy (np), config,
    # ut, and BoundingBox are assumed to be imported at module level in the
    # original project.
    def get_windows_new(self,
                        image,
                        cls,
                        metaparams=None,
                        with_time=False,
                        at_most=200000,
                        force=False):
        """
    Generate windows by using ground truth window stats and metaparams.
    metaparams must contain keys 'samples_per_500px', 'num_scales', 'num_ratios', 'mode'
    metaparams['mode'] can be 'linear' or 'importance' and refers to the method
    of sampling intervals per window parameter.
    If with_time=True, return tuple of (windows, time_elapsed).
    """
        if not metaparams:
            metaparams = {
                'samples_per_500px': 83,
                'num_scales': 12,
                'num_ratios': 6,
                'mode': 'importance',
                'priority': 0
            }

        t = time.time()
        img_width, img_height = image.width, image.height
        x_samples = int(img_width / 500. * metaparams['samples_per_500px'])
        y_samples = int(img_height / 500. * metaparams['samples_per_500px'])

        # check for cached windows and return if found
        dirname = config.get_sliding_windows_cached_dir(self.train_name)
        filename = '%s_%d_%d_%s_%s_%d_%d_%d.npy' % (
            cls, metaparams['samples_per_500px'], metaparams['num_scales'],
            metaparams['num_ratios'], metaparams['mode'],
            metaparams['priority'], x_samples, y_samples)
        filename = os.path.join(dirname, filename)
        if os.path.exists(filename) and not force:
            windows = np.load(filename)
        else:
            # fine, we'll figure out the windows again
            # load the kde for x_scaled,y_scaled,scale,log_ratio
            stats = self.get_stats()
            kde = stats['%s_kde' % cls]
            x_frac = kde.dataset[0, :]
            y_frac = kde.dataset[1, :]
            scale = kde.dataset[2, :]
            log_ratio = kde.dataset[3, :]

            # given the metaparameters, sample points to generate the complete list of
            # parameter combinations
            if metaparams['mode'] == 'linear':
                x_points = np.linspace(x_frac.min(), x_frac.max(), x_samples)
                y_points = np.linspace(y_frac.min(), y_frac.max(), y_samples)
                scale_points = np.linspace(scale.min(), scale.max(),
                                           metaparams['num_scales'])
                ratio_points = np.linspace(log_ratio.min(), log_ratio.max(),
                                           metaparams['num_ratios'])
            elif metaparams['mode'] == 'importance':
                x_points = ut.importance_sample(
                    x_frac, x_samples, stats['%s_%s_kde' % (cls, 'x_frac')])
                y_points = ut.importance_sample(
                    y_frac, y_samples, stats['%s_%s_kde' % (cls, 'y_frac')])
                scale_points = ut.importance_sample(
                    scale, metaparams['num_scales'],
                    stats['%s_%s_kde' % (cls, 'scale')])
                ratio_points = ut.importance_sample(
                    log_ratio, metaparams['num_ratios'],
                    stats['%s_%s_kde' % (cls, 'log_ratio')])
            else:
                raise RuntimeError("Invalid mode")

            combinations = np.array(list(
                itertools.product(x_points, y_points, scale_points,
                                  ratio_points))).T

            # only take the top-scoring detections
            if metaparams['priority']:
                t22 = time.time()
                scores = kde(combinations)  # (so slow!)
                print("kde took %.3f s" % (time.time() - t22))
                sorted_inds = np.argsort(-scores)
                max_num = min(at_most, sorted_inds.size)
                combinations = combinations[:, sorted_inds[:max_num]]

            # convert to x,y,scale,ratio,w,h
            scale = combinations[2, :]
            # x = x_frac*img_width
            x = combinations[0, :] * img_width
            # ratio = exp(log_ratio)
            ratio = np.exp(combinations[3, :])
            # y = y_frac*img_height
            y = combinations[1, :] * img_height
            # w = scale*min_width
            w = scale * SlidingWindows.MIN_WIDTH
            # h = w*ratio
            h = w * ratio

            combinations[0, :] = x
            combinations[1, :] = y
            combinations[2, :] = w
            combinations[3, :] = h
            windows = combinations.T
            windows = BoundingBox.clipboxes_arr(windows,
                                                (0, 0, img_width, img_height))
            # saving does not take more than 0.5 sec even for 10**6 windows
            np.save(filename, windows)

        time_elapsed = time.time() - t
        print("get_windows_new() got %d windows in %.3fs" %
              (windows.shape[0], time_elapsed))
        if with_time:
            return (windows, time_elapsed)
        else:
            return windows
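
ut.importance_sample is not shown in these examples. Below is a minimal sketch of what such a function might look like, assuming it places points at evenly spaced quantiles of the fitted KDE so that high-density regions of the window-parameter distribution receive more sample points; the exact behavior of the project's version is an assumption:

import numpy as np

def importance_sample(data, num_points, kde):
    # Evaluate the KDE on a dense grid over the observed data range.
    grid = np.linspace(data.min(), data.max(), 1000)
    pdf = kde(grid)
    # Build an empirical CDF and invert it at evenly spaced quantiles,
    # so dense regions of the distribution get more sample points.
    cdf = np.cumsum(pdf)
    cdf /= cdf[-1]
    quantiles = np.linspace(0.0, 1.0, num_points)
    return np.interp(quantiles, cdf, grid)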
Example #3
  def get_windows_new(self, image, cls, metaparams=None, with_time=False, at_most=200000, force=False):
    """
    Generate windows by using ground truth window stats and metaparams.
    metaparams must contain keys 'samples_per_500px', 'num_scales', 'num_ratios', 'mode'
    metaparams['mode'] can be 'linear' or 'importance' and refers to the method
    of sampling intervals per window parameter.
    If with_time=True, return tuple of (windows, time_elapsed).
    """
    if not metaparams:
      metaparams = {
        'samples_per_500px': 83,
        'num_scales': 12,
        'num_ratios': 6,
        'mode': 'importance',
        'priority': 0}

    t = time.time()
    img_width, img_height = image.width, image.height
    x_samples = int(img_width/500. * metaparams['samples_per_500px'])
    y_samples = int(img_height/500. * metaparams['samples_per_500px'])

    # check for cached windows and return if found
    dirname = config.get_sliding_windows_cached_dir(self.train_name)
    filename = '%s_%d_%d_%s_%s_%d_%d_%d.npy'%(
        cls,
        metaparams['samples_per_500px'],
        metaparams['num_scales'],
        metaparams['num_ratios'],
        metaparams['mode'],
        metaparams['priority'],
        x_samples, y_samples)
    filename = os.path.join(dirname,filename)
    if os.path.exists(filename) and not force:
      windows = np.load(filename)
    else:
      # fine, we'll figure out the windows again
      # load the kde for x_scaled,y_scaled,scale,log_ratio
      stats = self.get_stats() 
      kde = stats['%s_kde'%cls]
      x_frac = kde.dataset[0,:]
      y_frac = kde.dataset[1,:]
      scale = kde.dataset[2,:]
      log_ratio = kde.dataset[3,:]

      # given the metaparameters, sample points to generate the complete list of
      # parameter combinations
      if metaparams['mode'] == 'linear':
        x_points = np.linspace(x_frac.min(),x_frac.max(),x_samples)
        y_points = np.linspace(y_frac.min(),y_frac.max(),y_samples)
        scale_points = np.linspace(scale.min(),scale.max(),metaparams['num_scales'])
        ratio_points = np.linspace(log_ratio.min(),log_ratio.max(),metaparams['num_ratios'])
      elif metaparams['mode'] == 'importance':
        x_points = ut.importance_sample(x_frac,x_samples,stats['%s_%s_kde'%(cls,'x_frac')])
        y_points = ut.importance_sample(y_frac,y_samples,stats['%s_%s_kde'%(cls,'y_frac')])
        scale_points = ut.importance_sample(scale,metaparams['num_scales'],stats['%s_%s_kde'%(cls,'scale')])
        ratio_points = ut.importance_sample(log_ratio,metaparams['num_ratios'],stats['%s_%s_kde'%(cls,'log_ratio')])
      else:
        raise RuntimeError("Invalid mode")

      combinations = np.array(list(itertools.product(
          x_points, y_points, scale_points, ratio_points))).T
      
      # only take the top-scoring detections
      if metaparams['priority']:
        t22=time.time()
        scores = kde(combinations) # (so slow!)
        print("kde took %.3f s"%(time.time()-t22))
        sorted_inds = np.argsort(-scores)
        max_num = min(at_most,sorted_inds.size)
        combinations = combinations[:,sorted_inds[:max_num]]

      # convert to x,y,scale,ratio,w,h
      scale = combinations[2,:]
      # x = x_frac*img_width
      x = combinations[0,:]*img_width
      # ratio = exp(log_ratio)
      ratio = np.exp(combinations[3,:])
      # y = y_frac*img_height
      y = combinations[1,:]*img_height
      # w = scale*min_width
      w = scale*SlidingWindows.MIN_WIDTH
      # h = w*ratio
      h = w * ratio

      combinations[0,:] = x
      combinations[1,:] = y
      combinations[2,:] = w
      combinations[3,:] = h
      windows = combinations.T
      windows = BoundingBox.clipboxes_arr(windows,(0,0,img_width,img_height))
      np.save(filename,windows) # does not take more than 0.5 sec even for 10**6 windows

    time_elapsed = time.time()-t
    print("get_windows_new() got %d windows in %.3fs"%(windows.shape[0],time_elapsed))
    if with_time:
      return (windows,time_elapsed)
    else:
      return windows
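
A hypothetical usage sketch of the function above, assuming sw is a SlidingWindows instance and image is one of the dataset's images; the metaparams values mirror one row of the per-class table in Example #1:

# 'linear' mode with all the keys the docstring requires; priority=0 skips
# the expensive KDE scoring pass
metaparams = {
    'samples_per_500px': 125,
    'num_scales': 12,
    'num_ratios': 12,
    'mode': 'linear',
    'priority': 0,
}
windows, elapsed = sw.get_windows_new(image, 'car', metaparams=metaparams,
                                      with_time=True)
print('got %d windows in %.3fs' % (windows.shape[0], elapsed))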