Ejemplo n.º 1
0
class TestBeliefState(object):
  """Smoke test for BeliefState featurization on the small test datasets."""

  def setup(self):
    """Build a DatasetPolicy and a BeliefState sharing its action list."""
    d = Dataset('test_pascal_trainval',force=True)
    d2 = Dataset('test_pascal_test',force=True)
    config = {'detectors': ['csc_default']}
    self.dp = DatasetPolicy(d,d2,**config)
    self.bs = BeliefState(d,self.dp.actions)

  def test_featurization(self):
    """Compute the full feature vector and print it blocked out for a few actions.

    There are no assertions: the test only checks that the calls run, and
    prints the results for manual inspection.
    """
    ff = self.bs.compute_full_feature()
    np.set_printoptions(precision=2)
    # Single-argument print(...) behaves identically under Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(self.bs.block_out_action(ff,-1))
    print(self.bs.block_out_action(ff,0))
    print(self.bs.block_out_action(ff,3))
Ejemplo n.º 2
0
class TestBeliefState(object):
    """Smoke test for BeliefState featurization on the small test datasets."""

    def setup(self):
        """Build a DatasetPolicy and a BeliefState sharing its action list."""
        d = Dataset('test_pascal_trainval', force=True)
        d2 = Dataset('test_pascal_test', force=True)
        config = {'detectors': ['csc_default']}
        self.dp = DatasetPolicy(d, d2, **config)
        self.bs = BeliefState(d, self.dp.actions)

    def test_featurization(self):
        """Compute the full feature vector and print it blocked out for a few actions.

        There are no assertions: the test only checks that the calls run, and
        prints the results for manual inspection.
        """
        ff = self.bs.compute_full_feature()
        np.set_printoptions(precision=2)
        # Single-argument print(...) behaves identically under Python 2 and 3,
        # whereas the bare print statement is a syntax error on Python 3.
        print(self.bs.block_out_action(ff, -1))
        print(self.bs.block_out_action(ff, 0))
        print(self.bs.block_out_action(ff, 3))
Ejemplo n.º 3
0
    def run_on_image(self, image, dataset, verbose=False, epsilon=0.01):
        """
        Run the policy on a single image, repeatedly selecting and taking
        actions until select_action() returns a negative index or (outside of
        'oracle' mode) the belief-state time passes self.bounds[1].

        Args
        - image: the image to process; queried for detection ground truth and
          passed to each action's get_observations().
        - dataset: supplies the image index and the class list.
        - verbose: if True, print how long processing this image took.
        - epsilon: passed through to self.select_action().

        Return
        - array of detections in the image, with each row as self.det_columns
          (detector columns, then cls_ind, img_ind and the cumulative time at
          which the detections became available); np.array([]) if none.
        - array of multi-label classification outputs, with each row as self.get_cls_cols()
        - list of <s,a,r,s',dt> samples.
        """
        gt = image.get_det_gt(with_diff=True)
        self.tt.tic('run_on_image')

        all_detections = []
        all_clses = []
        samples = []
        prev_ap = 0
        img_ind = dataset.get_img_ind(image)

        # If we have previously run_on_image(), then we already have a reference to an inf_model
        # Otherwise, we make a new one and store a reference to it, to keep it alive
        if hasattr(self, 'inf_model'):
            b = BeliefState(self.train_dataset, self.actions,
                            self.inference_mode, self.bounds, self.inf_model,
                            self.fastinf_model_name)
        else:
            b = BeliefState(self.train_dataset,
                            self.actions,
                            self.inference_mode,
                            self.bounds,
                            fastinf_model_name=self.fastinf_model_name)
            self.b = b
            self.inf_model = b.model

        self.update_actions(b)
        action_ind = self.select_action(b, epsilon)
        step_ind = 0
        entropy_prev = np.mean(b.get_entropies())
        while True:
            # Populate the sample with stuff we know
            sample = Sample()
            sample.step_ind = step_ind
            sample.img_ind = img_ind
            sample.state = b.compute_full_feature()
            sample.action_ind = action_ind

            # TODO: this is incorrect, and results in samples at t=0 to already have detections
            sample.t = b.t

            # prepare for AUC reward stuff
            # TODO: should set time_to_deadline to -Inf if no bounds
            time_to_deadline = 0
            if self.bounds:
                # this should never be less than zero, except for when running oracle
                time_to_deadline = max(0, self.bounds[1] - b.t)
            sample.auc_ap_raw = 0
            sample.auc_ap = 0

            # Take the action and get the observations as a dict
            action = self.actions[action_ind]
            obs = action.obj.get_observations(image)
            dt = obs['dt']

            # If observations include detections, compute the relevant
            # stuff for the sample collection
            sample.det_naive_ap = 0
            sample.det_actual_ap = 0
            if 'dets' not in obs:
                # Keep one entry per step so indices line up with samples.
                all_detections.append(np.array([]))
            else:
                det = action.obj
                dets = obs['dets']
                cls_ind = dataset.classes.index(det.cls)
                if dets.shape[0] > 0:
                    c_vector = np.tile(cls_ind, (np.shape(dets)[0], 1))
                    i_vector = np.tile(img_ind, (np.shape(dets)[0], 1))
                    detections = np.hstack((dets, c_vector, i_vector))
                else:
                    detections = np.array([])
                dets_table = Table(detections,
                                   det.columns + ['cls_ind', 'img_ind'])

                # compute the 'naive' det AP increase,
                # as if we were adding dets to an empty set
                ap = self.ev.compute_det_map(dets_table, gt)
                sample.det_naive_ap = ap

                # TODO: am I needlessly recomputing this table?
                all_detections.append(detections)
                nonempty_dets = [d for d in all_detections if d.shape[0] > 0]
                all_dets_table = Table(np.array([]), dets_table.cols)
                if len(nonempty_dets) > 0:
                    all_dets_table = Table(np.concatenate(nonempty_dets, 0),
                                           dets_table.cols)

                # compute the actual AP increase: adding dets to dets so far
                ap = self.ev.compute_det_map(all_dets_table, gt)
                ap_diff = ap - prev_ap
                sample.det_actual_ap = ap_diff

                # Compute detector AUC reward
                # If the action took longer than we have time, benefit is 0 (which is already set above).
                # Without bounds there is no deadline window to integrate
                # over, so the reward also stays 0 (and self.bounds is never
                # indexed while unset).
                if self.bounds and dt <= time_to_deadline:
                    midway_point = b.t + dt / 2.
                    if midway_point > self.bounds[0]:
                        length = max(0, self.bounds[1] - midway_point)
                    else:
                        length = self.bounds[1] - self.bounds[0]
                    auc_ap = 1. * ap_diff * length
                    sample.auc_ap_raw = auc_ap

                    # Now divide by the potential gain to compute the "normalized" reward
                    # Note that there are two cases: the curve goes up, or it turns down.
                    # In the first case, the normalizing area should be the area to ap=1.
                    # In the second case, the normalizing area should be the area to ap=0.
                    if ap_diff < 0:
                        divisor = time_to_deadline * (prev_ap)
                    else:
                        divisor = time_to_deadline * (1. - prev_ap)
                    if divisor < 0:
                        divisor = 0
                    auc_ap = 1 if divisor == 0 else auc_ap / divisor
                    assert (auc_ap >= -1 and auc_ap <= 1)
                    sample.auc_ap = auc_ap
                prev_ap = ap

            # Update the belief state with the observations
            if action.name == 'gist':
                b.update_with_gist(action_ind, obs['scores'])
            else:
                b.update_with_score(action_ind, obs['score'])

            # mean entropy
            entropy = np.mean(b.get_entropies())
            dh = entropy_prev - entropy  # this is actually -dh :)
            sample.entropy = dh

            # Normalized entropy-reduction reward; zeroed when the action
            # overruns the deadline or the value falls outside [-1, 1]
            # (which also catches NaN).
            auc_entropy = time_to_deadline * dh - dh * dt / 2
            divisor = (time_to_deadline * entropy_prev)
            if divisor == 0:
                auc_entropy = 1
            else:
                auc_entropy /= divisor
            if dt > time_to_deadline:
                auc_entropy = 0
            if not (auc_entropy >= -1 and auc_entropy <= 1):
                auc_entropy = 0
            sample.auc_entropy = auc_entropy

            entropy_prev = entropy

            # TODO: the below line of code should occur before the state is stored in the sample
            b.t += dt
            sample.dt = dt
            samples.append(sample)
            step_ind += 1

            # The updated belief state posterior over C is our classification result
            clses = b.get_p_c().tolist() + [img_ind, b.t]
            all_clses.append(clses)
            # Update action values and pick the next action
            self.update_actions(b)
            action_ind = self.select_action(b, epsilon)

            # check for stopping conditions
            if action_ind < 0:
                break
            if self.bounds and not self.policy_mode == 'oracle':
                if b.t > self.bounds[1]:
                    break

        # in case of 'oracle' mode, re-sort the detections and times in order of AP
        # contributions, and actually re-gather p_c's for clses.
        action_inds = [s.action_ind for s in samples]
        times = [s.dt for s in samples]
        if self.policy_mode == 'oracle':
            naive_aps = np.array([s.det_naive_ap for s in samples])
            # mergesort is stable, so ties keep their original order
            sorted_inds = np.argsort(-naive_aps, kind='mergesort')
            # Index the Python lists directly: np.take() would first convert
            # the list of per-step arrays into one ndarray, which flattens
            # equal-shaped entries and is ill-defined for ragged ones.
            all_detections = [all_detections[i] for i in sorted_inds]
            sorted_action_inds = [action_inds[i] for i in sorted_inds]
            # The per-step durations must follow the same permutation so the
            # cumulative times below match the re-ordered detections (and the
            # times of the re-gathered clses).
            times = [times[i] for i in sorted_inds]

            # actually go through the whole thing again, getting new p_c's
            b.reset()
            all_clses = []
            for action_ind in sorted_action_inds:
                action = self.actions[action_ind]
                obs = action.obj.get_observations(image)
                b.t += obs['dt']
                if action.name == 'gist':
                    b.update_with_gist(action_ind, obs['scores'])
                else:
                    b.update_with_score(action_ind, obs['score'])
                clses = b.get_p_c().tolist() + [img_ind, b.t]
                all_clses.append(clses)

        # now construct the final dets array, with correct times
        cum_times = np.cumsum(times)
        all_times = []
        all_nonempty_detections = []
        for i, dets in enumerate(all_detections):
            num_dets = dets.shape[0]
            if num_dets > 0:
                all_nonempty_detections.append(dets)
                t_vector = np.tile(cum_times[i], (num_dets, 1))
                all_times.append(t_vector)
        if len(all_nonempty_detections) > 0:
            all_detections = np.concatenate(all_nonempty_detections, 0)
            all_times = np.concatenate(all_times, 0)
            # appending 'time' column at end, as promised
            all_detections = np.hstack((all_detections, all_times))
            # we probably went over deadline with the oracle mode, so trim it down
            if self.bounds:
                if np.max(all_times) > self.bounds[1]:
                    first_overdeadline_ind = np.flatnonzero(
                        all_times > self.bounds[1])[0]
                    all_detections = all_detections[:first_overdeadline_ind, :]
        else:
            all_detections = np.array([])
        all_clses = np.array(all_clses)

        if verbose:
            print("DatasetPolicy on image with ind %d took %.3f s" %
                  (img_ind, self.tt.qtoc('run_on_image')))

        # TODO: temp debug thing
        if False:
            print("Action sequence was: %s" % [s.action_ind for s in samples])
            print("here's an image:")
            X = np.vstack((all_clses[:, :-2], image.get_cls_ground_truth()))
            np.set_printoptions(precision=2, suppress=True)
            print(X)
            plt.pcolor(np.flipud(X))
            plt.show()

        return (all_detections, all_clses, samples)
Ejemplo n.º 4
0
  def run_on_image(self, image, dataset, verbose=False, epsilon=0.01):
    """
    Run the policy on a single image, repeatedly selecting and taking
    actions until select_action() returns a negative index or (outside of
    'oracle' mode) the belief-state time passes self.bounds[1].

    Args
    - image: the image to process; queried for detection ground truth and
      passed to each action's get_observations().
    - dataset: supplies the image index and the class list.
    - verbose: if True, print how long processing this image took.
    - epsilon: passed through to self.select_action().

    Return
    - array of detections in the image, with each row as self.det_columns
      (detector columns, then cls_ind, img_ind and the cumulative time at
      which the detections became available); np.array([]) if none.
    - array of multi-label classification outputs, with each row as self.get_cls_cols()
    - list of <s,a,r,s',dt> samples.
    """
    gt = image.get_det_gt(with_diff=True)
    self.tt.tic('run_on_image')

    all_detections = []
    all_clses = []
    samples = []
    prev_ap = 0
    img_ind = dataset.get_img_ind(image)

    # If we have previously run_on_image(), then we already have a reference to an inf_model
    # Otherwise, we make a new one and store a reference to it, to keep it alive
    if hasattr(self,'inf_model'):
      b = BeliefState(self.train_dataset, self.actions, self.inference_mode,
        self.bounds, self.inf_model, self.fastinf_model_name)
    else:
      b = BeliefState(self.train_dataset, self.actions, self.inference_mode,
        self.bounds, fastinf_model_name=self.fastinf_model_name)
      self.b = b
      self.inf_model = b.model

    self.update_actions(b)
    action_ind = self.select_action(b,epsilon)
    step_ind = 0
    entropy_prev = np.mean(b.get_entropies())
    while True:
      # Populate the sample with stuff we know
      sample = Sample()
      sample.step_ind = step_ind
      sample.img_ind = img_ind
      sample.state = b.compute_full_feature()
      sample.action_ind = action_ind

      # TODO: this is incorrect, and results in samples at t=0 to already have detections
      sample.t = b.t

      # prepare for AUC reward stuff
      # TODO: should set time_to_deadline to -Inf if no bounds
      time_to_deadline = 0
      if self.bounds:
        # this should never be less than zero, except for when running oracle
        time_to_deadline = max(0,self.bounds[1]-b.t)
      sample.auc_ap_raw = 0
      sample.auc_ap = 0

      # Take the action and get the observations as a dict
      action = self.actions[action_ind]
      obs = action.obj.get_observations(image)
      dt = obs['dt']

      # If observations include detections, compute the relevant
      # stuff for the sample collection
      sample.det_naive_ap = 0
      sample.det_actual_ap = 0
      if 'dets' not in obs:
        # Keep one entry per step so indices line up with samples.
        all_detections.append(np.array([]))
      else:
        det = action.obj
        dets = obs['dets']
        cls_ind = dataset.classes.index(det.cls)
        if dets.shape[0]>0:
          c_vector = np.tile(cls_ind,(np.shape(dets)[0],1))
          i_vector = np.tile(img_ind,(np.shape(dets)[0],1))
          detections = np.hstack((dets, c_vector, i_vector))
        else:
          detections = np.array([])
        dets_table = Table(detections,det.columns+['cls_ind','img_ind'])

        # compute the 'naive' det AP increase,
        # as if we were adding dets to an empty set
        ap = self.ev.compute_det_map(dets_table,gt)
        sample.det_naive_ap = ap

        # TODO: am I needlessly recomputing this table?
        all_detections.append(detections)
        nonempty_dets = [d for d in all_detections if d.shape[0]>0]
        all_dets_table = Table(np.array([]),dets_table.cols)
        if len(nonempty_dets)>0:
          all_dets_table = Table(np.concatenate(nonempty_dets,0),dets_table.cols)

        # compute the actual AP increase: adding dets to dets so far
        ap = self.ev.compute_det_map(all_dets_table,gt)
        ap_diff = ap-prev_ap
        sample.det_actual_ap = ap_diff

        # Compute detector AUC reward
        # If the action took longer than we have time, benefit is 0 (which is already set above).
        # Without bounds there is no deadline window to integrate over, so
        # the reward also stays 0 (and self.bounds is never indexed while unset).
        if self.bounds and dt <= time_to_deadline:
          midway_point = b.t+dt/2.
          if midway_point > self.bounds[0]:
            length = max(0, self.bounds[1]-midway_point)
          else:
            length = self.bounds[1]-self.bounds[0]
          auc_ap = 1.*ap_diff * length
          sample.auc_ap_raw = auc_ap

          # Now divide by the potential gain to compute the "normalized" reward
          # Note that there are two cases: the curve goes up, or it turns down.
          # In the first case, the normalizing area should be the area to ap=1.
          # In the second case, the normalizing area should be the area to ap=0.
          if ap_diff<0:
            divisor = time_to_deadline*(prev_ap)
          else:
            divisor = time_to_deadline*(1.-prev_ap)
          if divisor < 0:
            divisor = 0
          auc_ap = 1 if divisor == 0 else auc_ap/divisor
          assert(auc_ap>=-1 and auc_ap<=1)
          sample.auc_ap = auc_ap
        prev_ap = ap

      # Update the belief state with the observations
      if action.name=='gist':
        b.update_with_gist(action_ind, obs['scores'])
      else:
        b.update_with_score(action_ind, obs['score'])

      # mean entropy
      entropy = np.mean(b.get_entropies())
      dh = entropy_prev-entropy # this is actually -dh :)
      sample.entropy = dh

      # Normalized entropy-reduction reward; zeroed when the action overruns
      # the deadline or the value falls outside [-1, 1] (which also catches NaN).
      auc_entropy = time_to_deadline * dh - dh * dt / 2
      divisor = (time_to_deadline * entropy_prev)
      if divisor == 0:
        auc_entropy = 1
      else:
        auc_entropy /= divisor
      if dt > time_to_deadline:
        auc_entropy = 0
      if not (auc_entropy>=-1 and auc_entropy<=1):
        auc_entropy = 0
      sample.auc_entropy = auc_entropy

      entropy_prev = entropy

      # TODO: the below line of code should occur before the state is stored in the sample
      b.t += dt
      sample.dt = dt
      samples.append(sample)
      step_ind += 1

      # The updated belief state posterior over C is our classification result
      clses = b.get_p_c().tolist() + [img_ind,b.t]
      all_clses.append(clses)
      # Update action values and pick the next action
      self.update_actions(b)
      action_ind = self.select_action(b,epsilon)

      # check for stopping conditions
      if action_ind < 0:
        break
      if self.bounds and not self.policy_mode=='oracle':
        if b.t > self.bounds[1]:
          break

    # in case of 'oracle' mode, re-sort the detections and times in order of AP
    # contributions, and actually re-gather p_c's for clses.
    action_inds = [s.action_ind for s in samples]
    times = [s.dt for s in samples]
    if self.policy_mode=='oracle':
      naive_aps = np.array([s.det_naive_ap for s in samples])
      # mergesort is stable, so ties keep their original order
      sorted_inds = np.argsort(-naive_aps,kind='mergesort')
      # Index the Python lists directly: np.take() would first convert the
      # list of per-step arrays into one ndarray, which flattens equal-shaped
      # entries and is ill-defined for ragged ones.
      all_detections = [all_detections[i] for i in sorted_inds]
      sorted_action_inds = [action_inds[i] for i in sorted_inds]
      # The per-step durations must follow the same permutation so the
      # cumulative times below match the re-ordered detections (and the
      # times of the re-gathered clses).
      times = [times[i] for i in sorted_inds]

      # actually go through the whole thing again, getting new p_c's
      b.reset()
      all_clses = []
      for action_ind in sorted_action_inds:
        action = self.actions[action_ind]
        obs = action.obj.get_observations(image)
        b.t += obs['dt']
        if action.name=='gist':
          b.update_with_gist(action_ind, obs['scores'])
        else:
          b.update_with_score(action_ind, obs['score'])
        clses = b.get_p_c().tolist() + [img_ind,b.t]
        all_clses.append(clses)

    # now construct the final dets array, with correct times
    cum_times = np.cumsum(times)
    all_times = []
    all_nonempty_detections = []
    for i,dets in enumerate(all_detections):
      num_dets = dets.shape[0]
      if num_dets > 0:
        all_nonempty_detections.append(dets)
        t_vector = np.tile(cum_times[i],(num_dets,1))
        all_times.append(t_vector)
    if len(all_nonempty_detections)>0:
      all_detections = np.concatenate(all_nonempty_detections,0)
      all_times = np.concatenate(all_times,0)
      # appending 'time' column at end, as promised
      all_detections = np.hstack((all_detections,all_times))
      # we probably went over deadline with the oracle mode, so trim it down
      if self.bounds:
        if np.max(all_times)>self.bounds[1]:
          first_overdeadline_ind = np.flatnonzero(all_times>self.bounds[1])[0]
          all_detections = all_detections[:first_overdeadline_ind,:]
    else:
      all_detections = np.array([])
    all_clses = np.array(all_clses)

    if verbose:
      print("DatasetPolicy on image with ind %d took %.3f s"%(
        img_ind,self.tt.qtoc('run_on_image')))

    # TODO: temp debug thing
    if False:
      print("Action sequence was: %s"%[s.action_ind for s in samples])
      print("here's an image:")
      X = np.vstack((all_clses[:,:-2],image.get_cls_ground_truth()))
      np.set_printoptions(precision=2, suppress=True)
      print(X)
      plt.pcolor(np.flipud(X))
      plt.show()

    return (all_detections,all_clses,samples)