import unittest
import numpy as np

# Dataset, DatasetPolicy, BeliefState, and test_config come from this
# project's own modules.


class TestBeliefState(unittest.TestCase):
    def setUp(self):
        d = Dataset(test_config, 'test_pascal_trainval').load_from_pascal(
            'trainval', force=True)
        d2 = Dataset(test_config, 'test_pascal_test').load_from_pascal(
            'test', force=True)
        config = {'detectors': ['csc_default']}
        self.dp = DatasetPolicy(test_config, d, d2, **config)
        self.bs = BeliefState(d, self.dp.actions)

    def test_featurization(self):
        ff = self.bs.compute_full_feature()
        np.set_printoptions(precision=2)
        print self.bs.block_out_action(ff, -1)
        print self.bs.block_out_action(ff, 0)
        print self.bs.block_out_action(ff, 3)
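
# Not part of the original listing: a conventional unittest entry point,
# added as a sketch of how the test class above could be run directly as a
# script.
if __name__ == '__main__':
    unittest.main()
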
def run_on_image(self, image, dataset, verbose=False, epsilon=0.01):
    """
    Return
    - list of detections in the image, with each row as self.det_columns
    - list of multi-label classification outputs, with each row as
      self.get_cls_cols()
    - list of <s,a,r,s',dt> samples.
    """
    gt = image.get_det_gt(with_diff=True)
    self.tt.tic('run_on_image')

    all_detections = []
    all_clses = []
    samples = []
    prev_ap = 0
    img_ind = dataset.get_img_ind(image)

    # If we have previously run_on_image(), then we already have a reference
    # to an inf_model. Otherwise, we make a new one and store a reference to
    # it, to keep it alive.
    if hasattr(self, 'inf_model'):
        b = BeliefState(
            self.train_dataset, self.actions, self.inference_mode,
            self.bounds, self.inf_model, self.fastinf_model_name)
    else:
        b = BeliefState(
            self.train_dataset, self.actions, self.inference_mode,
            self.bounds, fastinf_model_name=self.fastinf_model_name)
        self.b = b
        self.inf_model = b.model

    self.update_actions(b)
    action_ind = self.select_action(b, epsilon)
    step_ind = 0
    # TODO: why is the below var never used?
    # initial_clses = np.array(b.get_p_c().tolist() + [img_ind, 0])
    entropy_prev = np.mean(b.get_entropies())
    while True:
        # Populate the sample with stuff we know
        sample = Sample()
        sample.step_ind = step_ind
        sample.img_ind = img_ind
        sample.state = b.compute_full_feature()
        sample.action_ind = action_ind
        # TODO: this is incorrect, and results in samples at t=0 already
        # having detections
        sample.t = b.t

        # Prepare for AUC reward stuff
        # TODO: should set time_to_deadline to -Inf if no bounds
        time_to_deadline = 0
        if self.bounds:
            # this should never be less than zero, except for when running
            # oracle
            time_to_deadline = max(0, self.bounds[1] - b.t)
        sample.auc_ap_raw = 0
        sample.auc_ap = 0

        # Take the action and get the observations as a dict
        action = self.actions[action_ind]
        obs = action.obj.get_observations(image)
        dt = obs['dt']

        # If observations include detections, compute the relevant stuff
        # for the sample collection
        sample.det_naive_ap = 0
        sample.det_actual_ap = 0
        if 'dets' not in obs:
            all_detections.append(np.array([]))
        else:
            det = action.obj
            dets = obs['dets']
            cls_ind = dataset.classes.index(det.cls)
            if dets.shape[0] > 0:
                c_vector = np.tile(cls_ind, (np.shape(dets)[0], 1))
                i_vector = np.tile(img_ind, (np.shape(dets)[0], 1))
                detections = np.hstack((dets, c_vector, i_vector))
            else:
                detections = np.array([])
            dets_table = Table(
                detections, det.columns + ['cls_ind', 'img_ind'])

            # compute the 'naive' det AP increase,
            # as if we were adding dets to an empty set
            # ap,rec,prec = self.ev.compute_det_pr(dets_table,gt)
            ap = self.ev.compute_det_map(dets_table, gt)
            sample.det_naive_ap = ap

            # TODO: am I needlessly recomputing this table?
            all_detections.append(detections)
            nonempty_dets = [
                dets for dets in all_detections if dets.shape[0] > 0]
            all_dets_table = Table(np.array([]), dets_table.cols)
            if len(nonempty_dets) > 0:
                all_dets_table = Table(
                    np.concatenate(nonempty_dets, 0), dets_table.cols)

            # compute the actual AP increase: adding dets to dets so far
            # ap,rec,prec = self.ev.compute_det_pr(all_dets_table,gt)
            ap = self.ev.compute_det_map(all_dets_table, gt)
            ap_diff = ap - prev_ap
            sample.det_actual_ap = ap_diff

            # Compute detector AUC reward.
            # If the action took longer than we have time, benefit is 0
            # (which is already set above)
            if dt <= time_to_deadline:
                midway_point = b.t + dt / 2.
                if midway_point > self.bounds[0]:
                    length = max(0, self.bounds[1] - midway_point)
                else:
                    length = self.bounds[1] - self.bounds[0]
                auc_ap = 1. * ap_diff * length
                sample.auc_ap_raw = auc_ap

                # Now divide by the potential gain to compute the
                # "normalized" reward. Note that there are two cases: the
                # curve goes up, or it turns down. In the first case, the
                # normalizing area should be the area to ap=1. In the
                # second case, the normalizing area should be the area to
                # ap=0.
                if ap_diff < 0:
                    divisor = time_to_deadline * (prev_ap)
                else:
                    divisor = time_to_deadline * (1. - prev_ap)
                if divisor < 0:
                    divisor = 0
                auc_ap = 1 if divisor == 0 else auc_ap / divisor
                assert (auc_ap >= -1 and auc_ap <= 1)
                sample.auc_ap = auc_ap
            prev_ap = ap

        # Update the belief state with the observations
        if action.name == 'gist':
            b.update_with_gist(action_ind, obs['scores'])
        else:
            b.update_with_score(action_ind, obs['score'])

        # mean entropy
        entropy = np.mean(b.get_entropies())
        dh = entropy_prev - entropy  # this is actually -dh :)
        sample.entropy = dh

        auc_entropy = time_to_deadline * dh - dh * dt / 2
        divisor = (time_to_deadline * entropy_prev)
        if divisor == 0:
            auc_entropy = 1
        else:
            auc_entropy /= divisor
        if dt > time_to_deadline:
            auc_entropy = 0
        if not (auc_entropy >= -1 and auc_entropy <= 1):
            auc_entropy = 0
        sample.auc_entropy = auc_entropy
        entropy_prev = entropy

        # TODO: the below line of code should occur before the state is
        # stored in the sample
        b.t += dt
        sample.dt = dt
        samples.append(sample)
        step_ind += 1

        # The updated belief state posterior over C is our classification
        # result
        clses = b.get_p_c().tolist() + [img_ind, b.t]
        all_clses.append(clses)

        # Update action values and pick the next action
        self.update_actions(b)
        action_ind = self.select_action(b, epsilon)

        # check for stopping conditions
        if action_ind < 0:
            break
        if self.bounds and not self.policy_mode == 'oracle':
            if b.t > self.bounds[1]:
                break

    # In case of 'oracle' mode, re-sort the detections and times in order of
    # AP contributions, and actually re-gather p_c's for clses.
    action_inds = [s.action_ind for s in samples]
    if self.policy_mode == 'oracle':
        naive_aps = np.array([s.det_naive_ap for s in samples])
        sorted_inds = np.argsort(-naive_aps, kind='merge')  # order-preserving
        all_detections = np.take(all_detections, sorted_inds)
        sorted_action_inds = np.take(action_inds, sorted_inds)

        # actually go through the whole thing again, getting new p_c's
        b.reset()
        all_clses = []
        for action_ind in sorted_action_inds:
            action = self.actions[action_ind]
            obs = action.obj.get_observations(image)
            b.t += obs['dt']
            if action.name == 'gist':
                b.update_with_gist(action_ind, obs['scores'])
            else:
                b.update_with_score(action_ind, obs['score'])
            clses = b.get_p_c().tolist() + [img_ind, b.t]
            all_clses.append(clses)

    # now construct the final dets array, with correct times
    times = [s.dt for s in samples]
    # assert(len(all_detections)==len(all_clses)==len(times))
    cum_times = np.cumsum(times)
    all_times = []
    all_nonempty_detections = []
    for i, dets in enumerate(all_detections):
        num_dets = dets.shape[0]
        if num_dets > 0:
            all_nonempty_detections.append(dets)
            t_vector = np.tile(cum_times[i], (num_dets, 1))
            all_times.append(t_vector)
    if len(all_nonempty_detections) > 0:
        all_detections = np.concatenate(all_nonempty_detections, 0)
        all_times = np.concatenate(all_times, 0)
        # appending 'time' column at end, as promised
        all_detections = np.hstack((all_detections, all_times))
        # we probably went over deadline with the oracle mode, so trim it
        # down
        if self.bounds:
            if np.max(all_times) > self.bounds[1]:
                first_overdeadline_ind = np.flatnonzero(
                    all_times > self.bounds[1])[0]
                all_detections = all_detections[:first_overdeadline_ind, :]
    else:
        all_detections = np.array([])
    all_clses = np.array(all_clses)

    if verbose:
        print("DatasetPolicy on image with ind %d took %.3f s" % (
            img_ind, self.tt.qtoc('run_on_image')))

    # TODO: temp debug thing
    if False:
        print("Action sequence was: %s" % [s.action_ind for s in samples])
        print("here's an image:")
        X = np.vstack((all_clses[:, :-2], image.get_cls_ground_truth()))
        np.set_printoptions(precision=2, suppress=True)
        print X
        plt.pcolor(np.flipud(X))
        plt.show()

    return (all_detections, all_clses, samples)
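
# A minimal, standalone sketch (not part of the original code) of the
# normalized AUC-AP reward computed inside run_on_image() above, pulled out
# so the two normalization cases are easier to see in isolation. The function
# name and arguments are illustrative only; bounds is (start_time, deadline),
# matching self.bounds above.
def normalized_auc_ap_sketch(ap_diff, prev_ap, t, dt, bounds):
    time_to_deadline = max(0, bounds[1] - t)
    if dt > time_to_deadline:
        # the action finishes past the deadline: no benefit
        return 0.
    # raw reward: area gained under the AP-vs-time curve, measured from the
    # midpoint of the action's duration to the deadline
    midway_point = t + dt / 2.
    if midway_point > bounds[0]:
        length = max(0, bounds[1] - midway_point)
    else:
        length = bounds[1] - bounds[0]
    auc_ap = ap_diff * length
    # normalize by the best (or worst) achievable area: up to ap=1 if the
    # curve went up, down to ap=0 if it went down
    if ap_diff < 0:
        divisor = time_to_deadline * prev_ap
    else:
        divisor = time_to_deadline * (1. - prev_ap)
    return 1. if divisor <= 0 else auc_ap / divisor

# For example, an action that raises AP from 0.2 to 0.3, taking 2 s starting
# halfway through a (0, 20) second budget:
# print normalized_auc_ap_sketch(0.1, 0.2, 10., 2., (0, 20))  # -> 0.1125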