class TestBeliefState(unittest.TestCase):
    """Smoke test that prints BeliefState features with actions blocked out."""

    def setUp(self):
        """Load the PASCAL trainval/test datasets and build a BeliefState.

        The BeliefState is built on the trainval dataset with the actions of
        a DatasetPolicy that is configured with the 'csc_default' detector
        only.
        """
        train_dataset = Dataset(
            test_config, 'test_pascal_trainval').load_from_pascal(
                'trainval', force=True)
        test_dataset = Dataset(
            test_config, 'test_pascal_test').load_from_pascal(
                'test', force=True)
        policy_kwargs = {'detectors': ['csc_default']}
        self.dp = DatasetPolicy(
            test_config, train_dataset, test_dataset, **policy_kwargs)
        self.bs = BeliefState(train_dataset, self.dp.actions)

    def test_featurization(self):
        # Deliberately no docstring here: unittest would display it in place
        # of the test name in verbose output.
        # NOTE(review): this test only prints the blocked-out features and
        # asserts nothing -- add assertions once the expected values are
        # known.
        ff = self.bs.compute_full_feature()

        # set_printoptions() is process-global, so restore the previous
        # settings when the test finishes instead of leaking precision=2
        # into every test that runs afterwards.
        self.addCleanup(np.set_printoptions, **np.get_printoptions())
        np.set_printoptions(precision=2)

        # print(x) with a single argument behaves the same on Python 2 and 3.
        for action_ind in (-1, 0, 3):
            print(self.bs.block_out_action(ff, action_ind))
Ejemplo n.º 2
0
    def run_on_image(self, image, dataset, verbose=False, epsilon=0.01):
        """
        Run the policy on one image, taking actions until select_action()
        returns a negative index or (outside 'oracle' mode) the deadline
        self.bounds[1] has passed.

        Arguments
        - image: must provide get_det_gt() and be accepted by the
          get_observations() method of every action object.
        - dataset: used to look up the image index and the class list.
        - verbose: if True, print how long the run took.
        - epsilon: passed through unchanged to self.select_action().

        Return
        - array of detections in the image, with each row as self.det_columns
          (the detector's columns followed by cls_ind, img_ind and time);
          an empty array if nothing was detected
        - array of multi-label classification outputs, with each row as
          self.get_cls_cols() (the class posterior followed by img_ind and
          the time at which it was obtained)
        - list of <s,a,r,s',dt> samples.
        """
        gt = image.get_det_gt(with_diff=True)
        self.tt.tic('run_on_image')

        all_detections = []
        all_clses = []
        samples = []
        prev_ap = 0
        img_ind = dataset.get_img_ind(image)

        # If we have previously run_on_image(), then we already have a
        # reference to an inf_model. Otherwise, we make a new one and store a
        # reference to it, to keep it alive.
        if hasattr(self, 'inf_model'):
            b = BeliefState(self.train_dataset, self.actions,
                            self.inference_mode, self.bounds, self.inf_model,
                            self.fastinf_model_name)
        else:
            b = BeliefState(self.train_dataset,
                            self.actions,
                            self.inference_mode,
                            self.bounds,
                            fastinf_model_name=self.fastinf_model_name)
            self.b = b
            self.inf_model = b.model

        self.update_actions(b)
        action_ind = self.select_action(b, epsilon)
        step_ind = 0
        # TODO: why is the below var never used?
        # initial_clses = np.array(b.get_p_c().tolist() + [img_ind,0])
        entropy_prev = np.mean(b.get_entropies())
        while True:
            # Populate the sample with stuff we know
            sample = Sample()
            sample.step_ind = step_ind
            sample.img_ind = img_ind
            sample.state = b.compute_full_feature()
            sample.action_ind = action_ind

            # TODO: this is incorrect, and results in samples at t=0 to already
            # have detections
            sample.t = b.t

            # prepare for AUC reward stuff
            # TODO: should set time_to_deadline to -Inf if no bounds
            time_to_deadline = 0
            if self.bounds:
                # this should never be less than zero, except for when running
                # oracle
                time_to_deadline = max(0, self.bounds[1] - b.t)
            sample.auc_ap_raw = 0
            sample.auc_ap = 0

            # Take the action and get the observations as a dict
            action = self.actions[action_ind]
            obs = action.obj.get_observations(image)
            dt = obs['dt']

            # If observations include detections, compute the relevant
            # stuff for the sample collection
            sample.det_naive_ap = 0
            sample.det_actual_ap = 0
            if 'dets' not in obs:
                # Keep one entry per step so that all_detections stays
                # index-aligned with samples.
                all_detections.append(np.array([]))
            else:
                det = action.obj
                dets = obs['dets']
                cls_ind = dataset.classes.index(det.cls)
                num_dets = dets.shape[0]
                if num_dets > 0:
                    # Tag every detection row with its class and image index.
                    c_vector = np.tile(cls_ind, (num_dets, 1))
                    i_vector = np.tile(img_ind, (num_dets, 1))
                    detections = np.hstack((dets, c_vector, i_vector))
                else:
                    detections = np.array([])
                dets_table = Table(detections,
                                   det.columns + ['cls_ind', 'img_ind'])

                # compute the 'naive' det AP increase,
                # as if we were adding dets to an empty set
                # ap,rec,prec = self.ev.compute_det_pr(dets_table,gt)
                ap = self.ev.compute_det_map(dets_table, gt)
                sample.det_naive_ap = ap

                # TODO: am I needlessly recomputing this table?
                all_detections.append(detections)
                # The loop variable is deliberately not called `dets`: on
                # Python 2 a list comprehension leaks its variable and would
                # rebind the local above.
                nonempty_dets = [d for d in all_detections if d.shape[0] > 0]
                all_dets_table = Table(np.array([]), dets_table.cols)
                if len(nonempty_dets) > 0:
                    all_dets_table = Table(np.concatenate(nonempty_dets, 0),
                                           dets_table.cols)

                # compute the actual AP increase: adding dets to dets so far
                # ap,rec,prec = self.ev.compute_det_pr(all_dets_table,gt)
                ap = self.ev.compute_det_map(all_dets_table, gt)
                ap_diff = ap - prev_ap
                sample.det_actual_ap = ap_diff

                # Compute detector AUC reward
                # If the action took longer than we have time, benefit is 0
                # (which is already set above). Without bounds there is no
                # deadline to measure against, so the reward also stays 0;
                # the explicit check keeps self.bounds[0] below from being
                # evaluated on an unset self.bounds when dt <= 0.
                if self.bounds and dt <= time_to_deadline:
                    midway_point = b.t + dt / 2.
                    if midway_point > self.bounds[0]:
                        length = max(0, self.bounds[1] - midway_point)
                    else:
                        length = self.bounds[1] - self.bounds[0]
                    auc_ap = 1. * ap_diff * length
                    sample.auc_ap_raw = auc_ap

                    # Now divide by the potential gain to compute the "normalized" reward
                    # Note that there are two cases: the curve goes up, or it turns down.
                    # In the first case, the normalizing area should be the area to ap=1.
                    # In the second case, the normalizing area should be the
                    # area to ap=0.
                    if ap_diff < 0:
                        divisor = time_to_deadline * (prev_ap)
                    else:
                        divisor = time_to_deadline * (1. - prev_ap)
                    if divisor < 0:
                        divisor = 0
                    auc_ap = 1 if divisor == 0 else auc_ap / divisor
                    assert (auc_ap >= -1 and auc_ap <= 1)
                    sample.auc_ap = auc_ap
                prev_ap = ap

            # Update the belief state with the observations
            if action.name == 'gist':
                b.update_with_gist(action_ind, obs['scores'])
            else:
                b.update_with_score(action_ind, obs['score'])

            # mean entropy
            entropy = np.mean(b.get_entropies())
            dh = entropy_prev - entropy  # this is actually -dh :)
            sample.entropy = dh

            auc_entropy = time_to_deadline * dh - dh * dt / 2
            divisor = (time_to_deadline * entropy_prev)
            if divisor == 0:
                auc_entropy = 1
            else:
                auc_entropy /= divisor
            if dt > time_to_deadline:
                auc_entropy = 0
            # Written as a negated range check so that NaN is zeroed as well.
            if not (auc_entropy >= -1 and auc_entropy <= 1):
                auc_entropy = 0
            sample.auc_entropy = auc_entropy

            entropy_prev = entropy

            # TODO: the below line of code should occur before the state is
            # stored in the sample
            b.t += dt
            sample.dt = dt
            samples.append(sample)
            step_ind += 1

            # The updated belief state posterior over C is our classification
            # result
            clses = b.get_p_c().tolist() + [img_ind, b.t]
            all_clses.append(clses)
            # Update action values and pick the next action
            self.update_actions(b)
            action_ind = self.select_action(b, epsilon)

            # check for stopping conditions
            if action_ind < 0:
                break
            if self.bounds and not self.policy_mode == 'oracle':
                if b.t > self.bounds[1]:
                    break

        # in case of 'oracle' mode, re-sort the detections and times in order of AP
        # contributions, and actually re-gather p_c's for clses.
        if self.policy_mode == 'oracle':
            action_inds = [s.action_ind for s in samples]
            naive_aps = np.array([s.det_naive_ap for s in samples])
            # mergesort is stable, so ties keep their original order
            sorted_inds = np.argsort(-naive_aps, kind='mergesort')
            # Reorder with plain list indexing. np.take() would first coerce
            # the list of per-step arrays into a single ndarray: that is
            # rejected when their shapes differ, and when they all match it
            # flattens them so individual scalars are taken instead of whole
            # per-step arrays.
            all_detections = [all_detections[i] for i in sorted_inds]
            sorted_action_inds = np.take(action_inds, sorted_inds)

            # actually go through the whole thing again, getting new p_c's
            b.reset()
            all_clses = []
            for action_ind in sorted_action_inds:
                action = self.actions[action_ind]
                obs = action.obj.get_observations(image)
                b.t += obs['dt']
                if action.name == 'gist':
                    b.update_with_gist(action_ind, obs['scores'])
                else:
                    b.update_with_score(action_ind, obs['score'])
                clses = b.get_p_c().tolist() + [img_ind, b.t]
                all_clses.append(clses)

        # now construct the final dets array, with correct times
        # NOTE(review): in oracle mode the detections were re-sorted above,
        # but these dts are still in the order the actions were originally
        # taken -- confirm whether they should be reordered by sorted_inds.
        times = [s.dt for s in samples]

        # assert(len(all_detections)==len(all_clses)==len(times))
        cum_times = np.cumsum(times)
        all_times = []
        all_nonempty_detections = []
        for i, dets in enumerate(all_detections):
            num_dets = dets.shape[0]
            if num_dets > 0:
                all_nonempty_detections.append(dets)
                # Every detection of step i is stamped with the time at
                # which that step finished.
                t_vector = np.tile(cum_times[i], (num_dets, 1))
                all_times.append(t_vector)
        if len(all_nonempty_detections) > 0:
            all_detections = np.concatenate(all_nonempty_detections, 0)
            all_times = np.concatenate(all_times, 0)
            # appending 'time' column at end, as promised
            all_detections = np.hstack((all_detections, all_times))
            # we probably went over deadline with the oracle mode, so trim it
            # down
            if self.bounds and np.max(all_times) > self.bounds[1]:
                first_overdeadline_ind = np.flatnonzero(
                    all_times > self.bounds[1])[0]
                all_detections = all_detections[:first_overdeadline_ind, :]
        else:
            all_detections = np.array([])
        all_clses = np.array(all_clses)

        if verbose:
            print("DatasetPolicy on image with ind %d took %.3f s" %
                  (img_ind, self.tt.qtoc('run_on_image')))

        # TODO: temp debug thing
        if False:
            print("Action sequence was: %s" % [s.action_ind for s in samples])
            print("here's an image:")
            X = np.vstack((all_clses[:, :-2], image.get_cls_ground_truth()))
            np.set_printoptions(precision=2, suppress=True)
            print(X)
            plt.pcolor(np.flipud(X))
            plt.show()

        return (all_detections, all_clses, samples)
    def run_on_image(self, image, dataset, verbose=False, epsilon=0.01):
        """
        Run the policy on one image, taking actions until select_action()
        returns a negative index or (outside 'oracle' mode) the deadline
        self.bounds[1] has passed.

        Arguments
        - image: must provide get_det_gt() and be accepted by the
          get_observations() method of every action object.
        - dataset: used to look up the image index and the class list.
        - verbose: if True, print how long the run took.
        - epsilon: passed through unchanged to self.select_action().

        Return
        - array of detections in the image, with each row as self.det_columns
          (the detector's columns followed by cls_ind, img_ind and time);
          an empty array if nothing was detected
        - array of multi-label classification outputs, with each row as
          self.get_cls_cols() (the class posterior followed by img_ind and
          the time at which it was obtained)
        - list of <s,a,r,s',dt> samples.
        """
        gt = image.get_det_gt(with_diff=True)
        self.tt.tic('run_on_image')

        all_detections = []
        all_clses = []
        samples = []
        prev_ap = 0
        img_ind = dataset.get_img_ind(image)

        # If we have previously run_on_image(), then we already have a
        # reference to an inf_model. Otherwise, we make a new one and store a
        # reference to it, to keep it alive.
        if hasattr(self, 'inf_model'):
            b = BeliefState(
                self.train_dataset, self.actions, self.inference_mode,
                self.bounds, self.inf_model, self.fastinf_model_name)
        else:
            b = BeliefState(
                self.train_dataset, self.actions, self.inference_mode,
                self.bounds, fastinf_model_name=self.fastinf_model_name)
            self.b = b
            self.inf_model = b.model

        self.update_actions(b)
        action_ind = self.select_action(b, epsilon)
        step_ind = 0
        # TODO: why is the below var never used?
        # initial_clses = np.array(b.get_p_c().tolist() + [img_ind,0])
        entropy_prev = np.mean(b.get_entropies())
        while True:
            # Populate the sample with stuff we know
            sample = Sample()
            sample.step_ind = step_ind
            sample.img_ind = img_ind
            sample.state = b.compute_full_feature()
            sample.action_ind = action_ind

            # TODO: this is incorrect, and results in samples at t=0 to already
            # have detections
            sample.t = b.t

            # prepare for AUC reward stuff
            # TODO: should set time_to_deadline to -Inf if no bounds
            time_to_deadline = 0
            if self.bounds:
                # this should never be less than zero, except for when running
                # oracle
                time_to_deadline = max(0, self.bounds[1] - b.t)
            sample.auc_ap_raw = 0
            sample.auc_ap = 0

            # Take the action and get the observations as a dict
            action = self.actions[action_ind]
            obs = action.obj.get_observations(image)
            dt = obs['dt']

            # If observations include detections, compute the relevant
            # stuff for the sample collection
            sample.det_naive_ap = 0
            sample.det_actual_ap = 0
            if 'dets' not in obs:
                # Keep one entry per step so that all_detections stays
                # index-aligned with samples.
                all_detections.append(np.array([]))
            else:
                det = action.obj
                dets = obs['dets']
                cls_ind = dataset.classes.index(det.cls)
                num_dets = dets.shape[0]
                if num_dets > 0:
                    # Tag every detection row with its class and image index.
                    c_vector = np.tile(cls_ind, (num_dets, 1))
                    i_vector = np.tile(img_ind, (num_dets, 1))
                    detections = np.hstack((dets, c_vector, i_vector))
                else:
                    detections = np.array([])
                dets_table = Table(
                    detections, det.columns + ['cls_ind', 'img_ind'])

                # compute the 'naive' det AP increase,
                # as if we were adding dets to an empty set
                # ap,rec,prec = self.ev.compute_det_pr(dets_table,gt)
                ap = self.ev.compute_det_map(dets_table, gt)
                sample.det_naive_ap = ap

                # TODO: am I needlessly recomputing this table?
                all_detections.append(detections)
                # The loop variable is deliberately not called `dets`: on
                # Python 2 a list comprehension leaks its variable and would
                # rebind the local above.
                nonempty_dets = [d for d in all_detections if d.shape[0] > 0]
                all_dets_table = Table(np.array([]), dets_table.cols)
                if len(nonempty_dets) > 0:
                    all_dets_table = Table(
                        np.concatenate(nonempty_dets, 0), dets_table.cols)

                # compute the actual AP increase: adding dets to dets so far
                # ap,rec,prec = self.ev.compute_det_pr(all_dets_table,gt)
                ap = self.ev.compute_det_map(all_dets_table, gt)
                ap_diff = ap - prev_ap
                sample.det_actual_ap = ap_diff

                # Compute detector AUC reward
                # If the action took longer than we have time, benefit is 0
                # (which is already set above). Without bounds there is no
                # deadline to measure against, so the reward also stays 0;
                # the explicit check keeps self.bounds[0] below from being
                # evaluated on an unset self.bounds when dt <= 0.
                if self.bounds and dt <= time_to_deadline:
                    midway_point = b.t + dt / 2.
                    if midway_point > self.bounds[0]:
                        length = max(0, self.bounds[1] - midway_point)
                    else:
                        length = self.bounds[1] - self.bounds[0]
                    auc_ap = 1. * ap_diff * length
                    sample.auc_ap_raw = auc_ap

                    # Now divide by the potential gain to compute the "normalized" reward
                    # Note that there are two cases: the curve goes up, or it turns down.
                    # In the first case, the normalizing area should be the area to ap=1.
                    # In the second case, the normalizing area should be the
                    # area to ap=0.
                    if ap_diff < 0:
                        divisor = time_to_deadline * (prev_ap)
                    else:
                        divisor = time_to_deadline * (1. - prev_ap)
                    if divisor < 0:
                        divisor = 0
                    auc_ap = 1 if divisor == 0 else auc_ap / divisor
                    assert(auc_ap >= -1 and auc_ap <= 1)
                    sample.auc_ap = auc_ap
                prev_ap = ap

            # Update the belief state with the observations
            if action.name == 'gist':
                b.update_with_gist(action_ind, obs['scores'])
            else:
                b.update_with_score(action_ind, obs['score'])

            # mean entropy
            entropy = np.mean(b.get_entropies())
            dh = entropy_prev - entropy  # this is actually -dh :)
            sample.entropy = dh

            auc_entropy = time_to_deadline * dh - dh * dt / 2
            divisor = (time_to_deadline * entropy_prev)
            if divisor == 0:
                auc_entropy = 1
            else:
                auc_entropy /= divisor
            if dt > time_to_deadline:
                auc_entropy = 0
            # Written as a negated range check so that NaN is zeroed as well.
            if not (auc_entropy >= -1 and auc_entropy <= 1):
                auc_entropy = 0
            sample.auc_entropy = auc_entropy

            entropy_prev = entropy

            # TODO: the below line of code should occur before the state is
            # stored in the sample
            b.t += dt
            sample.dt = dt
            samples.append(sample)
            step_ind += 1

            # The updated belief state posterior over C is our classification
            # result
            clses = b.get_p_c().tolist() + [img_ind, b.t]
            all_clses.append(clses)
            # Update action values and pick the next action
            self.update_actions(b)
            action_ind = self.select_action(b, epsilon)

            # check for stopping conditions
            if action_ind < 0:
                break
            if self.bounds and not self.policy_mode == 'oracle':
                if b.t > self.bounds[1]:
                    break

        # in case of 'oracle' mode, re-sort the detections and times in order of AP
        # contributions, and actually re-gather p_c's for clses.
        if self.policy_mode == 'oracle':
            action_inds = [s.action_ind for s in samples]
            naive_aps = np.array([s.det_naive_ap for s in samples])
            # mergesort is stable, so ties keep their original order
            sorted_inds = np.argsort(-naive_aps, kind='mergesort')
            # Reorder with plain list indexing. np.take() would first coerce
            # the list of per-step arrays into a single ndarray: that is
            # rejected when their shapes differ, and when they all match it
            # flattens them so individual scalars are taken instead of whole
            # per-step arrays.
            all_detections = [all_detections[i] for i in sorted_inds]
            sorted_action_inds = np.take(action_inds, sorted_inds)

            # actually go through the whole thing again, getting new p_c's
            b.reset()
            all_clses = []
            for action_ind in sorted_action_inds:
                action = self.actions[action_ind]
                obs = action.obj.get_observations(image)
                b.t += obs['dt']
                if action.name == 'gist':
                    b.update_with_gist(action_ind, obs['scores'])
                else:
                    b.update_with_score(action_ind, obs['score'])
                clses = b.get_p_c().tolist() + [img_ind, b.t]
                all_clses.append(clses)

        # now construct the final dets array, with correct times
        # NOTE(review): in oracle mode the detections were re-sorted above,
        # but these dts are still in the order the actions were originally
        # taken -- confirm whether they should be reordered by sorted_inds.
        times = [s.dt for s in samples]

        # assert(len(all_detections)==len(all_clses)==len(times))
        cum_times = np.cumsum(times)
        all_times = []
        all_nonempty_detections = []
        for i, dets in enumerate(all_detections):
            num_dets = dets.shape[0]
            if num_dets > 0:
                all_nonempty_detections.append(dets)
                # Every detection of step i is stamped with the time at
                # which that step finished.
                t_vector = np.tile(cum_times[i], (num_dets, 1))
                all_times.append(t_vector)
        if len(all_nonempty_detections) > 0:
            all_detections = np.concatenate(all_nonempty_detections, 0)
            all_times = np.concatenate(all_times, 0)
            # appending 'time' column at end, as promised
            all_detections = np.hstack((all_detections, all_times))
            # we probably went over deadline with the oracle mode, so trim it
            # down
            if self.bounds and np.max(all_times) > self.bounds[1]:
                first_overdeadline_ind = np.flatnonzero(
                    all_times > self.bounds[1])[0]
                all_detections = all_detections[:first_overdeadline_ind, :]
        else:
            all_detections = np.array([])
        all_clses = np.array(all_clses)

        if verbose:
            print("DatasetPolicy on image with ind %d took %.3f s" % (
                img_ind, self.tt.qtoc('run_on_image')))

        # TODO: temp debug thing
        if False:
            print("Action sequence was: %s" % [s.action_ind for s in samples])
            print("here's an image:")
            X = np.vstack((all_clses[:, :-2], image.get_cls_ground_truth()))
            np.set_printoptions(precision=2, suppress=True)
            print(X)
            plt.pcolor(np.flipud(X))
            plt.show()

        return (all_detections, all_clses, samples)