Ejemplo n.º 1
0
def test_feature_count():
    """Check feature_count against hand-computed values on one-hot features.

    The dummy trajectory visits state ``k`` exactly ``k`` times in a row
    (k = 1..4). The expected vectors below encode that each step is weighted
    by ``gamma ** t`` (t = position in the trajectory) and that identical
    trajectories in the input list do not change the result.
    """
    n_features = 16
    env = feature_wrapper.make('FrozenLake-v0')

    # Dummy data: one one-hot feature vector per visited state.
    visited_states = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]
    features = [to_one_hot(state, n_features) for state in visited_states]

    def padded(leading_values):
        # Right-pad the interesting leading entries with zeros.
        n_missing = n_features - len(leading_values)
        return np.array(leading_values + [0.] * n_missing)

    def check(trajs, gamma, desired):
        # Each scenario requires an ndarray that is close to `desired`.
        result = feature_count(env, trajs, gamma=gamma)
        assert isinstance(result, np.ndarray)
        assert np.allclose(result, desired)

    single_traj = [{'features': features}]

    # Undiscounted: state k contributes k visits.
    check(single_traj, 1.0, padded([0., 1., 2., 3., 4.]))

    # Two copies of the same trajectory should yield the same feature count.
    check([{'features': features}, {'features': features}], 1.0,
          padded([0., 1., 2., 3., 4.]))

    # One trajectory repeated back to back doubles the counts (gamma 1).
    check([{'features': features + features}], 1.0,
          padded([0., 2., 4., 6., 8.]))

    # gamma 0.9: the step at position t is weighted by 0.9 ** t.
    state_4_weight = .9**6 + .9**7 + .9**8 + .9**9
    check(single_traj, .9,
          padded([0., 1., .9 + .81, .729 + .6561 + .59049, state_4_weight]))

    # gamma 0: only the very first step carries any weight.
    check(single_traj, 0, padded([0., 1.]))
Ejemplo n.º 2
0
 def __init__(self, metric_input: dict):
     """Store the environment and precompute the expert feature count.

     Parameters
     ----------
     metric_input: dict
         Must contain the keys 'env' and 'expert_trajs'. The dict is also
         passed on unchanged to the parent constructor.
     """
     for required_key in ('env', 'expert_trajs'):
         assert required_key in metric_input.keys()
     super(FeatureCount2Loss, self).__init__(metric_input)
     self.env = metric_input['env']
     # Computed once here with gamma=1.0 and kept on the instance.
     self.expert_feature_count = feature_count(
         self.env, metric_input['expert_trajs'], gamma=1.0)
Ejemplo n.º 3
0
    def evaluate(self, evaluation_input: dict = None) -> float:
        """Return the norm of expert minus IRL feature counts (``ord=2``).

        Parameters
        ----------
        evaluation_input: dict
            Must contain the key 'irl_agent'. It is handed to
            ``self.generate_traj_if_not_exists`` to obtain the IRL
            trajectories.

        Returns
        -------
        float
            ``np.linalg.norm`` with ``ord=2`` of the difference between
            ``self.expert_feature_count`` and the feature count of the IRL
            trajectories (computed with gamma=1.0). This is the Euclidean
            distance if the feature counts are 1-D vectors.
        """
        assert 'irl_agent' in evaluation_input.keys()
        agent_trajs = self.generate_traj_if_not_exists(evaluation_input)
        agent_feature_count = feature_count(self.env, agent_trajs, gamma=1.0)
        return np.linalg.norm(
            self.expert_feature_count - agent_feature_count, ord=2)
Ejemplo n.º 4
0
    def feature_count(self, trajs: List[Dict[str, list]],
                      gamma: float) -> np.ndarray:
        """Compute empirical discounted feature counts for trajectories.

        Thin wrapper: the computation is delegated to
        ``irl_utils.feature_count`` using this object's environment.

        Parameters
        ----------
        trajs: List[Dict[str, list]]
            A list of trajectories. Each trajectory is a dictionary with
            keys ['states', 'actions', 'rewards', 'true_rewards',
            'features'], and every value is a list.
            See :func:`irl_benchmark.irl.collect.collect_trajs`.
        gamma: float
            The discount factor. Must be in range [0., 1.].

        Returns
        -------
        np.ndarray
            Discounted feature counts, one scalar per feature. The shape
            equals the shape of the trajectories' features.
        """
        # The implementation was moved to utils; forward the call there.
        discounted_counts = irl_utils.feature_count(self.env, trajs, gamma)
        return discounted_counts
Ejemplo n.º 5
0
 def evaluate(self, evaluation_input: dict):
     """Return the norm of expert minus IRL feature counts (``ord=np.inf``).

     Parameters
     ----------
     evaluation_input: dict
         Must contain the key 'irl_agent'. It is handed to
         ``self.generate_traj_if_not_exists`` to obtain the IRL
         trajectories.

     Returns
     -------
     ``np.linalg.norm`` with ``ord=np.inf`` of the difference between
     ``self.expert_feature_count`` and the feature count of the IRL
     trajectories (computed with gamma=1.0). This is the largest absolute
     component if the feature counts are 1-D vectors.
     """
     assert 'irl_agent' in evaluation_input.keys()
     agent_trajs = self.generate_traj_if_not_exists(evaluation_input)
     agent_feature_count = feature_count(self.env, agent_trajs, gamma=1.0)
     return np.linalg.norm(
         self.expert_feature_count - agent_feature_count, ord=np.inf)