def test_feature_count():
    """Check empirical discounted feature counts on FrozenLake one-hot features."""
    env = feature_wrapper.make('FrozenLake-v0')

    # Dummy path visiting state k exactly k times (k = 1..4), one-hot encoded.
    path = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]
    features = [to_one_hot(state, 16) for state in path]

    # With gamma=1 the counts equal the raw visit counts.
    trajs = [{'features': features}]
    result = feature_count(env, trajs, gamma=1.0)
    expected = np.array(
        [0., 1., 2., 3., 4., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
    assert isinstance(result, np.ndarray)
    assert np.allclose(result, expected)

    # Two identical trajectories yield the same (per-trajectory) count.
    trajs = [{'features': features}, {'features': features}]
    result = feature_count(env, trajs, gamma=1.0)
    assert isinstance(result, np.ndarray)
    assert np.allclose(result, expected)

    # Concatenating the path with itself doubles the counts (gamma=1).
    trajs = [{'features': features + features}]
    result = feature_count(env, trajs, gamma=1.0)
    expected = np.array(
        [0., 2., 4., 6., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
    assert isinstance(result, np.ndarray)
    assert np.allclose(result, expected)

    # gamma=0.9: each visit is weighted by gamma**t for its time step t.
    trajs = [{'features': features}]
    result = feature_count(env, trajs, gamma=.9)
    tail = .9**6 + .9**7 + .9**8 + .9**9
    expected = np.array([
        0., 1., .9 + .81, .729 + .6561 + .59049, tail,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
    ])
    assert isinstance(result, np.ndarray)
    assert np.allclose(result, expected)

    # gamma=0: only the first step of the trajectory contributes.
    result = feature_count(env, trajs, gamma=0)
    expected = np.array(
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
    assert isinstance(result, np.ndarray)
    assert np.allclose(result, expected)
def __init__(self, metric_input: dict):
    """Initialize the metric and precompute the expert feature count.

    Parameters
    ----------
    metric_input: dict
        Must contain the keys 'env' and 'expert_trajs'.
    """
    # Fail fast on missing required inputs.
    assert 'env' in metric_input
    assert 'expert_trajs' in metric_input
    super(FeatureCount2Loss, self).__init__(metric_input)
    self.env = metric_input['env']
    # Undiscounted (gamma=1) expert feature counts, computed once up front.
    self.expert_feature_count = feature_count(
        self.env, metric_input['expert_trajs'], gamma=1.0)
def evaluate(self, evaluation_input: dict = None) -> float:
    """Evaluate the metric given some input and return result.

    Computes the L2 norm of the difference between the precomputed
    expert feature count and the IRL agent's feature count.

    Parameters
    ----------
    evaluation_input: dict
        Must contain the key 'irl_agent'. The default of None exists
        only to keep the inherited signature; passing None is an error.

    Returns
    -------
    float
        The metric's output: ||expert_fc - irl_fc||_2.
    """
    # Guard the None default explicitly: the original code would raise an
    # opaque AttributeError ('NoneType' has no .keys') when called bare.
    assert evaluation_input is not None, 'evaluation_input must be a dict'
    assert 'irl_agent' in evaluation_input
    irl_trajs = self.generate_traj_if_not_exists(evaluation_input)
    # Same gamma=1.0 as used for the expert count, so the two are comparable.
    irl_feature_count = feature_count(self.env, irl_trajs, gamma=1.0)
    diff = self.expert_feature_count - irl_feature_count
    # Cast to a plain float to honor the declared return type.
    return float(np.linalg.norm(diff, ord=2))
def feature_count(self, trajs: List[Dict[str, list]],
                  gamma: float) -> np.ndarray:
    """Return empirical discounted feature counts of input trajectories.

    Thin wrapper around the implementation in utils.

    Parameters
    ----------
    trajs: List[Dict[str, list]]
        A list of trajectories. Each trajectory is a dictionary with keys
        ['states', 'actions', 'rewards', 'true_rewards', 'features'],
        each mapping to a list. See
        :func:`irl_benchmark.irl.collect.collect_trajs`.
    gamma: float
        The discount factor. Must be in range [0., 1.].

    Returns
    -------
    np.ndarray
        Discounted feature counts, one scalar per feature; same shape as
        the trajectories' features.
    """
    # Implementation lives in utils; delegate on self.env.
    counts = irl_utils.feature_count(self.env, trajs, gamma)
    return counts
def evaluate(self, evaluation_input: dict):
    """Return the L-infinity norm of the expert/IRL feature-count difference.

    Parameters
    ----------
    evaluation_input: dict
        Must contain the key 'irl_agent'.
    """
    assert 'irl_agent' in evaluation_input
    trajs = self.generate_traj_if_not_exists(evaluation_input)
    # Same gamma=1.0 as the stored expert count, so the two are comparable.
    learner_count = feature_count(self.env, trajs, gamma=1.0)
    return np.linalg.norm(
        self.expert_feature_count - learner_count, ord=np.inf)