def evaluate(self, policy, n_episodes=1, initial_state=None,
             transform_to_internal_state=None, render=False,
             results_dir='/tmp/'):
    """
    Evaluate a policy on the mdp.

    Each episode resets the wrapped environment, optionally overrides the
    start state, and then steps the environment with the policy's actions
    until it reports ``done`` or ``_max_episode_steps`` steps have elapsed.

    :param policy: a policy outputting an action; must expose ``device``
    :type policy: Policy
    :param n_episodes: number of episodes to evaluate
    :type n_episodes: int
    :param initial_state: the initial state to evaluate from; when None
        the state returned by the environment reset is used
    :type initial_state: int
    :param transform_to_internal_state: maps ``initial_state`` to the
        internal state written into the environment; when None the
        initial state is written unchanged
    :type transform_to_internal_state: function
    :param render: render the environment after every step
    :type render: bool
    :param results_dir: directory handed to the Dataset constructor
    :type results_dir: str
    :return: a Dataset with the trajectories
    """
    # Fall back to the identity so that passing only `initial_state` does
    # not fail with "'NoneType' object is not callable".
    if transform_to_internal_state is None:
        to_internal = lambda s: s  # noqa: E731
    else:
        to_internal = transform_to_internal_state

    dataset = Dataset(results_dir=results_dir)
    for _ in range(n_episodes):
        trajectory = []
        state = self._env.reset()

        if initial_state is not None:
            state = initial_state
            internal_state = np.copy(to_internal(state))
            # The quanser environments keep their simulator state under a
            # different attribute than the other environments.
            if self._quanser_robots:
                self._env.env._sim_state = internal_state
            else:
                self._env.env.state = internal_state

        for _ in range(self._env._max_episode_steps):
            with torch.no_grad():
                state = torch.tensor(state, device=policy.device,
                                     dtype=TORCH_DTYPE)
                action = policy(state).to('cpu').numpy().reshape((-1, ))

            state_next, rew, done, _ = self._env.step(action)
            trajectory.append(
                (state.to('cpu').numpy(), action, rew, state_next, done))
            state = state_next

            if render:
                self._env.render()
            if done:
                break

        dataset.add_trajectory(trajectory)

    self._env.close()
    return dataset
kwargs={ 'fs': 200.0, 'fs_ctrl': 200.0 })
# NOTE(review): the line above closes a call that is opened before this
# chunk; its callee and other arguments are not visible here.
# NOTE(review): the original indentation of this script was lost. The layout
# below (env.close() and add_trajectory inside the per-trajectory loop) is
# the reading that records every trajectory -- confirm against the source.

# Create the 'Qube-100-v1' gym environment wrapped in GentlyTerminating.
env = GentlyTerminating(gym.make('Qube-100-v1'))
dataset = Dataset()
n_trajectories = 1
for traj in range(n_trajectories):
    trajectory = []
    # A fresh SwingUpCtrl controller is created for every trajectory.
    ctrl = SwingUpCtrl()
    obs = env.reset()
    done = False
    while not done:
        env.render()
        act = ctrl(obs)
        obs_n, r, done, _ = env.step(act)
        # Store the transition as (obs, action, reward, next_obs, done).
        trajectory.append((obs, act, r, obs_n, done))
        if done:
            break
        # Copy so the stored observation does not alias obs_n.
        obs = np.copy(obs_n)
    env.close()
    dataset.add_trajectory(trajectory)
# presumably populates dataset._rewards from the added trajectories -- verify
dataset.update_dataset_internal()
# Print the sum over dataset._rewards.
print(np.sum(dataset._rewards))
# The file name encodes the number of collected trajectories.
filename = "../datasets/qube/{}_trajectories.npy".format(n_trajectories)
dataset.save_trajectories_to_file(filename)