def _play(self, planning_data):
    if self.args.num_expert_episode_to_save > 0 and \
            self._previous_reward > self._env_solved_reward and \
            self._worker_id == 0:
        start_save_episode = True
        logger.info('Last episodic reward: %.4f' % self._previous_reward)
        logger.info('Minimum reward of %.4f is needed to start saving' %
                    self._env_solved_reward)
        logger.info('[SAVING] Worker %d will record its episode data' %
                    self._worker_id)
    else:
        start_save_episode = False
        if self.args.num_expert_episode_to_save > 0 \
                and self._worker_id == 0:
            logger.info('Last episodic reward: %.4f' % self._previous_reward)
            logger.info('Minimum reward of %.4f is needed to start saving' %
                        self._env_solved_reward)

    traj_episode = play_episode_with_env(
        self._env, self._act,
        {'use_random_action': planning_data['use_random_action'],
         'record_flag': start_save_episode,
         'num_episode': self.args.num_expert_episode_to_save,
         'data_name': self.args.task + '_' + self.args.exp_id}
    )
    self._previous_reward = np.sum(traj_episode['rewards'])
    return traj_episode
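
# --- Illustrative note (not part of the original source) ---
# A hedged sketch of the control dict that the recording variant of _play above
# hands to play_episode_with_env. The field names come from the code; the
# concrete values (task name 'gym_cheetah', exp_id 'exp_0', 5 episodes) are
# hypothetical placeholders, and how the helper interprets the recording fields
# is an assumption.
example_control_info = {
    'use_random_action': False,                  # planning_data['use_random_action']
    'record_flag': True,                         # only once the last return beats _env_solved_reward
    'num_episode': 5,                            # args.num_expert_episode_to_save
    'data_name': 'gym_cheetah' + '_' + 'exp_0',  # args.task + '_' + args.exp_id
}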
def _play(self, use_random_action=False):
    # Run one episode with the environment and the policy function self._act.
    traj_episode = play_episode_with_env(
        self._env, self._act,
        {'use_random_action': use_random_action}
    )
    return traj_episode
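
# --- Illustrative note (not part of the original source) ---
# Hypothetical consumption of the returned trajectory. Only the 'rewards' key
# is confirmed by the recording variant above (via np.sum(traj_episode['rewards']));
# this helper is an assumption, not part of the codebase.
import numpy as np

def episode_return(traj_episode):
    # Sum of per-step rewards, matching how _previous_reward is computed above.
    return float(np.sum(traj_episode['rewards']))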
def _play(self, planning_data):
    '''
    # TODO NOTE:
    var_list = self._network['policy'][0]._trainable_var_list
    print('')
    for var in var_list:
        print(var.name)
        # print(var.name, self._session.run(var)[-1])
    '''
    if planning_data['use_true_env']:
        traj_episode = play_episode_with_env(
            self._env, self._act,
            {'use_random_action': planning_data['use_random_action']})
    else:
        traj_episode = play_episode_with_env(
            self._fake_env, self._act,
            {'use_random_action': planning_data['use_random_action']})
    return traj_episode
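
# --- Illustrative note (not part of the original source) ---
# A minimal sketch of a rollout helper in the spirit of
# play_episode_with_env(env, act_func, control_info). The real helper is not
# shown in this excerpt; this version assumes a classic gym-style environment
# (reset() -> obs, step(action) -> (obs, reward, done, info)) and an act_func
# that maps an observation directly to an action. The recording fields
# ('record_flag', 'num_episode', 'data_name') used by the first variant are not
# modeled here. The third variant above simply swaps self._env for
# self._fake_env (a learned dynamics model) while reusing the same helper.
import numpy as np

def rollout_one_episode(env, act_func, control_info):
    obs, actions, rewards = [], [], []
    observation = env.reset()
    done = False
    while not done:
        if control_info.get('use_random_action', False):
            action = env.action_space.sample()  # random exploration / warm-up
        else:
            action = act_func(observation)
        next_observation, reward, done, _ = env.step(action)
        obs.append(observation)
        actions.append(action)
        rewards.append(reward)
        observation = next_observation
    # The 'rewards' key mirrors the one consumed by the recording variant of
    # _play when it updates self._previous_reward.
    return {'obs': np.array(obs),
            'actions': np.array(actions),
            'rewards': np.array(rewards)}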