def __init__(self, env_spec, obs_history_len, N, size, labeller=None,
             save_rollouts=False, save_rollouts_observations=True, save_env_infos=False,
             alpha=None, beta_schedule=None):
    assert (alpha is not None)
    assert (beta_schedule is not None)
    self._alpha = alpha
    self._beta_schedule = schedules.PiecewiseSchedule(**beta_schedule)
    self._max_pri_set = set()

    super(PERPool, self).__init__(
        env_spec=env_spec,
        obs_history_len=obs_history_len,
        N=N,
        size=size,
        labeller=labeller,
        save_rollouts=save_rollouts,
        save_rollouts_observations=save_rollouts_observations,
        save_env_infos=save_env_infos)
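# Hedged usage sketch (not part of the class): illustrates the kind of dict that is
# unpacked into schedules.PiecewiseSchedule(**beta_schedule) above. The minimal
# _PiecewiseSchedule below is an assumption about that interface (endpoints,
# outside_value, a .value(t) accessor with linear interpolation); the real
# schedules module may differ.

class _PiecewiseSchedule(object):
    """Linearly interpolates between (t, value) endpoints; flat outside the range."""

    def __init__(self, endpoints, outside_value=None):
        self._endpoints = endpoints
        self._outside_value = outside_value

    def value(self, t):
        for (l_t, l_v), (r_t, r_v) in zip(self._endpoints[:-1], self._endpoints[1:]):
            if l_t <= t < r_t:
                frac = float(t - l_t) / (r_t - l_t)
                return l_v + frac * (r_v - l_v)
        return self._outside_value

if __name__ == '__main__':
    # Example (assumed values): PER importance-sampling beta annealed from 0.4 to 1.0
    # over 1e6 steps, then held at 1.0.
    beta_schedule = {'endpoints': [(0, 0.4), (int(1e6), 1.0)], 'outside_value': 1.0}
    beta = _PiecewiseSchedule(**beta_schedule)
    print(beta.value(0), beta.value(int(5e5)), beta.value(int(2e6)))  # 0.4, 0.7, 1.0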
def __init__(self, env_spec, endpoints, outside_value):
    super(EpsilonGreedyStrategy, self).__init__(env_spec)
    self.schedule = schedules.PiecewiseSchedule(endpoints=endpoints,
                                                outside_value=outside_value)
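# Hedged example (assumed values, not from the source): a typical epsilon-greedy
# exploration config, annealing epsilon from 1.0 to 0.1 over the first 1e5 steps and
# holding it at 0.1 afterwards, expressed in the endpoints/outside_value form
# consumed above:
#
#   EpsilonGreedyStrategy(env_spec,
#                         endpoints=[(0, 1.0), (int(1e5), 0.1)],
#                         outside_value=0.1)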
def __init__(self, **kwargs):
    self._outputs = kwargs['outputs']
    self._rew_fn = kwargs['rew_fn']

    ### environment
    self._env_spec = kwargs['env_spec']
    self._obs_vec_keys = list(self._env_spec.observation_vec_spec.keys())
    self._action_keys = list(self._env_spec.action_spec.keys())
    self._goal_keys = list(self._env_spec.goal_spec.keys())
    self._output_keys = sorted([output['name'] for output in self._outputs])
    self._obs_im_shape = self._env_spec.observation_im_space.shape
    self._obs_im_dim = np.prod(self._obs_im_shape)
    self._obs_vec_dim = len(self._obs_vec_keys)
    self._action_dim = len(self._action_keys)
    self._goal_dim = len(self._goal_keys)
    self._output_dim = len(self._output_keys)

    ### model horizons
    self._N = kwargs['N']  # number of returns to use (N-step)
    self._H = kwargs['H']  # action planning horizon for training
    self._gamma = kwargs['gamma']  # reward decay
    self._obs_history_len = kwargs['obs_history_len']  # how many previous observations to use

    ### model architecture
    self._inference_only = kwargs.get('inference_only', False)
    self._image_graph = kwargs['image_graph']
    self._observation_graph = kwargs['observation_graph']
    self._action_graph = kwargs['action_graph']
    self._rnn_graph = kwargs['rnn_graph']
    self._output_graph = kwargs['output_graph']

    ### scopes
    self._image_scope = 'image_scope'
    self._observation_scope = 'observation_scope'
    self._action_scope = 'action_scope'
    self._rnn_scope = 'rnn_scope'
    self._output_scope = 'output_scope'

    ### target network
    self._use_target = kwargs['use_target']
    self._separate_target_params = kwargs['separate_target_params']

    ### training
    self._only_completed_episodes = kwargs['only_completed_episodes']
    self._weight_decay = kwargs['weight_decay']
    self._lr_schedule = schedules.PiecewiseSchedule(**kwargs['lr_schedule'])
    self._grad_clip_norm = kwargs['grad_clip_norm']
    self._gpu_device = kwargs['gpu_device']
    self._gpu_frac = kwargs['gpu_frac']

    ### action selection and exploration
    self._get_action_test = kwargs['get_action_test']
    self._get_action_target = kwargs['get_action_target']
    assert (self._get_action_target['type'] == 'random')

    gaussian_es_params = kwargs['exploration_strategies'].get('GaussianStrategy', None)
    if gaussian_es_params is not None:
        self._gaussian_es = GaussianStrategy(self._env_spec, **gaussian_es_params)
    else:
        self._gaussian_es = None

    epsilon_greedy_es_params = kwargs['exploration_strategies'].get('EpsilonGreedyStrategy', None)
    if epsilon_greedy_es_params is not None:
        self._epsilon_greedy_es = EpsilonGreedyStrategy(self._env_spec, **epsilon_greedy_es_params)
    else:
        self._epsilon_greedy_es = None

    ### setup the model
    self._tf_debug = dict()
    self._tf_dict = self._graph_setup()

    ### logging
    self._log_stats = defaultdict(list)

    # the N-step return horizon must cover the training planning horizon
    assert (self._N >= self._H)
def __init__(self, **kwargs):
    ### environment
    self._env_spec = kwargs['env_spec']

    ### model horizons
    self._N = kwargs['N']  # number of returns to use (N-step)
    self._H = kwargs['H']  # action planning horizon for training
    self._gamma = kwargs['gamma']  # reward decay
    self._obs_history_len = kwargs['obs_history_len']  # how many previous observations to use

    ### model architecture
    self._inference_only = kwargs.get('inference_only', False)
    self._image_graph = kwargs['image_graph']
    self._observation_graph = kwargs['observation_graph']
    self._action_graph = kwargs['action_graph']
    self._rnn_graph = kwargs['rnn_graph']
    self._output_graph = kwargs['output_graph']

    ### target network
    self._values_softmax = kwargs['values_softmax']  # which value horizons to train over
    self._use_target = kwargs['use_target']
    self._separate_target_params = kwargs['separate_target_params']
    self._clip_cost_target_with_dones = kwargs['clip_cost_target_with_dones']

    ### training
    self._only_completed_episodes = kwargs['only_completed_episodes']
    self._weight_decay = kwargs['weight_decay']
    self._lr_schedule = schedules.PiecewiseSchedule(**kwargs['lr_schedule'])
    self._grad_clip_norm = kwargs['grad_clip_norm']
    self._preprocess_params = kwargs['preprocess']
    self._gpu_device = kwargs['gpu_device']
    self._gpu_frac = kwargs['gpu_frac']

    ### action selection and exploration
    self._get_action_test = kwargs['get_action_test']
    self._get_action_target = kwargs['get_action_target']
    assert (self._get_action_target['type'] == 'random')

    gaussian_es_params = kwargs['exploration_strategies'].get('GaussianStrategy', None)
    if gaussian_es_params is not None:
        self._gaussian_es = GaussianStrategy(self._env_spec, **gaussian_es_params)
    else:
        self._gaussian_es = None

    epsilon_greedy_es_params = kwargs['exploration_strategies'].get('EpsilonGreedyStrategy', None)
    if epsilon_greedy_es_params is not None:
        self._epsilon_greedy_es = EpsilonGreedyStrategy(self._env_spec, **epsilon_greedy_es_params)
    else:
        self._epsilon_greedy_es = None

    ### setup the model
    self._tf_debug = dict()
    self._tf_dict = self._graph_setup()

    ### logging
    self._log_stats = defaultdict(list)

    # valid horizon combinations: N == H == 1, N > 1 with H == 1, or N > 1 with H > 1
    assert ((self._N == 1 and self._H == 1) or
            (self._N > 1 and self._H == 1) or
            (self._N > 1 and self._H > 1))
def __init__(self, env_spec, endpoints, outside_value):
    assert isinstance(env_spec.action_space, Box)
    self._env_spec = env_spec
    self.schedule = schedules.PiecewiseSchedule(endpoints=endpoints,
                                                outside_value=outside_value)
def __init__(self, env_spec, endpoints, outside_value):
    self._env_spec = env_spec
    self.schedule = schedules.PiecewiseSchedule(endpoints=endpoints,
                                                outside_value=outside_value)
def __init__(self, env_spec, endpoints, outside_value):
    super(GaussianStrategy, self).__init__(env_spec)
    assert isinstance(env_spec.action_space, Box)
    self.schedule = schedules.PiecewiseSchedule(endpoints=endpoints,
                                                outside_value=outside_value)
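# Hedged usage note (assumed values, not from the source): GaussianStrategy requires a
# continuous Box action space, and its schedule is presumably queried each step to set
# the scale of the additive exploration noise, e.g. decaying the standard deviation
# from 0.5 to 0.05 over 5e5 steps and holding it afterwards:
#
#   GaussianStrategy(env_spec,
#                    endpoints=[(0, 0.5), (int(5e5), 0.05)],
#                    outside_value=0.05)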