def __init__(self, env_name, config, upload_dir=None):
    """Set up the driver-side state for an Evolution Strategies run.

    Tags ``config`` with the algorithm name, initialises the ``Algorithm``
    base class, builds the driver's policy/optimizer/observation statistics,
    then creates the shared noise table and the remote workers.

    Args:
        env_name: Environment identifier passed to ``gym.make`` and
            forwarded to every worker.
        config: Mapping of settings. It is mutated in place (the ``"alg"``
            key is set) and must provide ``"stepsize"`` and
            ``"num_workers"``.
        upload_dir: Forwarded unchanged to ``Algorithm.__init__``.
    """
    # The caller's config object is updated in place before the base class
    # sees it.
    config.update({"alg": "EvolutionStrategies"})
    Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)

    policy_params = {"ac_noise_std": 0.01}

    # Driver-side environment: only its spaces are read here.
    environment = gym.make(env_name)
    obs_space = environment.observation_space
    utils.make_session(single_threaded=False)
    # NOTE(review): another GenericPolicy call site in this file passes a
    # preprocessor as the third positional argument -- confirm that this
    # call matches the current GenericPolicy signature.
    self.policy = policies.GenericPolicy(
        obs_space, environment.action_space, **policy_params)
    tf_util.initialize()
    self.optimizer = optimizers.Adam(self.policy, config["stepsize"])
    self.ob_stat = utils.RunningStat(obs_space.shape, eps=1e-2)

    # Create the shared noise table. The object ID is kept so the same
    # table can be handed to each worker below.
    print("Creating shared noise table.")
    noise_ref = create_shared_noise.remote()
    self.noise = SharedNoiseTable(ray.get(noise_ref))

    # Create the actors.
    print("Creating actors.")
    actors = []
    for _ in range(config["num_workers"]):
        actors.append(
            Worker.remote(config, policy_params, env_name, noise_ref))
    self.workers = actors

    # Progress bookkeeping.
    self.episodes_so_far = 0
    self.timesteps_so_far = 0
    self.tstart = time.time()
    self.iteration = 0
def __init__(self, env_name, config):
    """Set up the driver-side state for an Evolution Strategies run.

    Initialises the ``Algorithm`` base class, creates the shared noise
    table and the remote workers, and then builds the driver's own policy,
    optimizer and observation statistics.

    Args:
        env_name: Environment identifier passed to ``gym.make`` and
            forwarded to every worker.
        config: Settings object read via attributes; ``num_workers`` and
            ``stepsize`` are used here.
    """
    Algorithm.__init__(self, env_name, config)

    # Policy settings shared by the driver policy and every worker.
    policy_params = {
        "ac_bins": "continuous:",
        "ac_noise_std": 0.01,
        "nonlin_type": "tanh",
        "hidden_dims": [256, 256],
        "connection_type": "ff",
    }

    # Create the shared noise table. The object ID is kept so the same
    # table can be handed to each worker below.
    print("Creating shared noise table.")
    noise_ref = create_shared_noise.remote()
    self.noise = SharedNoiseTable(ray.get(noise_ref))

    # Create the actors.
    print("Creating actors.")
    actors = []
    for _ in range(config.num_workers):
        actors.append(
            Worker.remote(config, policy_params, env_name, noise_ref))
    self.workers = actors

    # Driver-side environment: only its spaces are read here.
    environment = gym.make(env_name)
    obs_space = environment.observation_space
    utils.make_session(single_threaded=False)
    self.policy = policies.MujocoPolicy(
        obs_space, environment.action_space, **policy_params)
    tf_util.initialize()
    self.optimizer = optimizers.Adam(self.policy, config.stepsize)
    self.ob_stat = utils.RunningStat(obs_space.shape, eps=1e-2)

    # Progress bookkeeping.
    self.episodes_so_far = 0
    self.timesteps_so_far = 0
    self.tstart = time.time()
    self.iteration = 0
def __init__(self, config, policy_params, env_name, noise,
             min_task_runtime=0.2):
    """Build a worker's environment, preprocessor, session and policy.

    Args:
        config: Mapping of settings; ``"calc_obstat_prob"`` is read here
            and the whole object is stored on the instance.
        policy_params: Keyword arguments forwarded to
            ``policies.GenericPolicy``.
        env_name: Environment identifier passed to ``gym.make`` and to
            ``ModelCatalog.get_preprocessor``.
        noise: Raw noise data wrapped in a ``SharedNoiseTable``.
        min_task_runtime: Stored on the instance; not otherwise used in
            this method.

    Raises:
        AssertionError: If the policy's ``needs_ob_stat`` flag disagrees
            with whether ``config["calc_obstat_prob"]`` is non-zero.
    """
    self.min_task_runtime = min_task_runtime
    self.config = config
    self.policy_params = policy_params
    self.noise = SharedNoiseTable(noise)

    environment = gym.make(env_name)
    self.env = environment
    obs_space = environment.observation_space

    # The preprocessor is looked up from the environment name and the raw
    # observation shape; the transformed shape is stored alongside it.
    preprocessor = ModelCatalog.get_preprocessor(env_name, obs_space.shape)
    self.preprocessor = preprocessor
    self.preprocessor_shape = preprocessor.transform_shape(obs_space.shape)

    self.sess = utils.make_session(single_threaded=True)
    self.policy = policies.GenericPolicy(
        obs_space, environment.action_space, preprocessor, **policy_params)
    tf_util.initialize()

    # Unseeded generator, one per worker instance.
    self.rs = np.random.RandomState()

    # Sanity check that policy and config agree on observation statistics.
    # NOTE: as an assert, this check is skipped when Python runs with -O.
    assert self.policy.needs_ob_stat == (
        self.config["calc_obstat_prob"] != 0)
def __init__(self, config, policy_params, env_name, noise,
             min_task_runtime=0.2):
    """Build a worker's environment, session and policy.

    Args:
        config: Settings object read via attributes; ``calc_obstat_prob``
            is read here and the whole object is stored on the instance.
        policy_params: Keyword arguments forwarded to
            ``policies.MujocoPolicy``.
        env_name: Environment identifier passed to ``gym.make``.
        noise: Raw noise data wrapped in a ``SharedNoiseTable``.
        min_task_runtime: Stored on the instance; not otherwise used in
            this method.

    Raises:
        AssertionError: If the policy's ``needs_ob_stat`` flag disagrees
            with whether ``config.calc_obstat_prob`` is non-zero.
    """
    self.min_task_runtime = min_task_runtime
    self.config = config
    self.policy_params = policy_params
    self.noise = SharedNoiseTable(noise)

    environment = gym.make(env_name)
    self.env = environment

    self.sess = utils.make_session(single_threaded=True)
    self.policy = policies.MujocoPolicy(
        environment.observation_space,
        environment.action_space,
        **policy_params)
    tf_util.initialize()

    # Unseeded generator, one per worker instance.
    self.rs = np.random.RandomState()

    # Sanity check that policy and config agree on observation statistics.
    # NOTE: as an assert, this check is skipped when Python runs with -O.
    assert self.policy.needs_ob_stat == (self.config.calc_obstat_prob != 0)