Example #1
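The constructor of the EvolutionStrategies driver. It stamps the algorithm name into the config, builds a local environment, a GenericPolicy with an Adam optimizer and running observation statistics, then places one shared noise table in Ray's object store and launches the remote workers.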
    def __init__(self, env_name, config, upload_dir=None):
        config.update({"alg": "EvolutionStrategies"})

        Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)

        # Std of the Gaussian noise added to actions during rollouts.
        policy_params = {
            "ac_noise_std": 0.01
        }

        # Build the driver's local environment, TF session, policy, and optimizer.
        env = gym.make(env_name)
        utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(
            env.observation_space, env.action_space, **policy_params)
        tf_util.initialize()
        self.optimizer = optimizers.Adam(self.policy, config["stepsize"])
        self.ob_stat = utils.RunningStat(env.observation_space.shape, eps=1e-2)

        # Create the shared noise table.
        print("Creating shared noise table.")
        noise_id = create_shared_noise.remote()
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        print("Creating actors.")
        self.workers = [
            Worker.remote(config, policy_params, env_name, noise_id)
            for _ in range(config["num_workers"])]

        # Bookkeeping for reporting training progress.
        self.episodes_so_far = 0
        self.timesteps_so_far = 0
        self.tstart = time.time()
        self.iteration = 0
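The noise-table pattern above puts one large random block into Ray's object store so the driver and every worker can read it without copying. Below is a minimal, self-contained sketch of that idea, assuming Ray and NumPy; the create_shared_noise and SharedNoiseTable stand-ins are illustrative, not the library's own definitions.

import numpy as np
import ray

@ray.remote
def create_shared_noise(count=25000000, seed=123):
    # One large, seeded noise block, stored once and shared by all workers.
    return np.random.RandomState(seed).randn(count).astype(np.float32)

class SharedNoiseTable:
    def __init__(self, noise):
        self.noise = noise

    def get(self, i, dim):
        # A perturbation of dimension dim is just a slice of the block.
        return self.noise[i:i + dim]

    def sample_index(self, rng, dim):
        return rng.randint(0, len(self.noise) - dim + 1)

ray.init()
noise_id = create_shared_noise.remote()
table = SharedNoiseTable(ray.get(noise_id))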
Example #2
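A variant of the same driver that builds a MujocoPolicy from an explicit architecture spec and reads config values as attributes (config.num_workers, config.stepsize) rather than as dictionary keys.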
    def __init__(self, env_name, config):
        Algorithm.__init__(self, env_name, config)

        # Policy architecture: continuous actions, tanh feed-forward net
        # with two 256-unit hidden layers.
        policy_params = {
            "ac_bins": "continuous:",
            "ac_noise_std": 0.01,
            "nonlin_type": "tanh",
            "hidden_dims": [256, 256],
            "connection_type": "ff"
        }

        # Create the shared noise table.
        print("Creating shared noise table.")
        noise_id = create_shared_noise.remote()
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        print("Creating actors.")
        self.workers = [
            Worker.remote(config, policy_params, env_name, noise_id)
            for _ in range(config.num_workers)
        ]

        # Build the driver's local environment, TF session, policy, and optimizer.
        env = gym.make(env_name)
        utils.make_session(single_threaded=False)
        self.policy = policies.MujocoPolicy(env.observation_space,
                                            env.action_space, **policy_params)
        tf_util.initialize()
        self.optimizer = optimizers.Adam(self.policy, config.stepsize)
        self.ob_stat = utils.RunningStat(env.observation_space.shape, eps=1e-2)

        # Bookkeeping for reporting training progress.
        self.episodes_so_far = 0
        self.timesteps_so_far = 0
        self.tstart = time.time()
        self.iteration = 0
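The utils.RunningStat object above maintains the running mean and standard deviation used to normalize observations. A sketch of what such a tracker typically does, using sum and sum-of-squares bookkeeping; this stand-in is an assumption, not the library's implementation.

import numpy as np

class RunningStat:
    def __init__(self, shape, eps=1e-2):
        # eps seeds the count and variance so std is never zero early on.
        self.sum = np.zeros(shape, dtype=np.float64)
        self.sumsq = np.full(shape, eps, dtype=np.float64)
        self.count = eps

    def increment(self, obs):
        self.sum += obs
        self.sumsq += np.square(obs)
        self.count += 1

    @property
    def mean(self):
        return self.sum / self.count

    @property
    def std(self):
        return np.sqrt(np.maximum(
            self.sumsq / self.count - np.square(self.mean), 1e-2))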
Example #3
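The constructor of a remote Worker actor. Each worker reconstructs its own environment, observation preprocessor, TensorFlow session, and policy, and wraps the noise array shipped from the driver in a SharedNoiseTable.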
    def __init__(self,
                 config,
                 policy_params,
                 env_name,
                 noise,
                 min_task_runtime=0.2):
        self.min_task_runtime = min_task_runtime
        self.config = config
        self.policy_params = policy_params
        self.noise = SharedNoiseTable(noise)

        # Each worker builds its own environment and a matching observation
        # preprocessor.
        self.env = gym.make(env_name)
        self.preprocessor = ModelCatalog.get_preprocessor(
            env_name, self.env.observation_space.shape)
        self.preprocessor_shape = self.preprocessor.transform_shape(
            self.env.observation_space.shape)

        # Each worker owns a single-threaded TF session and its own copy of
        # the policy.
        self.sess = utils.make_session(single_threaded=True)
        self.policy = policies.GenericPolicy(self.env.observation_space,
                                             self.env.action_space,
                                             self.preprocessor,
                                             **policy_params)
        tf_util.initialize()

        # Per-worker RNG for sampling offsets into the shared noise table.
        self.rs = np.random.RandomState()

        # The policy should track observation statistics iff the config
        # samples them.
        assert self.policy.needs_ob_stat == (
            self.config["calc_obstat_prob"] != 0)
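On each task, a worker like this one typically samples an offset into the shared table, perturbs the policy weights in both directions, and sends back the index together with the resulting returns. A sketch under assumed names: set_trainable_flat, act, and the rollout helper are hypothetical, not this library's API.

import numpy as np

def rollout(policy, env, max_steps=1000):
    # Hypothetical evaluation loop; policy.act(obs) is an assumption.
    obs, total_reward = env.reset(), 0.0
    for _ in range(max_steps):
        obs, reward, done, _ = env.step(policy.act(obs))
        total_reward += reward
        if done:
            break
    return total_reward

def do_rollouts(self, theta, noise_std=0.02):
    # Sample one offset; the driver can rebuild the same perturbation from
    # this index alone, so only scalars travel back through Ray.
    idx = self.noise.sample_index(self.rs, theta.size)
    perturbation = noise_std * self.noise.get(idx, theta.size)

    returns = []
    for sign in (1.0, -1.0):  # antithetic: evaluate theta +/- perturbation
        self.policy.set_trainable_flat(theta + sign * perturbation)
        returns.append(rollout(self.policy, self.env))
    return idx, returns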
Example #4
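The same Worker constructor without the preprocessor step. The private np.random.RandomState gives each worker an independent stream for choosing offsets into the shared noise table.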
    def __init__(self,
                 config,
                 policy_params,
                 env_name,
                 noise,
                 min_task_runtime=0.2):
        self.min_task_runtime = min_task_runtime
        self.config = config
        self.policy_params = policy_params
        self.noise = SharedNoiseTable(noise)

        # Each worker builds its own environment, single-threaded TF session,
        # and copy of the policy.
        self.env = gym.make(env_name)
        self.sess = utils.make_session(single_threaded=True)
        self.policy = policies.MujocoPolicy(self.env.observation_space,
                                            self.env.action_space,
                                            **policy_params)
        tf_util.initialize()

        self.rs = np.random.RandomState()

        # The policy should track observation statistics iff the config
        # samples them.
        assert self.policy.needs_ob_stat == (self.config.calc_obstat_prob != 0)
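On the driver side, the (index, returns) pairs coming back from the workers are enough to rebuild each perturbation and form the standard antithetic ES gradient estimate. A sketch of that update; every name below is assumed rather than taken from the library.

import numpy as np

def compute_gradient(noise, theta, results, noise_std=0.02):
    # results: list of (idx, (return_pos, return_neg)) pairs from workers.
    grad = np.zeros_like(theta)
    for idx, (ret_pos, ret_neg) in results:
        eps = noise.get(idx, theta.size)
        # The return difference weights the perturbation that produced it.
        grad += (ret_pos - ret_neg) * eps
    return grad / (2.0 * len(results) * noise_std)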