Example #1
File: es.py Project: ml-squad/ray
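The ES driver's _init: it builds the policy and Adam optimizer in a local TF session, materializes the shared noise table once in Ray's object store, and launches the worker actors, handing each the noise table's object ID rather than a copy of the array.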
    def _init(self):
        policy_params = {"action_noise_std": 0.01}

        # Build the environment and its observation preprocessor.
        env = self.env_creator(self.config["env_config"])
        preprocessor = ModelCatalog.get_preprocessor(self.registry, env)

        # Build the TF session, the policy to be evolved, and its optimizer.
        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(self.registry, self.sess,
                                             env.action_space, preprocessor,
                                             self.config["observation_filter"],
                                             **policy_params)
        self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])

        # Create the shared noise table.
        print("Creating shared noise table.")
        noise_id = create_shared_noise.remote(self.config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        print("Creating actors.")
        self.workers = [
            Worker.remote(self.registry, self.config, policy_params,
                          self.env_creator, noise_id)
            for _ in range(self.config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.timesteps_so_far = 0
        self.tstart = time.time()
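The shared noise table is the central trick in these snippets: one large random block lives in Ray's object store, and drivers and workers reconstruct perturbations from indices into it instead of shipping noise arrays around. Below is a minimal, self-contained sketch of the pattern; make_noise and NoiseTable are illustrative stand-ins modeled on the create_shared_noise / SharedNoiseTable pair above, not the exact Ray implementations.

import numpy as np
import ray

ray.init()

@ray.remote
def make_noise(count, seed=123):
    # One big block of Gaussian noise, created once per cluster.
    return np.random.RandomState(seed).randn(count).astype(np.float32)

class NoiseTable:
    def __init__(self, noise):
        self.noise = noise

    def get(self, index, dim):
        # A perturbation is just a slice of the shared block.
        return self.noise[index:index + dim]

    def sample_index(self, rng, dim):
        return rng.randint(0, len(self.noise) - dim + 1)

noise_id = make_noise.remote(10_000_000)  # stored once in the object store
table = NoiseTable(ray.get(noise_id))     # zero-copy view for this process
rng = np.random.RandomState(0)
idx = table.sample_index(rng, dim=100)
print(table.get(idx, dim=100).shape)      # (100,)

Because ray.get returns a read-only, zero-copy view of the numpy array, every process on a machine shares one physical copy of the table.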
Example #2
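A worker-side constructor from an earlier version of the same ES code: each worker rebuilds the environment, preprocessor, policy, and a single-threaded TF session locally, wraps the shared noise buffer in its own SharedNoiseTable, and keeps an independent RandomState for sampling noise indices.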
    def __init__(self,
                 config,
                 policy_params,
                 env_creator,
                 noise,
                 min_task_runtime=0.2):
        self.min_task_runtime = min_task_runtime
        self.config = config
        self.policy_params = policy_params
        self.noise = SharedNoiseTable(noise)

        # Each worker builds its own copy of the environment.
        self.env = env_creator()
        self.preprocessor = ModelCatalog.get_preprocessor(
            self.env.spec.id, self.env.observation_space.shape)
        self.preprocessor_shape = self.preprocessor.transform_shape(
            self.env.observation_space.shape)

        # Workers run single-threaded TF sessions, since many share a machine.
        self.sess = utils.make_session(single_threaded=True)
        self.policy = policies.GenericPolicy(self.env.observation_space,
                                             self.env.action_space,
                                             self.preprocessor,
                                             **policy_params)
        tf_util.initialize()

        # Per-worker random stream for sampling noise indices.
        self.rs = np.random.RandomState()

        # The policy tracks observation statistics iff calc_obstat_prob != 0.
        assert self.policy.needs_ob_stat == (
            self.config["calc_obstat_prob"] != 0)
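The min_task_runtime argument hints at how these workers are driven: one remote call keeps collecting rollouts until enough wall-clock time has passed to amortize Ray's per-task overhead. A rough sketch of that loop follows; ToyWorker and do_rollouts are illustrative stand-ins, not the exact Ray code.

import random
import time

class ToyWorker:
    def rollout(self):
        time.sleep(0.05)  # stand-in for one episode of env interaction
        return random.random()

def do_rollouts(worker, min_task_runtime=0.2):
    # Collect at least one rollout, then keep going until enough
    # wall-clock time has passed to justify the remote call.
    results = []
    t_start = time.time()
    while not results or time.time() - t_start < min_task_runtime:
        results.append(worker.rollout())
    return results

print(len(do_rollouts(ToyWorker())))  # 4 or 5 rollouts per call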
Example #3
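The driver-side _init matching the worker above, from the same pre-registry API: alongside the policy and Adam optimizer it keeps a RunningStat of observations and, as in Example #1, shares a single noise table with all workers by object ID.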
    def _init(self):
        policy_params = {"ac_noise_std": 0.01}

        # Build the environment and its observation preprocessor.
        env = self.env_creator()
        preprocessor = ModelCatalog.get_preprocessor(
            env.spec.id, env.observation_space.shape)
        preprocessor_shape = preprocessor.transform_shape(
            env.observation_space.shape)

        # Build the policy, optimizer, and running observation statistics.
        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(env.observation_space,
                                             env.action_space, preprocessor,
                                             **policy_params)
        tf_util.initialize()
        self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])
        self.ob_stat = utils.RunningStat(preprocessor_shape, eps=1e-2)

        # Create the shared noise table.
        print("Creating shared noise table.")
        noise_id = create_shared_noise.remote()
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        print("Creating actors.")
        self.workers = [
            Worker.remote(self.config, policy_params, self.env_creator,
                          noise_id) for _ in range(self.config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.timesteps_so_far = 0
        self.tstart = time.time()
Example #4
File: ars.py Project: nskh/ray
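The ARS driver's _init: it unpacks the ARS hyperparameters from the config, seeds the shared noise table with seed + 3 and worker i with seed + 7 * i, and picks an MLP or linear policy before wrapping its weights in an SGD optimizer.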
    def _init(self):
        # Build the environment and its observation preprocessor.
        env = self.env_creator(self.config["env_config"])
        from ray.rllib import models
        preprocessor = models.ModelCatalog.get_preprocessor(self.registry, env)

        # Read the ARS hyperparameters from the config.
        self.timesteps = 0
        self.num_deltas = self.config["num_deltas"]
        self.deltas_used = self.config["deltas_used"]
        self.step_size = self.config["sgd_stepsize"]
        self.delta_std = self.config["delta_std"]
        seed = self.config["seed"]
        self.shift = self.config["shift"]
        self.max_past_avg_reward = float('-inf')
        self.num_episodes_used = float('inf')

        # Create the shared noise table.
        print("Creating shared noise table.")
        noise_id = create_shared_noise.remote()
        self.deltas = SharedNoiseTable(ray.get(noise_id), seed=seed + 3)

        # Create the actors.
        print("Creating actors.")
        self.num_workers = self.config["num_workers"]
        self.workers = [
            Worker.remote(self.registry,
                          self.config,
                          self.env_creator,
                          seed + 7 * i,
                          deltas=noise_id,
                          rollout_length=env.spec.max_episode_steps,
                          delta_std=self.delta_std)
            for i in range(self.config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.timesteps_so_far = 0

        self.sess = utils.make_session(single_threaded=False)
        # initialize policy
        if self.config["policy"] == "MLP":
            self.policy = MLPPolicy(self.registry, self.sess, env.action_space,
                                    preprocessor,
                                    self.config["observation_filter"])
        else:
            self.policy = LinearPolicy(self.registry, self.sess,
                                       env.action_space, preprocessor,
                                       self.config["observation_filter"])
        self.w_policy = self.policy.get_weights()

        # initialize optimization algorithm
        self.optimizer = optimizers.SGD(self.w_policy,
                                        self.config["sgd_stepsize"])
        print("Initialization of ARS complete.")
Example #5
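A compact worker constructor from the registry-based API, mirroring Example #2: environment, preprocessor, single-threaded session, and policy are all rebuilt per worker from the config and the shared noise buffer.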
    def __init__(self, registry, config, policy_params, env_creator, noise,
                 min_task_runtime=0.2):
        self.min_task_runtime = min_task_runtime
        self.config = config
        self.policy_params = policy_params
        self.noise = SharedNoiseTable(noise)

        # Each worker builds its own copy of the environment.
        self.env = env_creator(config["env_config"])
        self.preprocessor = ModelCatalog.get_preprocessor(registry, self.env)

        # Workers run single-threaded TF sessions, since many share a machine.
        self.sess = utils.make_session(single_threaded=True)
        self.policy = policies.GenericPolicy(
            registry, self.sess, self.env.action_space, self.preprocessor,
            config["observation_filter"], **policy_params)
Example #6
File: ars.py Project: nskh/ray
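The ARS worker constructor: each worker seeds its own copy of the environment, attaches to the shared noise table through an independent stream (env_seed + 7), and builds whichever policy class the config selects.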
    def __init__(self,
                 registry,
                 config,
                 env_creator,
                 env_seed,
                 deltas=None,
                 rollout_length=1000,
                 delta_std=0.02):
        # initialize OpenAI environment for each worker
        self.env = env_creator(config["env_config"])
        self.env.seed(env_seed)

        from ray.rllib import models
        self.preprocessor = models.ModelCatalog.get_preprocessor(
            registry, self.env)

        # Each worker gets access to the shared noise table, with an
        # independent random stream for sampling from it.
        self.deltas = SharedNoiseTable(deltas, env_seed + 7)

        self.delta_std = delta_std
        self.rollout_length = rollout_length
        self.sess = utils.make_session(single_threaded=True)
        if config["policy"] == "Linear":
            self.policy = LinearPolicy(registry, self.sess,
                                       self.env.action_space,
                                       self.preprocessor,
                                       config["observation_filter"])
        else:
            self.policy = MLPPolicy(registry, self.sess, self.env.action_space,
                                    self.preprocessor,
                                    config["observation_filter"])