Python RunningStat Examples

Programming Language: Python

Namespace/Package Name: ray.rllib.es.utils

Method/Function: RunningStat

Examples at hotexamples.com: 2

Python RunningStat - 2 examples found. These are the top-rated real-world Python examples of ray.rllib.es.utils.RunningStat, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: es.py Project: techscientist/ray-1

    def _init(self):
        """Build the policy, optimizer, noise table and remote workers.

        Creates an environment via ``self.env_creator``, derives the
        preprocessed observation shape, and then sets up, in order: the
        session, the policy, the Adam optimizer, the running observation
        statistics, the shared noise table and ``config["num_workers"]``
        remote ``Worker`` actors. Finally resets the episode/timestep
        counters and records the start time.
        """
        policy_kwargs = dict(ac_noise_std=0.01)

        env = self.env_creator()
        obs_space = env.observation_space
        preprocessor = ModelCatalog.get_preprocessor(env.spec.id,
                                                     obs_space.shape)
        obs_shape = preprocessor.transform_shape(obs_space.shape)

        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(
            obs_space, env.action_space, preprocessor, **policy_kwargs)
        tf_util.initialize()
        self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])
        self.ob_stat = utils.RunningStat(obs_shape, eps=1e-2)

        # The noise table is created remotely once; its object id is also
        # handed to every worker below.
        print("Creating shared noise table.")
        shared_noise_id = create_shared_noise.remote()
        self.noise = SharedNoiseTable(ray.get(shared_noise_id))

        # Spin up one remote worker actor per configured worker slot.
        print("Creating actors.")
        actors = []
        for _ in range(self.config["num_workers"]):
            actors.append(
                Worker.remote(self.config, policy_kwargs, self.env_creator,
                              shared_noise_id))
        self.workers = actors

        # Progress bookkeeping.
        self.episodes_so_far = 0
        self.timesteps_so_far = 0
        self.tstart = time.time()

Example #2

Show file

File: es.py Project: techscientist/ray-1

    def do_rollouts(self, params, ob_mean, ob_std, timestep_limit=None):
        """Run mirrored noisy rollouts around ``params`` and return results.

        The policy weights are set to ``params``; if the policy reports
        ``needs_ob_stat`` its observation statistics are set from
        ``ob_mean`` / ``ob_std``. Perturbation pairs (``params + noise`` and
        ``params - noise``) are then evaluated repeatedly until at least one
        pair has been run and ``self.min_task_runtime`` seconds have elapsed.

        Args:
            params: Flat parameter vector passed to
                ``policy.set_trainable_flat``.
            ob_mean: Observation mean forwarded to ``policy.set_ob_stat``.
            ob_std: Observation std forwarded to ``policy.set_ob_stat``.
            timestep_limit: Forwarded unchanged to
                ``rollout_and_update_ob_stat``.

        Returns:
            A ``Result`` holding, for every evaluated pair, the noise index,
            the summed rewards, the summed reward signs and the episode
            lengths, plus the observation sum / sum of squares / count
            accumulated during this task (sum and sumsq are ``None`` when
            the count is 0).

        Raises:
            NotImplementedError: If ``config["eval_prob"]`` is non-zero.
        """
        # Set the network weights.
        self.policy.set_trainable_flat(params)

        if self.policy.needs_ob_stat:
            self.policy.set_ob_stat(ob_mean, ob_std)

        if self.config["eval_prob"] != 0:
            raise NotImplementedError("Eval rollouts are not implemented.")

        noise_inds, returns, sign_returns, lengths = [], [], [], []
        # We set eps=0 because we're incrementing only.
        task_ob_stat = utils.RunningStat(self.preprocessor_shape, eps=0)

        # Perform some rollouts with noise. Always run at least one pair,
        # then keep going until the minimum task runtime has been reached.
        task_tstart = time.time()
        while (not noise_inds
               or time.time() - task_tstart < self.min_task_runtime):
            noise_idx = self.noise.sample_index(self.rs,
                                                self.policy.num_params)
            perturbation = self.config["noise_stdev"] * self.noise.get(
                noise_idx, self.policy.num_params)

            # These two sampling steps could be done in parallel on different
            # actors letting us update twice as frequently.
            self.policy.set_trainable_flat(params + perturbation)
            rews_pos, len_pos = self.rollout_and_update_ob_stat(
                timestep_limit, task_ob_stat)

            self.policy.set_trainable_flat(params - perturbation)
            rews_neg, len_neg = self.rollout_and_update_ob_stat(
                timestep_limit, task_ob_stat)

            noise_inds.append(noise_idx)
            returns.append([rews_pos.sum(), rews_neg.sum()])
            sign_returns.append(
                [np.sign(rews_pos).sum(),
                 np.sign(rews_neg).sum()])
            lengths.append([len_pos, len_neg])

        # Return only after the loop has finished: returning from inside the
        # loop body would end the task after a single pair and make the
        # min_task_runtime condition above unreachable.
        return Result(
            noise_inds_n=np.array(noise_inds),
            returns_n2=np.array(returns, dtype=np.float32),
            sign_returns_n2=np.array(sign_returns, dtype=np.float32),
            lengths_n2=np.array(lengths, dtype=np.int32),
            eval_return=None,
            eval_length=None,
            ob_sum=(None if task_ob_stat.count == 0 else task_ob_stat.sum),
            ob_sumsq=(None
                      if task_ob_stat.count == 0 else task_ob_stat.sumsq),
            ob_count=task_ob_stat.count)