Python GenericPolicy 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: ray.rllib.es.policies

메소드/함수: GenericPolicy

hotexamples.com에서의 예제들: 4

Python GenericPolicy - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 ray.rllib.es.policies.GenericPolicy의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 각 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

    def _init(self):
        """Set up the policy, optimizer, shared noise table and workers.

        Reads ``env_config``, ``observation_filter``, ``stepsize``,
        ``noise_size`` and ``num_workers`` from ``self.config`` and
        initialises the episode/timestep counters and the start time.
        """
        policy_params = dict(action_noise_std=0.01)

        environment = self.env_creator(self.config["env_config"])
        from ray.rllib import models
        obs_preprocessor = models.ModelCatalog.get_preprocessor(
            self.registry, environment)

        # The session is created first because the policy receives it.
        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(
            self.registry,
            self.sess,
            environment.action_space,
            obs_preprocessor,
            self.config["observation_filter"],
            **policy_params)
        self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])

        # Build the noise table; the same id is later handed to every worker.
        print("Creating shared noise table.")
        noise_ref = create_shared_noise.remote(self.config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_ref))

        # Start one worker per configured slot.
        print("Creating actors.")
        actors = []
        for _ in range(self.config["num_workers"]):
            actors.append(
                Worker.remote(
                    self.registry, self.config, policy_params,
                    self.env_creator, noise_ref))
        self.workers = actors

        # Progress counters and the wall-clock start time.
        self.episodes_so_far = 0
        self.timesteps_so_far = 0
        self.tstart = time.time()

예제 #2

파일 보기

    def _init(self):
        """Set up the policy, optimizer, running statistics and workers.

        Reads ``stepsize`` and ``num_workers`` from ``self.config`` and
        initialises the episode/timestep counters and the start time.
        """
        policy_params = dict(ac_noise_std=0.01)

        environment = self.env_creator()
        env_id = environment.spec.id
        raw_shape = environment.observation_space.shape
        obs_preprocessor = ModelCatalog.get_preprocessor(env_id, raw_shape)
        transformed_shape = obs_preprocessor.transform_shape(raw_shape)

        # Order matters here: session, then policy, then initialization.
        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(
            environment.observation_space,
            environment.action_space,
            obs_preprocessor,
            **policy_params)
        tf_util.initialize()
        self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])
        self.ob_stat = utils.RunningStat(transformed_shape, eps=1e-2)

        # Build the noise table; the same id is later handed to every worker.
        print("Creating shared noise table.")
        noise_ref = create_shared_noise.remote()
        self.noise = SharedNoiseTable(ray.get(noise_ref))

        # Start one worker per configured slot.
        print("Creating actors.")
        actors = []
        for _ in range(self.config["num_workers"]):
            actors.append(
                Worker.remote(
                    self.config, policy_params, self.env_creator, noise_ref))
        self.workers = actors

        # Progress counters and the wall-clock start time.
        self.episodes_so_far = 0
        self.timesteps_so_far = 0
        self.tstart = time.time()

예제 #3

파일 보기

파일: es.py 프로젝트: vtpp2014/ray

    def __init__(self, config, policy_params, env_name, noise,
                 min_task_runtime=0.2):
        """Build the environment, preprocessor, session and policy.

        Args:
            config: Mapping stored on the instance; ``calc_obstat_prob`` is
                read from it for the final consistency check.
            policy_params: Keyword arguments forwarded to ``GenericPolicy``.
            env_name: Name passed to ``gym.make`` and to the preprocessor
                lookup.
            noise: Value wrapped in a ``SharedNoiseTable``.
            min_task_runtime: Stored unchanged on the instance.

        Raises:
            AssertionError: If ``policy.needs_ob_stat`` does not match
                whether ``calc_obstat_prob`` is non-zero.
        """
        self.min_task_runtime = min_task_runtime
        self.config = config
        self.policy_params = policy_params
        self.noise = SharedNoiseTable(noise)

        env = gym.make(env_name)
        self.env = env
        raw_shape = env.observation_space.shape
        preprocessor = ModelCatalog.get_preprocessor(env_name, raw_shape)
        self.preprocessor = preprocessor
        self.preprocessor_shape = preprocessor.transform_shape(raw_shape)

        # Order matters here: session, then policy, then initialization.
        self.sess = utils.make_session(single_threaded=True)
        self.policy = policies.GenericPolicy(
            env.observation_space, env.action_space, preprocessor,
            **policy_params)
        tf_util.initialize()

        self.rs = np.random.RandomState()

        # Both operands stay inside the assert so nothing runs under -O.
        assert self.policy.needs_ob_stat == (
            self.config["calc_obstat_prob"] != 0)

예제 #4

파일 보기

파일: es.py 프로젝트: ray-project/sandbox

    def __init__(self,
                 registry,
                 config,
                 policy_params,
                 env_creator,
                 noise,
                 min_task_runtime=0.2):
        """Build the environment, preprocessor, session and policy.

        Args:
            registry: Passed to the preprocessor lookup and to the policy.
            config: Mapping stored on the instance; ``env_config`` and
                ``observation_filter`` are read from it.
            policy_params: Keyword arguments forwarded to ``GenericPolicy``.
            env_creator: Callable invoked with ``config["env_config"]`` to
                create the environment.
            noise: Value wrapped in a ``SharedNoiseTable``.
            min_task_runtime: Stored unchanged on the instance.
        """
        self.min_task_runtime = min_task_runtime
        self.config = config
        self.policy_params = policy_params
        self.noise = SharedNoiseTable(noise)

        env = env_creator(config["env_config"])
        self.env = env
        preprocessor = ModelCatalog.get_preprocessor(registry, env)
        self.preprocessor = preprocessor

        # The session is created first because the policy receives it.
        session = utils.make_session(single_threaded=True)
        self.sess = session
        self.policy = policies.GenericPolicy(
            registry,
            session,
            env.action_space,
            preprocessor,
            config["observation_filter"],
            **policy_params)