Example 1
    def test_ray_batch_sampler(self):
        sampler1 = RaySampler(self.algo,
                              self.env,
                              seed=100,
                              num_processors=1,
                              sampler_worker_cls=SamplerWorker)
        sampler1.start_worker()
        sampler2 = OnPolicyVectorizedSampler(self.algo, self.env)
        sampler2.start_worker()
        trajs1 = sampler1.obtain_samples(0, 16)
        trajs2 = sampler2.obtain_samples(0, 1)
        assert (trajs1[0]['observations'].shape == np.array(
            trajs2[0]['observations']).shape == (6, 16))
        traj2_action_shape = np.array(trajs2[0]['actions']).shape
        assert (trajs1[0]['actions'].shape == traj2_action_shape == (6, 4))
        assert (sum(trajs1[0]['rewards']) == sum(trajs2[0]['rewards']) == 1)

        true_obs = np.array(
            [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
             [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
             [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
             [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
             [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
             [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]])
        true_actions = np.array([[0., 0., 1., 0.], [0., 0., 1., 0.],
                                 [0., 1., 0., 0.], [0., 1., 0., 0.],
                                 [0., 1., 0., 0.], [0., 0., 1., 0.]])
        true_rewards = np.array([0, 0, 0, 0, 0, 1])
        for trajectory in trajs1:
            assert (np.array_equal(trajectory['observations'], true_obs))
            assert (np.array_equal(trajectory['actions'], true_actions))
            assert (np.array_equal(trajectory['rewards'], true_rewards))
        sampler1.shutdown_worker()
        sampler2.shutdown_worker()
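
These class-based variants reference self.env, self.policy, and self.algo without showing their construction. Example 5 below builds the same fixtures inline, so the shared setup presumably looks like the sketch here; the import paths and the setup_method name are assumptions, only the fixture values are taken from Example 5.

    from unittest.mock import Mock

    # Import paths vary across garage versions; these are assumptions.
    from garage.envs import GridWorldEnv
    from garage.tf.envs import TfEnv
    from tests.fixtures.policies import ScriptedPolicy


    class TestRaySampler:

        def setup_method(self):
            # Same fixtures Example 5 builds inline: a deterministic 4x4
            # grid world and a policy that replays one action per state.
            self.env = TfEnv(GridWorldEnv(desc='4x4'))
            self.policy = ScriptedPolicy(
                scripted_actions=[2, 2, 1, 0, 3, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1])
            self.algo = Mock(env_spec=self.env.spec,
                             policy=self.policy,
                             max_path_length=16)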
Example 2
    def test_ray_batch_sampler(self):
        workers = WorkerFactory(seed=100,
                                max_path_length=self.algo.max_path_length)
        sampler1 = RaySampler(workers, self.policy, self.env, num_processors=1)
        sampler1.start_worker()
        sampler2 = OnPolicyVectorizedSampler(self.algo, self.env)
        sampler2.start_worker()
        trajs1 = sampler1.obtain_samples(
            0, 1000, tuple(self.algo.policy.get_param_values()))
        trajs2 = sampler2.obtain_samples(0, 1000)
        # pylint: disable=superfluous-parens
        assert (trajs1[0]['observations'].shape == np.array(
            trajs2[0]['observations']).shape == (6, ))
        traj2_action_shape = np.array(trajs2[0]['actions']).shape
        assert trajs1[0]['actions'].shape == traj2_action_shape == (6, )
        assert sum(trajs1[0]['rewards']) == sum(trajs2[0]['rewards']) == 1

        true_obs = np.array([0, 1, 2, 6, 10, 14])
        true_actions = np.array([2, 2, 1, 1, 1, 2])
        true_rewards = np.array([0, 0, 0, 0, 0, 1])
        for trajectory in trajs1:
            assert np.array_equal(trajectory['observations'], true_obs)
            assert np.array_equal(trajectory['actions'], true_actions)
            assert np.array_equal(trajectory['rewards'], true_rewards)
        sampler1.shutdown_worker()
        sampler2.shutdown_worker()
Example 3
    def test_ray_batch_sampler(self):
        workers = WorkerFactory(seed=100,
                                max_path_length=self.algo.max_path_length)
        sampler1 = RaySampler(workers, self.policy, self.env)
        sampler1.start_worker()
        sampler2 = OnPolicyVectorizedSampler(self.algo, self.env)
        sampler2.start_worker()
        trajs1 = sampler1.obtain_samples(
            0, 1000, tuple(self.algo.policy.get_param_values()))
        trajs2 = sampler2.obtain_samples(0, 1000)
        # pylint: disable=superfluous-parens
        assert trajs1.observations.shape[0] >= 1000
        assert trajs1.actions.shape[0] >= 1000
        assert (sum(trajs1.rewards[:trajs1.lengths[0]]) == sum(
            trajs2[0]['rewards']) == 1)

        true_obs = np.array([0, 1, 2, 6, 10, 14])
        true_actions = np.array([2, 2, 1, 1, 1, 2])
        true_rewards = np.array([0, 0, 0, 0, 0, 1])
        start = 0
        for length in trajs1.lengths:
            observations = trajs1.observations[start:start + length]
            actions = trajs1.actions[start:start + length]
            rewards = trajs1.rewards[start:start + length]
            assert np.array_equal(observations, true_obs)
            assert np.array_equal(actions, true_actions)
            assert np.array_equal(rewards, true_rewards)
            start += length
        sampler1.shutdown_worker()
        sampler2.shutdown_worker()
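
Unlike Examples 1 and 2, this variant returns one batch whose observations, actions, and rewards are flattened across trajectories, with per-trajectory sizes in trajs1.lengths; the loop above slices them apart by hand. An equivalent split with np.split, shown as a minimal sketch (the batch attribute names are the ones the test uses):

    import numpy as np


    def split_trajectories(batch):
        """Split flat sample arrays back into one dict per trajectory."""
        # np.split expects cut points: the cumulative lengths minus the last.
        cuts = np.cumsum(batch.lengths)[:-1]
        return [
            dict(observations=o, actions=a, rewards=r)
            for o, a, r in zip(np.split(batch.observations, cuts),
                               np.split(batch.actions, cuts),
                               np.split(batch.rewards, cuts))
        ]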
Example 4
    def test_ray_batch_sampler(self, ray_local_session_fixture):
        del ray_local_session_fixture
        assert ray.is_initialized()
        workers = WorkerFactory(
            seed=100, max_episode_length=self.algo.max_episode_length)
        sampler1 = RaySampler(workers, self.policy, self.env)
        sampler1.start_worker()
        sampler1.shutdown_worker()
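
This variant is only a start/shutdown smoke test, and it uses max_episode_length, the newer name for the max_path_length seen in the other examples. The ray_local_session_fixture it consumes is not shown anywhere in these snippets; since the test asserts ray.is_initialized(), it is presumably a pytest fixture along these lines (a sketch; everything except the fixture name is an assumption):

    import pytest
    import ray


    @pytest.fixture
    def ray_local_session_fixture():
        # local_mode executes Ray tasks serially in-process, which keeps
        # the sampled trajectories deterministic for the assertions above.
        ray.init(local_mode=True)
        yield
        ray.shutdown()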
Example 5
def test_ray_batch_sampler(ray_local_session_fixture):
    del ray_local_session_fixture
    env = TfEnv(GridWorldEnv(desc='4x4'))
    policy = ScriptedPolicy(
        scripted_actions=[2, 2, 1, 0, 3, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1])
    algo = Mock(env_spec=env.spec, policy=policy, max_path_length=16)
    assert ray.is_initialized()
    workers = WorkerFactory(seed=100, max_path_length=algo.max_path_length)
    sampler1 = RaySampler(workers, policy, env)
    sampler1.start_worker()
    sampler2 = OnPolicyVectorizedSampler(algo, env)
    sampler2.start_worker()
    trajs1 = sampler1.obtain_samples(0, 1000,
                                     tuple(algo.policy.get_param_values()))
    trajs2 = sampler2.obtain_samples(0, 1000)
    # pylint: disable=superfluous-parens
    assert trajs1.observations.shape[0] >= 1000
    assert trajs1.actions.shape[0] >= 1000
    assert (sum(trajs1.rewards[:trajs1.lengths[0]]) == sum(
        trajs2[0]['rewards']) == 1)

    true_obs = np.array([0, 1, 2, 6, 10, 14])
    true_actions = np.array([2, 2, 1, 1, 1, 2])
    true_rewards = np.array([0, 0, 0, 0, 0, 1])
    start = 0
    for length in trajs1.lengths:
        observations = trajs1.observations[start:start + length]
        actions = trajs1.actions[start:start + length]
        rewards = trajs1.rewards[start:start + length]
        assert np.array_equal(observations, true_obs)
        assert np.array_equal(actions, true_actions)
        assert np.array_equal(rewards, true_rewards)
        start += length
    sampler1.shutdown_worker()
    sampler2.shutdown_worker()
    env.close()
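
The expected values (true_obs, true_actions, true_rewards) are only reproducible because ScriptedPolicy maps each discrete GridWorldEnv state to a fixed entry of scripted_actions. A minimal sketch of that behavior, assuming integer observations index the script and the usual (action, agent_info) return convention:

    class ScriptedPolicy:
        """Sketch: replay a fixed action for each discrete state.

        The real test fixture has more surface area (get_param_values,
        reset, ...); this shows only the indexing the tests rely on.
        """

        def __init__(self, scripted_actions):
            self._scripted_actions = scripted_actions

        def get_action(self, observation):
            # observation is the integer grid state, e.g. 0, 1, 2, 6, 10, 14;
            # with state 0 -> action 2, the agent walks the scripted route.
            return self._scripted_actions[observation], {}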
Example 6
    def test_ray_batch_sampler(self):
        workers = WorkerFactory(seed=100,
                                max_path_length=self.algo.max_path_length)
        sampler1 = RaySampler(workers, self.policy, self.env)
        sampler1.start_worker()
        sampler1.shutdown_worker()