Example #1
def test_pickleable(self):
    env = GridWorldEnv(desc="8x8")
    round_trip = pickle.loads(pickle.dumps(env))
    assert round_trip
    assert round_trip.start_state == env.start_state
    step_env(round_trip)
    round_trip.close()
    env.close()
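The step_env helper called above comes from the surrounding test suite and is not shown on this page. A minimal sketch of what such a helper typically does, assuming a Gym-style reset/step API (only the name step_env is taken from the example; the body is a reconstruction):

def step_env(env, n=10):
    # Hypothetical reconstruction, not the suite's actual helper:
    # exercise the env for a few random steps to check it still works.
    env.reset()
    for _ in range(n):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            break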
Example #2
def test_does_not_modify_action(self):
    env = GridWorldEnv(desc='8x8')
    a = env.action_space.sample()
    a_copy = a
    env.reset()
    env.step(a)
    assert a == a_copy
    env.close()
Example #3
def test_does_not_modify_action(self):
    env = GridWorldEnv(desc="8x8")
    a = env.action_space.sample()
    a_copy = a
    env.reset()
    env.step(a)
    self.assertEqual(a, a_copy)
    env.close()
Example #4
def setup_method(self):
    self.env = TfEnv(GridWorldEnv(desc='4x4'))
    self.policy = ScriptedPolicy(
        scripted_actions=[2, 2, 1, 0, 3, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1])
    self.algo = Mock(env_spec=self.env.spec,
                     policy=self.policy,
                     max_path_length=16)
Example #5
def setup_method(self):
    ray.init(local_mode=True, ignore_reinit_error=True)

    self.env = TfEnv(GridWorldEnv(desc='4x4'))
    self.policy = ScriptedPolicy(
        scripted_actions=[2, 2, 1, 0, 3, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1])
    self.algo = Mock(
        env_spec=self.env.spec, policy=self.policy, max_path_length=16)
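A setup_method that starts a Ray session usually needs a matching teardown. A minimal sketch of such a teardown_method, assuming the test class owns both the env and the Ray session (ray.shutdown is Ray's real API; the method itself is an assumption, not shown in the source):

def teardown_method(self):
    self.env.close()  # release environment resources
    ray.shutdown()    # end the local Ray session started in setup_method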
Example #6
def other_envs():
    descs = [
        ['FFFS', 'FHFH', 'FFFH', 'HFFG'],
        ['FFSF', 'FFFH', 'FHFH', 'HFFG'],
        ['FFFF', 'FFSH', 'FHFH', 'FFFG'],
        ['FFFF', 'FFFF', 'FSFF', 'FFFF'],
        ['HHFF', 'HHHF', 'HSHF', 'HHHF'],
    ]
    return [TfEnv(GridWorldEnv(desc=desc)) for desc in descs]
Example #7
def envs():
    descs = [
        ['SFFF', 'FHFH', 'FFFH', 'HFFG'],
        ['SFFF', 'FFFH', 'FHFH', 'HFFG'],
        ['SFFF', 'FFFH', 'FHFH', 'FFFG'],
        ['SFFF', 'FFFF', 'FFFF', 'FFFF'],
        ['SHFF', 'HHFF', 'FFFF', 'FFFF'],
    ]
    return [TfEnv(GridWorldEnv(desc=desc)) for desc in descs]
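envs() and other_envs() read like pytest fixtures that feed parametrized tests. A minimal sketch of how such a fixture would be declared and consumed, assuming @pytest.fixture decoration (the test body is illustrative):

import pytest

@pytest.fixture
def envs():
    descs = [['SFFF', 'FHFH', 'FFFH', 'HFFG']]
    return [TfEnv(GridWorldEnv(desc=desc)) for desc in descs]

def test_envs_reset(envs):
    # pytest injects the fixture's return value by argument name.
    for env in envs:
        env.reset()
        env.close()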
Example #8
def test_obtain_samples(ray_local_session_fixture):
    del ray_local_session_fixture
    env = GridWorldEnv(desc='4x4')
    policy = ScriptedPolicy(
        scripted_actions=[2, 2, 1, 0, 3, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1])
    algo = Mock(env_spec=env.spec, policy=policy, max_episode_length=16)

    assert ray.is_initialized()
    workers = WorkerFactory(seed=100,
                            max_episode_length=algo.max_episode_length,
                            n_workers=8)
    sampler1 = RaySampler.from_worker_factory(workers, policy, env)
    sampler2 = LocalSampler.from_worker_factory(workers, policy, env)
    eps1 = sampler1.obtain_samples(0, 1000,
                                   tuple(algo.policy.get_param_values()))
    eps2 = sampler2.obtain_samples(0, 1000,
                                   tuple(algo.policy.get_param_values()))

    assert eps1.observations.shape[0] >= 1000
    assert eps1.actions.shape[0] >= 1000
    assert (sum(eps1.rewards[:eps1.lengths[0]]) == sum(
        eps2.rewards[:eps2.lengths[0]]) == 1)

    true_obs = np.array([0, 1, 2, 6, 10, 14])
    true_actions = np.array([2, 2, 1, 1, 1, 2])
    true_rewards = np.array([0, 0, 0, 0, 0, 1])
    start = 0
    for length in eps1.lengths:
        observations = eps1.observations[start:start + length]
        actions = eps1.actions[start:start + length]
        rewards = eps1.rewards[start:start + length]
        assert np.array_equal(observations, true_obs)
        assert np.array_equal(actions, true_actions)
        assert np.array_equal(rewards, true_rewards)
        start += length
    sampler1.shutdown_worker()
    sampler2.shutdown_worker()
    env.close()
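ray_local_session_fixture is requested only for its side effect and deleted immediately, which suggests it provides a local Ray session for the test's lifetime. A minimal sketch of such a fixture, assuming a yield-style pytest fixture (the body is a reconstruction, not the suite's code):

import pytest

import ray

@pytest.fixture
def ray_local_session_fixture():
    # Start an in-process Ray session so RaySampler needs no cluster.
    ray.init(local_mode=True, ignore_reinit_error=True)
    yield
    ray.shutdown()  # leave no session behind for the next test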
Example #9
def test_obtain_samples():
    env = GarageEnv(GridWorldEnv(desc='4x4'))
    policy = ScriptedPolicy(
        scripted_actions=[2, 2, 1, 0, 3, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1])
    algo = Mock(env_spec=env.spec, policy=policy, max_episode_length=16)

    workers = WorkerFactory(seed=100,
                            max_episode_length=algo.max_episode_length,
                            n_workers=8)
    sampler1 = MultiprocessingSampler.from_worker_factory(workers, policy, env)
    sampler2 = LocalSampler.from_worker_factory(workers, policy, env)
    trajs1 = sampler1.obtain_samples(0, 1000,
                                     tuple(algo.policy.get_param_values()))
    trajs2 = sampler2.obtain_samples(0, 1000,
                                     tuple(algo.policy.get_param_values()))
    # pylint: disable=superfluous-parens
    assert trajs1.observations.shape[0] >= 1000
    assert trajs1.actions.shape[0] >= 1000
    assert (sum(trajs1.rewards[:trajs1.lengths[0]]) == sum(
        trajs2.rewards[:trajs2.lengths[0]]) == 1)

    true_obs = np.array([0, 1, 2, 6, 10, 14])
    true_actions = np.array([2, 2, 1, 1, 1, 2])
    true_rewards = np.array([0, 0, 0, 0, 0, 1])
    start = 0
    for length in trajs1.lengths:
        observations = trajs1.observations[start:start + length]
        actions = trajs1.actions[start:start + length]
        rewards = trajs1.rewards[start:start + length]
        assert np.array_equal(observations, true_obs)
        assert np.array_equal(actions, true_actions)
        assert np.array_equal(rewards, true_rewards)
        start += length
    sampler1.shutdown_worker()
    sampler2.shutdown_worker()
    env.close()
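The expected data above (true_obs [0, 1, 2, 6, 10, 14] against true_actions [2, 2, 1, 1, 1, 2]) is consistent with ScriptedPolicy indexing its action list by the observed state. A minimal sketch of that behavior (a reconstruction from the test's expectations, not garage's implementation):

class ScriptedPolicy:
    """Deterministic policy: look up the action for each integer grid state."""

    def __init__(self, scripted_actions):
        self._scripted_actions = scripted_actions

    def get_action(self, observation):
        # observation is the flattened state index of the grid.
        return self._scripted_actions[observation], {}

    def get_param_values(self):
        return []  # no trainable parameters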
Example #10
def test_ray_batch_sampler(ray_local_session_fixture):
    del ray_local_session_fixture
    env = TfEnv(GridWorldEnv(desc='4x4'))
    policy = ScriptedPolicy(
        scripted_actions=[2, 2, 1, 0, 3, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1])
    algo = Mock(env_spec=env.spec, policy=policy, max_path_length=16)
    assert ray.is_initialized()
    workers = WorkerFactory(seed=100, max_path_length=algo.max_path_length)
    sampler1 = RaySampler(workers, policy, env)
    sampler1.start_worker()
    sampler2 = OnPolicyVectorizedSampler(algo, env)
    sampler2.start_worker()
    trajs1 = sampler1.obtain_samples(0, 1000,
                                     tuple(algo.policy.get_param_values()))
    trajs2 = sampler2.obtain_samples(0, 1000)
    # pylint: disable=superfluous-parens
    assert trajs1.observations.shape[0] >= 1000
    assert trajs1.actions.shape[0] >= 1000
    assert (sum(trajs1.rewards[:trajs1.lengths[0]]) == sum(
        trajs2[0]['rewards']) == 1)

    true_obs = np.array([0, 1, 2, 6, 10, 14])
    true_actions = np.array([2, 2, 1, 1, 1, 2])
    true_rewards = np.array([0, 0, 0, 0, 0, 1])
    start = 0
    for length in trajs1.lengths:
        observations = trajs1.observations[start:start + length]
        actions = trajs1.actions[start:start + length]
        rewards = trajs1.rewards[start:start + length]
        assert np.array_equal(observations, true_obs)
        assert np.array_equal(actions, true_actions)
        assert np.array_equal(rewards, true_rewards)
        start += length
    sampler1.shutdown_worker()
    sampler2.shutdown_worker()
    env.close()
Example #11
def env():
    return TfEnv(GridWorldEnv(desc='4x4'))