예제 #1
0
def create_model_oracle(oracle, env, envid, seed, c):
    """Build the model oracle named by ``c['mor_cls']``.

    Args:
        oracle: Base oracle handed to the model oracle's constructor.
        env: Existing environment; passed to ``Env.create_sim_env`` when a
            simulated environment has to be built.
        envid: Environment id; passed to ``Env.create_env`` when
            ``c['env_type'] == 'true'``.
        seed: Base seed. The simulation environment is created with
            ``seed + 1``.
        c: Config mapping. ``'mor_cls'`` is always read. For
            ``Or.SimulationOracle`` the keys ``'env_type'`` and
            ``'rollout_kwargs'`` are read (plus ``'dynamics'`` when the
            env type is ``'mlp'``). ``'lazyor_kwargs'`` is read for the lazy,
            adversarial and aggregated oracles, and ``'aggor_kwargs'`` for
            the aggregated oracle.

    Returns:
        The constructed model oracle.

    Raises:
        ValueError: If the selected class is not one of the handled oracle
            types, or if ``c['env_type']`` is not ``'true'``, ``'mlp'`` or a
            float in ``[0.0, 1.0)``.
    """
    mor_cls = getattr(Or, c['mor_cls'])
    if mor_cls is Or.SimulationOracle:
        et = c['env_type']
        seed = seed + 1
        if et == 'true':
            sim_env = Env.create_env(envid, seed)
        elif et == 'mlp':
            sim_env = Env.create_sim_env(env, seed, dyn_configs=c['dynamics'])
        elif isinstance(et, float) and 0.0 <= et < 1.0:
            sim_env = Env.create_sim_env(env, seed, inaccuracy=et)
        else:
            # Raise instead of assert: asserts are removed under ``python -O``
            # and an invalid value would then be forwarded as the inaccuracy.
            raise ValueError('Unknown env_type {}'.format(et))
        # Bind the simulated env so the oracle gets a ready-to-call rollout
        # generator.
        gen_ro = functools.partial(generate_rollout,
                                   env=sim_env,
                                   **c['rollout_kwargs'])
        model_oracle = mor_cls(oracle, sim_env, gen_ro)
    elif (mor_cls is Or.LazyOracle) or (mor_cls is Or.AdversarialOracle):
        model_oracle = mor_cls(oracle, **c['lazyor_kwargs'])
    elif mor_cls is Or.AggregatedOracle:
        # The aggregated oracle wraps a lazy oracle built from the base one.
        model_oracle = mor_cls(Or.LazyOracle(oracle, **c['lazyor_kwargs']),
                               **c['aggor_kwargs'])
    elif mor_cls is Or.DummyOracle:
        model_oracle = mor_cls(oracle)
    else:
        raise ValueError('Unknown model oracle type.')

    return model_oracle
예제 #2
0
def create_env_from_env(env, env_type, seed, dc=None, rc=None):
    """Create a new environment derived from ``env`` according to ``env_type``.

    Args:
        env: Source environment. For ``'true'`` its ``env.spec.id`` and
            ``_max_episode_steps`` are read; otherwise it is passed to
            ``Env.create_sim_env``.
        env_type: One of ``'true'``, ``'dyn'``, ``'dyn-rw'``, or a float in
            ``[0.0, 1.0)`` that is forwarded as ``inaccuracy``.
        seed: Seed forwarded to the environment factory.
        dc: Config forwarded as ``dc``; required for ``'dyn'`` and
            ``'dyn-rw'`` (presumably the dynamics config -- confirm against
            ``Env.create_sim_env``).
        rc: Config forwarded as ``rc``; required for ``'dyn-rw'``
            (presumably the reward config -- confirm).

    Returns:
        The newly created environment.

    Raises:
        ValueError: If ``env_type`` is not recognized, or a config required
            by the chosen ``env_type`` is ``None``.
    """
    if env_type == 'true':
        new_env = Env.create_env(env.env.spec.id, seed)
        new_env._max_episode_steps = env._max_episode_steps  # set the horizon
        return new_env
    if isinstance(env_type, float) and 0.0 <= env_type < 1.0:
        # the larger inaccuracy is, the larger randomness will be added.
        return Env.create_sim_env(env, seed, inaccuracy=env_type)
    if env_type == 'dyn':
        # Explicit raise: an assert would be stripped under ``python -O``.
        if dc is None:
            raise ValueError("env_type 'dyn' requires dc")
        return Env.create_sim_env(env, seed, dc=dc)
    if env_type == 'dyn-rw':
        if dc is None or rc is None:
            raise ValueError("env_type 'dyn-rw' requires dc and rc")
        return Env.create_sim_env(env, seed, dc=dc, rc=rc)
    raise ValueError('Unknown env_type {}'.format(env_type))
예제 #3
0
def create_env_from_env(env, env_type, seed, dc=None, rc=None):
    """Derive a new environment from ``env`` as selected by ``env_type``.

    Args:
        env: Source environment. In the ``'true'`` case its ``env.spec.id``
            and ``_max_episode_steps`` are read; in every other case it is
            handed to ``Env.create_sim_env``.
        env_type: ``'true'``, ``'dyn'``, ``'dyn-rw'``, or a float in
            ``[0.0, 1.0)`` which is forwarded as ``inaccuracy``.
        seed: Seed forwarded to the environment factory.
        dc: Dynamics config; must not be ``None`` for ``'dyn'`` and
            ``'dyn-rw'``.
        rc: Second config forwarded as ``rc``; must not be ``None`` for
            ``'dyn-rw'`` (presumably the reward config -- confirm).

    Returns:
        The newly created environment.

    Raises:
        ValueError: If ``env_type`` is unknown, or if a config that the
            selected ``env_type`` needs was not supplied.
    """
    if env_type == 'true':
        new_env = Env.create_env(env.env.spec.id, seed)
        new_env._max_episode_steps = env._max_episode_steps  # set the horizon
        return new_env
    elif isinstance(env_type, float) and 0.0 <= env_type < 1.0:
        return Env.create_sim_env(env, seed, inaccuracy=env_type)
    elif env_type == 'dyn':
        # Validate with a real exception; asserts disappear under ``-O``.
        if dc is None:
            raise ValueError("env_type 'dyn' requires dc")
        return Env.create_sim_env(env, seed, dc=dc)
    elif env_type == 'dyn-rw':
        if dc is None or rc is None:
            raise ValueError("env_type 'dyn-rw' requires dc and rc")
        return Env.create_sim_env(env, seed, dc=dc, rc=rc)
    else:
        raise ValueError('Unknown env_type {}'.format(env_type))
예제 #4
0
def test_env(envid):
    """Check that the simulated env tracks the real env step for step.

    Creates the real env and a simulated env (third argument ``0.0``,
    presumably the inaccuracy level), then runs 50 rollouts with Gaussian
    actions. After every step it asserts that states, observations, rewards
    and done flags agree within tolerance, including the values returned by
    the simulated env's ``_batch_reward`` and ``_batch_is_done``.

    Finally it creates a simulated env with third argument ``0.2`` and takes
    a single step without checking the result.

    Args:
        envid: Environment id passed to ``envs.create_env``.
    """
    cprint('Testing env: {}'.format(envid))
    seed = 10
    n_ro = 50
    env = envs.create_env(envid, seed)
    sim_env = envs.create_sim_env(env, seed, 0.0)
    tol = dict(atol=1e-4, rtol=1e-4)

    for _ in range(n_ro):
        obs = env.reset()
        sim_env.reset()
        # Start both envs from the same state (and target, for Reacher).
        sim_env.set_state(env.env.state)
        if isinstance(sim_env, envs.Reacher):
            sim_env.target = env.env.target
        assert np.allclose(env.env.state, sim_env.state)

        done = False
        while not done:
            # Gaussian actions, to have samples outside of range.
            action = np.random.normal(size=env.action_space.sample().shape)
            # Must be computed before stepping: it uses the pre-step
            # observation and simulator state.
            rew_batch = sim_env._batch_reward(
                obs[None], sim_env.state[None], action[None])[0]
            obs, rew, done, _ = env.step(action)
            obs_sim, rew_sim, done_sim, _ = sim_env.step(action)
            done_batch = sim_env._batch_is_done(obs[None])[0]

            assert np.allclose(env.env.state, sim_env.state, **tol)
            assert np.allclose(obs, obs_sim, **tol)
            assert np.isclose(rew, rew_sim, **tol)
            assert np.isclose(rew, rew_batch, **tol)
            assert done == done_sim
            assert done == done_batch

    # Smoke check only: one step in a second simulated env, no assertions.
    noisy_env = envs.create_sim_env(env, seed, 0.2)
    noisy_env.reset()
    action = env.action_space.sample()
    _obs, _rew, _done, _info = noisy_env.step(action)