Beispiel #1
0
def create_model_oracle(oracle, env, envid, seed, c):
    """Build the model oracle named by ``c['mor_cls']``.

    The class is looked up by name on ``Or`` and constructed as follows:

    * ``SimulationOracle``: a simulated environment is created with
      ``seed + 1`` according to ``c['env_type']`` (``'true'``, ``'mlp'`` or
      a float in ``[0, 1)`` forwarded as ``inaccuracy``) and handed to the
      oracle together with a ``generate_rollout`` partial bound to that
      environment and ``c['rollout_kwargs']``.
    * ``LazyOracle`` / ``AdversarialOracle``: built from ``oracle`` and
      ``c['lazyor_kwargs']``.
    * ``AggregatedOracle``: wraps a ``LazyOracle`` (built as above) and
      additionally receives ``c['aggor_kwargs']``.
    * ``DummyOracle``: built from ``oracle`` alone.

    Args:
        oracle: the base oracle passed to the model oracle constructor.
        env: environment used as the template for simulated environments.
        envid: environment id, used when ``c['env_type'] == 'true'``.
        seed: base seed; the simulated environment uses ``seed + 1``.
        c: configuration mapping (keys as described above).

    Returns:
        The constructed model oracle.

    Raises:
        ValueError: if the resolved class is none of the supported ones.
        AssertionError: if ``c['env_type']`` is neither ``'true'``,
            ``'mlp'`` nor a float in ``[0, 1)``.
    """
    oracle_cls = getattr(Or, c['mor_cls'])

    if oracle_cls is Or.SimulationOracle:
        env_type = c['env_type']
        sim_seed = seed + 1  # the simulator gets its own seed
        if env_type == 'true':
            sim_env = Env.create_env(envid, sim_seed)
        elif env_type == 'mlp':
            sim_env = Env.create_sim_env(env, sim_seed,
                                         dyn_configs=c['dynamics'])
        else:
            # Anything else must be an inaccuracy level in [0, 1).
            assert isinstance(env_type, float) and 0.0 <= env_type < 1.0
            sim_env = Env.create_sim_env(env, sim_seed, inaccuracy=env_type)
        rollout_fn = functools.partial(generate_rollout,
                                       env=sim_env,
                                       **c['rollout_kwargs'])
        return oracle_cls(oracle, sim_env, rollout_fn)

    if (oracle_cls is Or.LazyOracle) or (oracle_cls is Or.AdversarialOracle):
        return oracle_cls(oracle, **c['lazyor_kwargs'])

    if oracle_cls is Or.AggregatedOracle:
        lazy_oracle = Or.LazyOracle(oracle, **c['lazyor_kwargs'])
        return oracle_cls(lazy_oracle, **c['aggor_kwargs'])

    if oracle_cls is Or.DummyOracle:
        return oracle_cls(oracle)

    raise ValueError('Unknown model oracle type.')
Beispiel #2
0
def general_setup(c):
    """Create the environment and seed the global RNGs from config ``c``.

    Reads ``c['envid']`` and ``c['seed']``, creates the environment through
    ``Env.create_env`` and then seeds TensorFlow (graph-level seed) and
    NumPy with the same seed.

    Args:
        c: configuration mapping with the keys ``'envid'`` and ``'seed'``.

    Returns:
        tuple: ``(env, envid, seed)``.
    """
    envid = c['envid']
    seed = c['seed']
    env = Env.create_env(envid, seed)

    # Fix randomness: TensorFlow graph-level seed first, then NumPy.
    tf.set_random_seed(seed)
    np.random.seed(seed)

    return env, envid, seed
Beispiel #3
0
def general_setup(c):
    """Create the environment, apply the horizon and seed the global RNGs.

    Reads ``c['envid']`` and ``c['seed']`` and creates the environment
    through ``Env.create_env``. When ``c['max_episode_steps']`` is not
    ``None`` it is written to ``env._max_episode_steps``. TensorFlow
    (graph-level seed) and NumPy are then seeded with the same seed.

    Args:
        c: configuration mapping with the keys ``'envid'``, ``'seed'`` and
            ``'max_episode_steps'``.

    Returns:
        tuple: ``(env, seed)``.
    """
    envid = c['envid']
    seed = c['seed']
    env = Env.create_env(envid, seed)

    # Only override the environment's horizon when one is configured.
    horizon = c['max_episode_steps']
    if horizon is not None:
        env._max_episode_steps = horizon

    # Fix randomness: TensorFlow graph-level seed first, then NumPy.
    tf.set_random_seed(seed)
    np.random.seed(seed)

    return env, seed
Beispiel #4
0
def create_env_from_env(env, env_type, seed, dc=None, rc=None):
    """Derive a new environment from ``env`` according to ``env_type``.

    Args:
        env: the source environment.
        env_type: one of
            ``'true'`` - a fresh environment with the same spec id as
            ``env`` and the same ``_max_episode_steps``;
            a float in ``[0, 1)`` - a simulated environment with that
            ``inaccuracy`` (the larger it is, the more randomness is added);
            ``'dyn'`` - a simulated environment configured with ``dc``;
            ``'dyn-rw'`` - a simulated environment configured with ``dc``
            and ``rc``.
        seed: seed forwarded to the environment factory.
        dc: required (not ``None``) for ``'dyn'`` and ``'dyn-rw'``.
        rc: required (not ``None``) for ``'dyn-rw'``.

    Returns:
        The newly created environment.

    Raises:
        ValueError: if ``env_type`` is none of the above.
        AssertionError: if a required ``dc`` / ``rc`` is ``None``.
    """
    if env_type == 'true':
        fresh_env = Env.create_env(env.env.spec.id, seed)
        # Carry the horizon over from the source environment.
        fresh_env._max_episode_steps = env._max_episode_steps
        return fresh_env

    if isinstance(env_type, float) and 0.0 <= env_type < 1.0:
        return Env.create_sim_env(env, seed, inaccuracy=env_type)

    if env_type == 'dyn':
        assert dc is not None
        return Env.create_sim_env(env, seed, dc=dc)

    if env_type == 'dyn-rw':
        assert dc is not None and rc is not None
        return Env.create_sim_env(env, seed, dc=dc, rc=rc)

    raise ValueError('Unknown env_type {}'.format(env_type))
Beispiel #5
0
def create_env_from_env(env, env_type, seed, dc=None, rc=None):
    """Create a real or simulated environment based on ``env``.

    ``env_type`` selects what is built:

    * ``'true'``: a new environment created from ``env``'s spec id, with
      ``_max_episode_steps`` copied from ``env``.
    * float in ``[0, 1)``: a simulated environment with that ``inaccuracy``.
    * ``'dyn'``: a simulated environment given the dynamics config ``dc``.
    * ``'dyn-rw'``: a simulated environment given both ``dc`` and ``rc``.

    Args:
        env: the source environment.
        env_type: selector described above.
        seed: seed forwarded to the environment factory.
        dc: dynamics config; must not be ``None`` for ``'dyn'``/``'dyn-rw'``.
        rc: must not be ``None`` for ``'dyn-rw'``.

    Returns:
        The newly created environment.

    Raises:
        ValueError: for any other ``env_type``.
        AssertionError: if a required ``dc`` / ``rc`` is ``None``.
    """
    if env_type == 'true':
        real_env = Env.create_env(env.env.spec.id, seed)
        real_env._max_episode_steps = env._max_episode_steps  # same horizon
        return real_env

    # Every remaining variant is a simulated env; only the keywords differ.
    if isinstance(env_type, float) and 0.0 <= env_type < 1.0:
        sim_kwargs = {'inaccuracy': env_type}
    elif env_type == 'dyn':
        assert dc is not None
        sim_kwargs = {'dc': dc}
    elif env_type == 'dyn-rw':
        assert dc is not None and rc is not None
        sim_kwargs = {'dc': dc, 'rc': rc}
    else:
        raise ValueError('Unknown env_type {}'.format(env_type))

    return Env.create_sim_env(env, seed, **sim_kwargs)
Beispiel #6
0
def test_batch_env(envid, n_envs):
    """Check that a batch environment tracks a plain environment step by step.

    Both environments are created from the same ``envid`` and seed. For each
    of 10 rollouts they are reset, then stepped with the same action
    (sampled from the plain environment's action space, with a leading axis
    added for the batch environment) until the plain environment reports
    ``done``. Observations and rewards must agree within a tolerance of
    1e-4 and the done flags must be equal.

    Args:
        envid: id of the environment to create.
        n_envs: number of environments passed to ``create_batch_env``.
    """
    seed = 0
    n_rollouts = 10
    single_env = envs.create_env(envid, seed)
    batch_env = envs.create_batch_env(envid, seed, n_envs, use_ext_proc=True)
    tol = dict(atol=1e-4, rtol=1e-4)

    for _ in range(n_rollouts):
        single_env.reset()
        batch_env.reset()
        done = False
        while not done:
            action = single_env.action_space.sample()
            obs, rew, done, _info = single_env.step(action)
            batch_obs, batch_rew, batch_done, _batch_info = batch_env.step(
                action[None])
            assert np.allclose(obs, batch_obs, **tol)
            assert np.isclose(rew, batch_rew, **tol)
            assert done == batch_done
Beispiel #7
0
def test_env(envid):
    """Check that a zero-inaccuracy simulated env reproduces the real env.

    For each of 50 rollouts the real and simulated environments are reset,
    the simulator is put into the real environment's state (and target, for
    ``envs.Reacher``), and both are stepped with the same normally
    distributed actions until the real environment reports ``done``. After
    every step the states, observations, rewards and done flags must match
    (tolerance 1e-4), including the values returned by the simulator's
    ``_batch_reward`` and ``_batch_is_done``.

    Finally a simulated environment with inaccuracy 0.2 is created, reset
    and stepped once; its results are not checked.

    Args:
        envid: id of the environment to test.
    """
    cprint('Testing env: {}'.format(envid))
    seed = 10
    n_rollouts = 50
    env = envs.create_env(envid, seed)
    sim_env = envs.create_sim_env(env, seed, 0.0)
    tol = dict(atol=1e-4, rtol=1e-4)

    for _ in range(n_rollouts):
        obs = env.reset()
        sim_env.reset()
        # Start the simulator from exactly the state the real env is in.
        sim_env.set_state(env.env.state)
        if isinstance(sim_env, envs.Reacher):
            sim_env.target = env.env.target
        assert np.allclose(env.env.state, sim_env.state)

        done = False
        while not done:
            # Gaussian actions, so that samples outside the action range
            # are exercised as well.
            action = np.random.normal(size=env.action_space.sample().shape)
            # The batched reward has to be computed before stepping: it uses
            # the observation and simulator state from before the action.
            batch_rew = sim_env._batch_reward(
                obs[None], sim_env.state[None], action[None])[0]
            obs, rew, done, _info = env.step(action)
            sim_obs, sim_rew, sim_done, _sim_info = sim_env.step(action)
            batch_done = sim_env._batch_is_done(obs[None])[0]

            assert np.allclose(env.env.state, sim_env.state, **tol)
            assert np.allclose(obs, sim_obs, **tol)
            assert np.isclose(rew, sim_rew, **tol)
            assert np.isclose(rew, batch_rew, **tol)
            assert done == sim_done
            assert done == batch_done

    # Smoke test: an inaccurate simulator can be created, reset and stepped.
    inaccurate_env = envs.create_sim_env(env, seed, 0.2)
    inaccurate_env.reset()
    action = env.action_space.sample()
    _obs, _rew, _done, _info = inaccurate_env.step(action)
Beispiel #8
0
def time_batch_env(envid, n_envs):
    """Time the generation of 5000 rollouts in a single in-process batch env.

    A plain environment is created first, then replaced by a batch
    environment of size 1 with ``use_ext_proc=False``. ``generate_rollout``
    is run inside ``timed('')`` with a policy that samples a random action
    from the environment's action space at every call.

    Args:
        envid: id of the environment to create.
        n_envs: not used by the currently enabled benchmark.
    """
    seed = 0
    n_rollouts = 5000
    e = envs.create_env(envid, seed)

    def pi(obs):
        # Repeats one sampled action for every observation in the batch.
        # Only referenced by the disabled Roller benchmark described below.
        sampled = e.action_space.sample()
        return [sampled] * len(obs)

    # NOTE: a Roller-based comparison (one in-process env versus ``n_envs``
    # external-process envs, each generating ``n_rollouts`` rollouts with
    # ``pi``) used to live here and is currently disabled.

    e = envs.create_batch_env(envid, seed, 1, use_ext_proc=False)

    def random_action(ob):
        return e.action_space.sample()

    with timed(''):
        generate_rollout(random_action, None, e,
                         min_n_samples=None,
                         max_n_rollouts=n_rollouts,
                         max_rollout_len=None)