def create_model_oracle(oracle, env, envid, seed, c):
    """Build the model oracle selected by ``c['mor_cls']``.

    The class is looked up by name on ``Or`` and constructed according to
    which class it is:

    * ``Or.SimulationOracle``: built from ``oracle``, a simulation env and
      a rollout generator (``generate_rollout`` bound to that env together
      with ``c['rollout_kwargs']``).  The simulation env is chosen by
      ``c['env_type']``:
        - ``'true'``: ``Env.create_env(envid, seed + 1)``;
        - ``'mlp'``: ``Env.create_sim_env`` with
          ``dyn_configs=c['dynamics']``;
        - a float in ``[0.0, 1.0)``: ``Env.create_sim_env`` with
          ``inaccuracy=<that float>``.
    * ``Or.LazyOracle`` / ``Or.AdversarialOracle``: built from ``oracle``
      and ``c['lazyor_kwargs']``.
    * ``Or.AggregatedOracle``: wraps an ``Or.LazyOracle`` (built as above)
      and is given ``c['aggor_kwargs']``.
    * ``Or.DummyOracle``: built from ``oracle`` alone.

    Args:
        oracle: Base oracle handed to the model oracle's constructor.
        env: Environment passed to ``Env.create_sim_env`` (used by the
            ``'mlp'`` and float ``env_type`` settings).
        envid: Environment id passed to ``Env.create_env`` (used by the
            ``'true'`` ``env_type`` setting).
        seed: Base seed; the simulation env is seeded with ``seed + 1``.
        c: Config mapping.  ``'mor_cls'`` is always read; the other keys
            listed above are read only by the branch that needs them.

    Returns:
        The constructed model oracle.

    Raises:
        ValueError: If the oracle class is not one of the handled types,
            or if ``c['env_type']`` is not ``'true'``, ``'mlp'`` or a
            float in ``[0.0, 1.0)``.
    """
    mor_cls = getattr(Or, c['mor_cls'])

    if mor_cls is Or.SimulationOracle:
        et = c['env_type']
        # The simulation env uses a seed offset by one from the caller's seed.
        # NOTE(review): presumably so it does not share a random stream with
        # the real env -- confirm.
        sim_seed = seed + 1
        if et == 'true':
            sim_env = Env.create_env(envid, sim_seed)
        elif et == 'mlp':
            sim_env = Env.create_sim_env(env, sim_seed, dyn_configs=c['dynamics'])
        elif isinstance(et, float) and 0.0 <= et < 1.0:
            sim_env = Env.create_sim_env(env, sim_seed, inaccuracy=et)
        else:
            # Raise explicitly rather than assert: asserts are stripped under
            # ``python -O`` and a bad value would reach create_sim_env.
            raise ValueError(
                "env_type must be 'true', 'mlp' or a float in [0.0, 1.0); "
                "got {!r}".format(et))
        gen_ro = functools.partial(generate_rollout, env=sim_env, **c['rollout_kwargs'])
        model_oracle = mor_cls(oracle, sim_env, gen_ro)
    elif mor_cls is Or.LazyOracle or mor_cls is Or.AdversarialOracle:
        model_oracle = mor_cls(oracle, **c['lazyor_kwargs'])
    elif mor_cls is Or.AggregatedOracle:
        lazy_oracle = Or.LazyOracle(oracle, **c['lazyor_kwargs'])
        model_oracle = mor_cls(lazy_oracle, **c['aggor_kwargs'])
    elif mor_cls is Or.DummyOracle:
        model_oracle = mor_cls(oracle)
    else:
        raise ValueError('Unknown model oracle type.')
    return model_oracle
def create_env_from_env(env, env_type, seed, dc=None, rc=None):
    """Create a new environment derived from ``env`` according to ``env_type``.

    NOTE(review): a function with this same name is defined again later in
    this file; in Python the later definition replaces this one at import
    time.  Consider removing one of the two.

    Args:
        env: Source environment.  For ``env_type == 'true'`` its
            ``env.env.spec.id`` and ``env._max_episode_steps`` are read;
            otherwise it is passed to ``Env.create_sim_env``.
        env_type: One of
            - ``'true'``: a fresh env created from the same spec id, with
              the same ``_max_episode_steps``;
            - a float in ``[0.0, 1.0)``: a simulated env with that
              ``inaccuracy``;
            - ``'dyn'``: a simulated env built with ``dc``;
            - ``'dyn-rw'``: a simulated env built with ``dc`` and ``rc``.
        seed: Seed forwarded to the ``Env`` factory functions.
        dc: Dynamics config; required for ``'dyn'`` and ``'dyn-rw'``.
        rc: Required for ``'dyn-rw'`` (presumably a reward config --
            confirm against ``Env.create_sim_env``).

    Returns:
        The newly created environment.

    Raises:
        ValueError: If ``env_type`` is not recognised, or if a config
            required by the chosen ``env_type`` is ``None``.
    """
    if env_type == 'true':
        new_env = Env.create_env(env.env.spec.id, seed)
        new_env._max_episode_steps = env._max_episode_steps  # set the horizon
        return new_env

    if isinstance(env_type, float) and 0.0 <= env_type < 1.0:
        # The larger the inaccuracy, the more randomness is added.
        return Env.create_sim_env(env, seed, inaccuracy=env_type)

    # Required configs are validated with explicit raises rather than asserts,
    # because asserts are stripped when Python runs with -O.
    if env_type == 'dyn':
        if dc is None:
            raise ValueError("env_type 'dyn' requires dc")
        return Env.create_sim_env(env, seed, dc=dc)

    if env_type == 'dyn-rw':
        if dc is None or rc is None:
            raise ValueError("env_type 'dyn-rw' requires dc and rc")
        return Env.create_sim_env(env, seed, dc=dc, rc=rc)

    raise ValueError('Unknown env_type {}'.format(env_type))
def create_env_from_env(env, env_type, seed, dc=None, rc=None):
    """Create a new environment derived from ``env`` according to ``env_type``.

    NOTE(review): an earlier definition with this same name exists in this
    file; this later one is the definition that takes effect at import time.
    Consider removing one of the two.

    Args:
        env: Source environment.  For ``env_type == 'true'`` its
            ``env.env.spec.id`` and ``env._max_episode_steps`` are read;
            otherwise it is passed to ``Env.create_sim_env``.
        env_type: One of
            - ``'true'``: a fresh env created from the same spec id, with
              the same ``_max_episode_steps``;
            - a float in ``[0.0, 1.0)``: a simulated env with that
              ``inaccuracy``;
            - ``'dyn'``: a simulated env built with ``dc``;
            - ``'dyn-rw'``: a simulated env built with ``dc`` and ``rc``.
        seed: Seed forwarded to the ``Env`` factory functions.
        dc: Dynamics config; required for ``'dyn'`` and ``'dyn-rw'``.
        rc: Required for ``'dyn-rw'`` (presumably a reward config --
            confirm against ``Env.create_sim_env``).

    Returns:
        The newly created environment.

    Raises:
        ValueError: If ``env_type`` is not recognised, or if a config
            required by the chosen ``env_type`` is ``None``.
    """
    if env_type == 'true':
        new_env = Env.create_env(env.env.spec.id, seed)
        new_env._max_episode_steps = env._max_episode_steps  # set the horizon
        return new_env

    if isinstance(env_type, float) and 0.0 <= env_type < 1.0:
        return Env.create_sim_env(env, seed, inaccuracy=env_type)

    # Required configs are validated with explicit raises rather than asserts,
    # because asserts are stripped when Python runs with -O.
    if env_type == 'dyn':
        if dc is None:
            raise ValueError("env_type 'dyn' requires dc")
        return Env.create_sim_env(env, seed, dc=dc)

    if env_type == 'dyn-rw':
        if dc is None or rc is None:
            raise ValueError("env_type 'dyn-rw' requires dc and rc")
        return Env.create_sim_env(env, seed, dc=dc, rc=rc)

    raise ValueError('Unknown env_type {}'.format(env_type))
def test_env(envid):
    """Check that the simulated env for ``envid`` tracks the real env.

    Creates the real env and a simulated env (third ``create_sim_env``
    argument ``0.0``, presumably the inaccuracy level), then runs 50
    rollouts.  At the start of each rollout the simulated env's state is
    overwritten with the real env's state (and, for ``envs.Reacher``, its
    target).  On every step both envs receive the same Gaussian action, and
    the resulting state, observation, reward and done flag are asserted to
    agree within ``atol = rtol = 1e-4``.  The simulated env's
    ``_batch_reward`` and ``_batch_is_done`` are checked against the real
    env's reward and done flag as well.

    Afterwards a simulated env created with third argument ``0.2`` is reset
    and stepped once; nothing is asserted on that result.

    Args:
        envid: Environment id passed to ``envs.create_env``.
    """
    cprint('Testing env: {}'.format(envid))
    seed = 10
    n_ro = 50
    env = envs.create_env(envid, seed)
    sim_env = envs.create_sim_env(env, seed, 0.0)
    tol = dict(atol=1e-4, rtol=1e-4)

    for _ in range(n_ro):
        obs = env.reset()
        sim_env.reset()
        # Start both envs from the same state.
        sim_env.set_state(env.env.state)
        if isinstance(sim_env, envs.Reacher):
            sim_env.target = env.env.target
        assert np.allclose(env.env.state, sim_env.state)

        done = False
        while not done:
            # Gaussian actions rather than action_space.sample(), so that
            # samples outside of the action range are exercised too.
            action = np.random.normal(size=env.action_space.sample().shape)
            # The batched reward is computed from the pre-step observation and
            # state; indexing with [None] adds a leading batch axis of size one.
            batch_rew = sim_env._batch_reward(
                obs[None], sim_env.state[None], action[None])[0]

            obs, rew, done, _ = env.step(action)
            sim_obs, sim_rew, sim_done, _ = sim_env.step(action)
            batch_done = sim_env._batch_is_done(obs[None])[0]

            assert np.allclose(env.env.state, sim_env.state, **tol)
            assert np.allclose(obs, sim_obs, **tol)
            assert np.isclose(rew, sim_rew, **tol)
            assert np.isclose(rew, batch_rew, **tol)
            assert done == sim_done
            assert done == batch_done

    # Smoke test: a sim env built with a non-zero third argument can be
    # reset and stepped once.
    sim_env_inacc = envs.create_sim_env(env, seed, 0.2)
    sim_env_inacc.reset()
    a = env.action_space.sample()
    obs, rew, done, _ = sim_env_inacc.step(a)