def make_environment(suite: str, task: str) -> dm_env.Environment:
  """Makes the requested continuous control environment.

  Args:
    suite: One of 'gym' or 'control'.
    task: Task to load. If `suite` is 'control', the task must be formatted as
      f'{domain_name}:{task_name}'

  Returns:
    An environment satisfying the dm_env interface expected by Acme agents.
  """
  if suite not in _VALID_TASK_SUITES:
    raise ValueError(
        f'Unsupported suite: {suite}. Expected one of {_VALID_TASK_SUITES}')

  if suite == 'gym':
    env = gym.make(task)
    # Make sure the environment obeys the dm_env.Environment interface.
    env = wrappers.GymWrapper(env)

  elif suite == 'control':
    # Load dm_control lazily so a MuJoCo license is not required unless the
    # control suite is actually used.
    from dm_control import suite as dm_suite  # pylint: disable=g-import-not-at-top

    domain_name, task_name = task.split(':')
    env = dm_suite.load(domain_name, task_name)
    env = wrappers.ConcatObservationWrapper(env)

  # Wrap the environment so the expected continuous action spec is [-1, 1].
  # Note: this is a no-op on 'control' tasks.
  env = wrappers.CanonicalSpecWrapper(env, clip=True)
  env = wrappers.SinglePrecisionWrapper(env)

  return env

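# --- Added usage sketch (not from the original source) ---
# A minimal sketch of calling the factory above, assuming gym and acme are
# installed and that `_VALID_TASK_SUITES = ('gym', 'control')`, as the
# docstring implies. It only inspects specs rather than running an agent.
from acme import specs

env = make_environment(suite='gym', task='MountainCarContinuous-v0')
env_spec = specs.make_environment_spec(env)
print(env_spec.actions)       # Bounded in [-1, 1] by CanonicalSpecWrapper.
print(env_spec.observations)  # Cast to float32 by SinglePrecisionWrapper.
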
def load_environment(env_name):
  """Returns a wrapped gym environment."""
  environment = gym.make(env_name)
  environment = TimeLimit(environment, max_episode_steps=1000)
  environment = wrappers.gym_wrapper.GymWrapper(environment)
  environment = wrappers.SinglePrecisionWrapper(environment)
  return environment

def environment(game):
  """Atari environment."""
  env = atari_lib.create_atari_environment(game_name=game,
                                           sticky_actions=True)
  env = AtariDopamineWrapper(env)
  env = wrappers.FrameStackingWrapper(env, num_frames=4)
  return wrappers.SinglePrecisionWrapper(env)

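# --- Added usage sketch (not from the original source) ---
# A minimal sketch of the Atari factory above, assuming dopamine-rl and acme
# are installed. The exact observation shape depends on AtariDopamineWrapper,
# so the comments are indicative rather than authoritative.
env = environment('Pong')
print(env.observation_spec())  # Expect a trailing stack axis of 4 frames.
print(env.action_spec())       # Discrete Atari actions, int32 after the cast.
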
def create_environment(task_class, task_name, single_precision=False, **kwargs):
  """Creates an environment, optionally cast to single precision."""
  env = _create_environment(task_class, task_name, **kwargs)
  if single_precision:
    env = wrappers.SinglePrecisionWrapper(env)
  return env

def make_bsuite_environment(bsuite_id: str = 'deep_sea/0',
                            results_dir: str = '/tmp/bsuite',
                            overwrite: bool = False) -> dm_env.Environment:
  """Loads a bsuite environment that records results to CSV."""
  raw_environment = bsuite.load_and_record_to_csv(
      bsuite_id=bsuite_id,
      results_dir=results_dir,
      overwrite=overwrite,
  )
  return wrappers.SinglePrecisionWrapper(raw_environment)

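# --- Added usage sketch (not from the original source) ---
# A minimal sketch, assuming bsuite and acme are installed: loads the default
# deep_sea/0 task and writes bsuite results as CSV under /tmp/bsuite.
environment = make_bsuite_environment(overwrite=True)
timestep = environment.reset()
print(timestep.observation.dtype)  # float32 after SinglePrecisionWrapper.
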
def run_dqn(experiment_name):
  current_dir = pathlib.Path().absolute()
  directories = Save_paths(data_dir=f'{current_dir}/data',
                           experiment_name=experiment_name)

  game = Winter_is_coming(setup=PARAMS['setup'])
  environment = wrappers.SinglePrecisionWrapper(game)
  spec = specs.make_environment_spec(environment)

  # Build the network.
  def _make_network(spec) -> snt.Module:
    network = snt.Sequential([
        snt.Flatten(),
        snt.nets.MLP([50, 50, spec.actions.num_values]),
    ])
    tf2_utils.create_variables(network, [spec.observations])
    return network

  network = _make_network(spec)

  # Set up the logger.
  if neptune_enabled:
    agent_logger = NeptuneLogger(label='DQN agent', time_delta=0.1)
    loop_logger = NeptuneLogger(label='Environment loop', time_delta=0.1)
    PARAMS['network'] = f'{network}'
    neptune.init('cvasquez/sandbox')
    neptune.create_experiment(name=experiment_name, params=PARAMS)
  else:
    agent_logger = loggers.TerminalLogger('DQN agent', time_delta=1.)
    loop_logger = loggers.TerminalLogger('Environment loop', time_delta=1.)

  # Build the agent.
  agent = DQN(
      environment_spec=spec,
      network=network,
      params=PARAMS,
      checkpoint=True,
      paths=directories,
      logger=agent_logger,
  )

  # Try running the environment loop. We have no assertions here because all
  # we care about is that the agent runs without raising any errors.
  loop = acme.EnvironmentLoop(environment, agent, logger=loop_logger)
  loop.run(num_episodes=PARAMS['num_episodes'])

  last_checkpoint_path = agent.save()

  # Upload the last checkpoint.
  if neptune_upload_checkpoint and last_checkpoint_path:
    files = os.listdir(last_checkpoint_path)
    for f in files:
      neptune.log_artifact(os.path.join(last_checkpoint_path, f))

  if neptune_enabled:
    neptune.stop()

  do_example_run(game, agent)

def _wrap_training(self, env: gym.Env):
  env = FilterObservation(env, filter_keys=['lidar'])
  env = Flatten(env, flatten_obs=True, flatten_actions=True)
  env = NormalizeObservations(env)
  env = FixedResetMode(env, mode='random')
  env = TimeLimit(env, max_episode_steps=self._env_config.training_time_limit)
  env = ActionRepeat(env, n=self._env_config.action_repeat)
  env = GymWrapper(environment=env)
  env = wrappers.SinglePrecisionWrapper(env)
  return env

def make_environment(evaluation: bool = False,
                     domain_name: str = 'cartpole',
                     task_name: str = 'balance') -> dm_env.Environment:
  """Implements a control suite environment factory."""
  # Nothing special to be done for evaluation environment.
  del evaluation
  environment = suite.load(domain_name, task_name)
  environment = wrappers.SinglePrecisionWrapper(environment)
  return environment

def environment(combined_challenge, domain, task, log_output=None,
                environment_kwargs=None):
  """RWRL environment."""
  env = rwrl_envs.load(
      domain_name=domain,
      task_name=task,
      log_output=log_output,
      environment_kwargs=environment_kwargs,
      combined_challenge=combined_challenge)
  return wrappers.SinglePrecisionWrapper(env)

def _wrap_test(self, env: gym.Env):
  env = FilterObservation(env, filter_keys=['lidar'])
  env = Flatten(env, flatten_obs=False, flatten_actions=True)
  env = NormalizeObservations(env)
  env = InfoToObservation(env)
  env = FixedResetMode(env, mode='grid')
  env = TimeLimit(env, max_episode_steps=self._env_config.eval_time_limit)
  gym_env = ActionRepeat(env, n=self._env_config.action_repeat)
  env = GymWrapper(environment=gym_env)
  env = wrappers.SinglePrecisionWrapper(env)
  env.gym_env = gym_env
  return env

def environment(self):
  """Build and return the environment."""
  if self._environment is not None:
    return self._environment
  self._environment = suite.load(domain_name=self._domain_name,
                                 task_name=self._task_name)
  self._environment = wrappers.SinglePrecisionWrapper(self._environment)
  self._environment = NormilizeActionSpecWrapper(self._environment)
  return self._environment

def make_gym_environment(
    task_name: str = 'MountainCarContinuous-v0') -> dm_env.Environment:
  """Creates an OpenAI Gym environment."""
  # Load the gym environment.
  environment = gym.make(task_name)

  # Make sure the environment obeys the dm_env.Environment interface.
  environment = wrappers.GymWrapper(environment)
  environment = wrappers.SinglePrecisionWrapper(environment)

  return environment

def make_environment(
    task: str = 'MountainCarContinuous-v0') -> dm_env.Environment:
  """Creates an OpenAI Gym environment."""
  # Load the gym environment.
  environment = gym.make(task)

  # Make sure the environment obeys the dm_env.Environment interface.
  environment = wrappers.GymWrapper(environment)
  # Clip the action returned by the agent to the environment spec.
  environment = wrappers.CanonicalSpecWrapper(environment, clip=True)
  environment = wrappers.SinglePrecisionWrapper(environment)

  return environment

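# --- Added usage sketch (not from the original source) ---
# A minimal sketch, assuming gym and acme are installed, of what the two outer
# wrappers do: CanonicalSpecWrapper rescales the action spec to [-1, 1] and
# clips out-of-range actions; SinglePrecisionWrapper casts 64-bit specs to
# their 32-bit equivalents.
env = make_environment('MountainCarContinuous-v0')
action_spec = env.action_spec()
print(action_spec.minimum, action_spec.maximum)  # Expected: [-1.] [1.]
print(action_spec.dtype)                         # Expected: float32
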
def make_environment(domain_name: str = 'cartpole',
                     task_name: str = 'balance') -> dm_env.Environment:
  """Creates a RWRL suite environment."""
  environment = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec=dict(enable=True),
      delay_spec=dict(enable=True, actions=20),
      log_output=os.path.join(FLAGS.save_path, 'log.npz'),
      environment_kwargs=dict(
          log_safety_vars=True, log_every=2, flat_observation=True))
  environment = wrappers.SinglePrecisionWrapper(environment)
  return environment

def make_environment(evaluation: bool = False,
                     task: str = 'HalfCheetah-v3') -> dm_env.Environment:
  """Creates an OpenAI Gym environment."""
  del evaluation

  # Load the gym environment.
  environment = gym.make(task)

  # Make sure the environment obeys the dm_env.Environment interface.
  environment = wrappers.GymWrapper(environment)
  # Clip the action returned by the agent to the environment spec.
  environment = wrappers.CanonicalSpecWrapper(environment, clip=True)
  environment = wrappers.SinglePrecisionWrapper(environment)

  return environment

def environment(
    combined_challenge: str,
    domain: str,
    task: str,
    log_output: Optional[str] = None,
    environment_kwargs: Optional[Dict[str, Any]] = None) -> dm_env.Environment:
  """RWRL environment."""
  env = rwrl_envs.load(
      domain_name=domain,
      task_name=task,
      log_output=log_output,
      environment_kwargs=environment_kwargs,
      combined_challenge=combined_challenge)
  return wrappers.SinglePrecisionWrapper(env)

def make_env_and_model() -> Tuple[dm_env.Environment, models.Model]:
  """Create environment and corresponding model (learned or simulator)."""
  environment = bsuite.load('catch', kwargs={})
  if FLAGS.simulator:
    model = simulator.Simulator(environment)  # pytype: disable=attribute-error
  else:
    model = mlp.MLPModel(
        specs.make_environment_spec(environment),
        replay_capacity=1000,
        batch_size=16,
        hidden_sizes=(50,),
    )
  environment = wrappers.SinglePrecisionWrapper(environment)

  return environment, model

def make_environment(evaluation: bool = False,
                     domain_name: str = 'cartpole',
                     task_name: str = 'balance',
                     concatenate_observations: bool = False
                     ) -> dm_env.Environment:
  """Implements a control suite environment factory."""
  # Nothing special to be done for evaluation environment.
  del evaluation
  environment = suite.load(domain_name, task_name)
  environment = wrappers.SinglePrecisionWrapper(environment)
  timestep = environment.reset()
  obs_names = list(timestep.observation.keys())
  if concatenate_observations:
    environment = wrappers.ConcatObservationWrapper(environment, obs_names)
  return environment

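# --- Added usage sketch (not from the original source) ---
# A minimal sketch, assuming dm_control and acme are installed, contrasting the
# default dict observation with the concatenated variant produced by
# ConcatObservationWrapper. The single-array output is an assumption based on
# the wrapper's name and its usage above, not a verified result.
env = make_environment(concatenate_observations=False)
print(type(env.reset().observation))  # A dict of named observation arrays.

env = make_environment(concatenate_observations=True)
print(env.reset().observation)        # A single concatenated array.
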
def main(_):
  # Create an environment and grab the spec.
  environment = bsuite.load_from_id('catch/0')
  environment = wrappers.SinglePrecisionWrapper(environment)
  environment_spec = specs.make_environment_spec(environment)

  network = snt.Sequential([
      snt.Flatten(),
      snt.nets.MLP([50, 50, environment_spec.actions.num_values])
  ])

  # Construct the agent.
  agent = dqn.DQN(environment_spec=environment_spec, network=network)

  # Run the environment loop.
  loop = acme.EnvironmentLoop(environment, agent)
  loop.run(num_episodes=environment.bsuite_num_episodes)  # pytype: disable=attribute-error

def main(_):
  # Create an environment and grab the spec.
  raw_environment = bsuite.load_from_id(FLAGS.bsuite_id)
  environment = wrappers.SinglePrecisionWrapper(raw_environment)
  environment_spec = specs.make_environment_spec(environment)

  # Construct the agent.
  agent = dqfd.DQfD(
      environment_spec=environment_spec,
      network=make_network(environment_spec.actions),
      demonstration_dataset=bsuite_demonstrations.make_dataset(raw_environment),
      demonstration_ratio=FLAGS.demonstration_ratio,
      samples_per_insert=FLAGS.samples_per_insert,
      learning_rate=FLAGS.learning_rate)

  # Run the environment loop.
  loop = acme.EnvironmentLoop(environment, agent)
  loop.run(num_episodes=environment.bsuite_num_episodes)  # pytype: disable=attribute-error

def make_env_and_model(
    bsuite_id: str,
    results_dir: str,
    overwrite: bool) -> Tuple[dm_env.Environment, models.Model]:
  """Create environment and corresponding model (learned or simulator)."""
  raw_env = bsuite.load_from_id(bsuite_id)
  if FLAGS.simulator:
    model = simulator.Simulator(raw_env)  # pytype: disable=attribute-error
  else:
    model = mlp.MLPModel(
        specs.make_environment_spec(raw_env),
        replay_capacity=1000,
        batch_size=16,
        hidden_sizes=(50,),
    )
  environment = csv_logging.wrap_environment(
      raw_env, bsuite_id, results_dir, overwrite)
  environment = wrappers.SinglePrecisionWrapper(environment)

  return environment, model

def test_discrete(self):
  env = wrappers.SinglePrecisionWrapper(
      fakes.DiscreteEnvironment(
          action_dtype=np.int64,
          obs_dtype=np.int64,
          reward_dtype=np.float64))

  self.assertTrue(np.issubdtype(env.observation_spec().dtype, np.int32))
  self.assertTrue(np.issubdtype(env.action_spec().dtype, np.int32))
  self.assertTrue(np.issubdtype(env.reward_spec().dtype, np.float32))
  self.assertTrue(np.issubdtype(env.discount_spec().dtype, np.float32))

  timestep = env.reset()
  self.assertIsNone(timestep.reward)
  self.assertIsNone(timestep.discount)
  self.assertTrue(np.issubdtype(timestep.observation.dtype, np.int32))

  timestep = env.step(0)
  self.assertTrue(np.issubdtype(timestep.reward.dtype, np.float32))
  self.assertTrue(np.issubdtype(timestep.discount.dtype, np.float32))
  self.assertTrue(np.issubdtype(timestep.observation.dtype, np.int32))

def make_environment(task, evaluation=False):
  """Creates an OpenAI Gym environment."""
  # Load the gym environment.
  environment = gym.make(task)
  environment = env_wrappers.AdroitSparseRewardWrapper(environment)

  # Make sure the environment obeys the dm_env.Environment interface.
  environment = wrappers.GymWrapper(environment)
  # Clip the action returned by the agent to the environment spec.
  environment = wrappers.CanonicalSpecWrapper(environment, clip=True)
  environment = wrappers.SinglePrecisionWrapper(environment)
  if evaluation:
    environment = env_wrappers.SuccessRewardWrapper(environment,
                                                    success_threshold=1.)
  return environment

def test_loop_run(self):
  raw_env = rl_environment.Environment('tic_tac_toe')
  env = open_spiel_wrapper.OpenSpielWrapper(raw_env)
  env = wrappers.SinglePrecisionWrapper(env)
  environment_spec = acme.make_environment_spec(env)

  actors = []
  for _ in range(env.num_players):
    actors.append(RandomActor(environment_spec))

  loop = open_spiel_environment_loop.OpenSpielEnvironmentLoop(env, actors)
  result = loop.run_episode()
  self.assertIn('episode_length', result)
  self.assertIn('episode_return', result)
  self.assertIn('steps_per_second', result)

  loop.run(num_episodes=10)
  loop.run(num_steps=100)

def make_single_agent_env(scenario: str, render=False):
  scenario = SingleAgentScenario.from_spec(scenario, rendering=render)
  env = VectorizedSingleAgentRaceEnv(scenarios=[scenario])
  env = wrap_env(env=env, wrapper_configs='single_agent_wrappers.yml')
  env = wrappers.GymWrapper(environment=env)
  env = wrappers.SinglePrecisionWrapper(env)
  return env


# def make_multi_agent_env(scenario: str, render=False, test=False):
#   scenario = MultiAgentScenario.from_spec(scenario, rendering=render)
#   env = VectorizedMultiAgentRaceEnv(scenarios=[scenario])
#   if test:
#     env = wrap_env(env=env, wrapper_configs='multi_agent_test_wrappers.yml')
#   else:
#     env = wrap_env(env=env, wrapper_configs='multi_agent_wrappers.yml')
#   env = MultiAgentGymWrapper(environment=env)
#   env = wrappers.SinglePrecisionWrapper(env)
#   return env

def test_continuous(self):
  env = wrappers.SinglePrecisionWrapper(
      fakes.ContinuousEnvironment(
          action_dim=0, dtype=np.float64, reward_dtype=np.float64))

  self.assertTrue(np.issubdtype(env.observation_spec().dtype, np.float32))
  self.assertTrue(np.issubdtype(env.action_spec().dtype, np.float32))
  self.assertTrue(np.issubdtype(env.reward_spec().dtype, np.float32))
  self.assertTrue(np.issubdtype(env.discount_spec().dtype, np.float32))

  timestep = env.reset()
  self.assertIsNone(timestep.reward)
  self.assertIsNone(timestep.discount)
  self.assertTrue(np.issubdtype(timestep.observation.dtype, np.float32))

  timestep = env.step(0.0)
  self.assertTrue(np.issubdtype(timestep.reward.dtype, np.float32))
  self.assertTrue(np.issubdtype(timestep.discount.dtype, np.float32))
  self.assertTrue(np.issubdtype(timestep.observation.dtype, np.float32))

def environment(self):
  """Build and return the environment."""
  if self._task_name == 'humanoid_corridor':
    self._environment = _build_humanoid_corridor_env()
  elif self._task_name == 'humanoid_gaps':
    self._environment = _build_humanoid_corridor_gaps()
  elif self._task_name == 'humanoid_walls':
    self._environment = _build_humanoid_walls_env()

  self._environment = NormilizeActionSpecWrapper(self._environment)
  self._environment = MujocoActionNormalizer(
      environment=self._environment, rescale='clip')
  self._environment = wrappers.SinglePrecisionWrapper(self._environment)

  all_observations = list(self._proprio_keys) + list(self._pixel_keys)
  self._environment = FilterObservationsWrapper(self._environment,
                                                all_observations)
  return self._environment

def main(_):
  # Create an environment and grab the spec.
  raw_environment = bsuite.load_and_record_to_csv(
      bsuite_id=FLAGS.bsuite_id,
      results_dir=FLAGS.results_dir,
      overwrite=FLAGS.overwrite,
  )
  environment = wrappers.SinglePrecisionWrapper(raw_environment)
  environment_spec = specs.make_environment_spec(environment)

  network = snt.Sequential([
      snt.Flatten(),
      snt.nets.MLP([50, 50, environment_spec.actions.num_values])
  ])

  # Construct the agent.
  agent = dqn.DQN(environment_spec=environment_spec, network=network)

  # Run the environment loop.
  loop = acme.EnvironmentLoop(environment, agent)
  loop.run(num_episodes=environment.bsuite_num_episodes)  # pytype: disable=attribute-error

def environment(self):
  """Return environment."""
  if self._task_name == 'rodent_escape':
    self._environment = _build_rodent_escape_env()
  elif self._task_name == 'rodent_gaps':
    self._environment = _build_rodent_corridor_gaps()
  elif self._task_name == 'rodent_two_touch':
    self._environment = _build_rodent_two_touch_env()
  elif self._task_name == 'rodent_mazes':
    self._environment = _build_rodent_maze_env()

  self._environment = NormilizeActionSpecWrapper(self._environment)
  self._environment = MujocoActionNormalizer(
      environment=self._environment, rescale='clip')
  self._environment = wrappers.SinglePrecisionWrapper(self._environment)

  all_observations = list(self._proprio_keys) + list(self._pixel_keys)
  self._environment = FilterObservationsWrapper(self._environment,
                                                all_observations)
  return self._environment

def main(_):
  # Create an environment and grab the spec.
  env_configs = {'players': FLAGS.num_players} if FLAGS.num_players else {}
  raw_environment = rl_environment.Environment(FLAGS.game, **env_configs)
  environment = open_spiel_wrapper.OpenSpielWrapper(raw_environment)
  environment = wrappers.SinglePrecisionWrapper(
      environment)  # type: open_spiel_wrapper.OpenSpielWrapper
  environment_spec = acme.make_environment_spec(environment)

  # Build the networks.
  networks = []
  policy_networks = []
  for _ in range(environment.num_players):
    network = legal_actions.MaskedSequential([
        snt.Flatten(),
        snt.nets.MLP([50, 50, environment_spec.actions.num_values])
    ])
    policy_network = snt.Sequential([
        network,
        legal_actions.EpsilonGreedy(epsilon=0.1, threshold=-1e8)
    ])
    networks.append(network)
    policy_networks.append(policy_network)

  # Construct the agents.
  agents = []
  for network, policy_network in zip(networks, policy_networks):
    agents.append(
        dqn.DQN(
            environment_spec=environment_spec,
            network=network,
            policy_network=policy_network))

  # Run the environment loop.
  loop = open_spiel_environment_loop.OpenSpielEnvironmentLoop(
      environment, agents)
  loop.run(num_episodes=100000)