def make_environment(suite: str, task: str) -> dm_env.Environment:
    """Makes the requested continuous control environment.

    Args:
      suite: One of 'gym' or 'control'.
      task: Task to load. If `suite` is 'control', the task must be formatted
        as f'{domain_name}:{task_name}'.

    Returns:
      An environment satisfying the dm_env interface expected by Acme agents.

    Raises:
      ValueError: If `suite` is not one of `_VALID_TASK_SUITES`, if `suite` is
        'control' and `task` is not of the form 'domain_name:task_name', or if
        `suite` is listed as valid but has no loader in this function.
    """
    if suite not in _VALID_TASK_SUITES:
        raise ValueError(
            f'Unsupported suite: {suite}. Expected one of {_VALID_TASK_SUITES}'
        )

    if suite == 'gym':
        env = gym.make(task)
        # Make sure the environment obeys the dm_env.Environment interface.
        env = wrappers.GymWrapper(env)

    elif suite == 'control':
        # Check the task format before importing dm_control so that a malformed
        # task fails fast with a readable message rather than a bare
        # tuple-unpacking error.
        task_parts = task.split(':')
        if len(task_parts) != 2:
            raise ValueError(
                f'Invalid control task: {task!r}. Expected the format '
                "'domain_name:task_name'."
            )
        domain_name, task_name = task_parts

        # Load dm_suite lazily so that a Mujoco license is not required when
        # the control suite is not being used.
        from dm_control import suite as dm_suite  # pylint: disable=g-import-not-at-top

        env = dm_suite.load(domain_name, task_name)
        env = wrappers.ConcatObservationWrapper(env)

    else:
        # Only reachable if _VALID_TASK_SUITES lists a suite that this function
        # has no loader for; fail clearly instead of hitting an unbound `env`.
        raise ValueError(f'No loader implemented for suite: {suite}.')

    # Wrap the environment so the expected continuous action spec is [-1, 1].
    # Note: this is a no-op on 'control' tasks.
    env = wrappers.CanonicalSpecWrapper(env, clip=True)
    env = wrappers.SinglePrecisionWrapper(env)
    return env
def make_environment(
    task: str = 'MountainCarContinuous-v0') -> dm_env.Environment:
    """Builds an OpenAI Gym task exposed through the dm_env interface.

    Args:
      task: Identifier of the Gym task, passed directly to `gym.make`.

    Returns:
      The Gym environment wrapped, in order, by `GymWrapper`,
      `CanonicalSpecWrapper` (with clipping enabled) and
      `SinglePrecisionWrapper`.
    """
    # Instantiate the raw Gym task and adapt it to dm_env.Environment.
    env = wrappers.GymWrapper(gym.make(task))

    # Actions coming from the agent are clipped to the environment spec.
    env = wrappers.CanonicalSpecWrapper(env, clip=True)

    return wrappers.SinglePrecisionWrapper(env)
def make_environment(evaluation: bool = False,
                     task: str = 'HalfCheetah-v3') -> dm_env.Environment:
    """Builds an OpenAI Gym task exposed through the dm_env interface.

    Args:
      evaluation: Accepted for signature compatibility; it is discarded and
        has no effect on the environment that is built.
      task: Identifier of the Gym task, passed directly to `gym.make`.

    Returns:
      The Gym environment wrapped, in order, by `GymWrapper`,
      `CanonicalSpecWrapper` (with clipping enabled) and
      `SinglePrecisionWrapper`.
    """
    del evaluation  # Unused.

    gym_env = gym.make(task)

    # Adapt the Gym environment to the dm_env.Environment interface.
    wrapped = wrappers.GymWrapper(gym_env)

    # Actions coming from the agent are clipped to the environment spec.
    wrapped = wrappers.CanonicalSpecWrapper(wrapped, clip=True)
    wrapped = wrappers.SinglePrecisionWrapper(wrapped)
    return wrapped
def make_environment(task, evaluation = False):
    """Builds an OpenAI Gym task exposed through the dm_env interface.

    Args:
      task: Identifier of the Gym task, passed directly to `gym.make`.
      evaluation: When truthy, the finished environment is additionally
        wrapped in `env_wrappers.SuccessRewardWrapper` with
        `success_threshold=1.`.

    Returns:
      The wrapped environment.
    """
    # The sparse-reward wrapper is applied to the raw Gym environment, before
    # the conversion to the dm_env.Environment interface.
    env = env_wrappers.AdroitSparseRewardWrapper(gym.make(task))
    env = wrappers.GymWrapper(env)

    # Actions coming from the agent are clipped to the environment spec.
    env = wrappers.CanonicalSpecWrapper(env, clip=True)
    env = wrappers.SinglePrecisionWrapper(env)

    if not evaluation:
        return env
    return env_wrappers.SuccessRewardWrapper(env, success_threshold=1.)
def make_environment(
    evaluation: bool = False,
    domain_name: str = 'cartpole',
    task_name: str = 'balance',
    from_pixels: bool = False,
    frames_to_stack: int = 3,
    flatten_stack: bool = False,
    num_action_repeats: Optional[int] = None,
) -> dm_env.Environment:
    """Factory for control suite environments.

    Args:
      evaluation: Currently has no effect; see the hook at the end of the body.
      domain_name: Control suite domain passed to `suite.load`.
      task_name: Control suite task passed to `suite.load`.
      from_pixels: When true, the environment is wrapped to produce pixel
        observations and those frames are stacked.
      frames_to_stack: `num_frames` argument of the frame-stacking wrapper;
        only used when `from_pixels` is true.
      flatten_stack: `flatten` argument of the frame-stacking wrapper; only
        used when `from_pixels` is true.
      num_action_repeats: When truthy, the `num_repeats` argument of an
        `ActionRepeatWrapper` added to the stack; otherwise no such wrapper
        is applied.

    Returns:
      The wrapped control suite environment.
    """
    # Imported here rather than at module level so that a Mujoco license is
    # not required when the control suite is not being used.
    from dm_control import suite  # pylint: disable=g-import-not-at-top
    from acme.wrappers import mujoco as mujoco_wrappers  # pylint: disable=g-import-not-at-top

    # Start from the raw control suite environment.
    env = suite.load(domain_name, task_name)

    if from_pixels:
        # Switch to pixel observations, then stack consecutive frames.
        pixel_env = mujoco_wrappers.MujocoPixelWrapper(env)
        env = wrappers.FrameStackingWrapper(
            pixel_env, num_frames=frames_to_stack, flatten=flatten_stack)

    env = wrappers.CanonicalSpecWrapper(env, clip=True)

    if num_action_repeats:
        env = wrappers.ActionRepeatWrapper(
            env, num_repeats=num_action_repeats)

    env = wrappers.SinglePrecisionWrapper(env)

    if evaluation:
        # The evaluator in the distributed agent sets this flag to True, so
        # this clause is the place to add evaluator-only behaviour such as
        # video recording.
        pass

    return env