Example #1
def create_d4rl_env_and_dataset(
    task_name,
    batch_size
):
  """Create gym environment and dataset for d4rl.

  Args:
    task_name: Name of d4rl task.
    batch_size: Mini batch size.
  Returns:
    Gym env and dataset.
  """
  env = gym.make(task_name)
  env = wrappers.GymWrapper(env)
  dataset = d4rl.qlearning_dataset(env)

  states = np.array(dataset['observations'], dtype=np.float32)
  actions = np.array(dataset['actions'], dtype=np.float32)
  rewards = np.array(dataset['rewards'], dtype=np.float32)
  discounts = np.array(np.logical_not(dataset['terminals']), dtype=np.float32)
  next_states = np.array(dataset['next_observations'], dtype=np.float32)

  dataset = tf_data.Dataset.from_tensor_slices(
      Inputs(data=(states, actions, rewards, discounts, next_states)))
  dataset = dataset.cache()
  dataset = dataset.shuffle(states.shape[0], reshuffle_each_iteration=True)
  dataset = dataset.repeat()
  dataset = dataset.batch(batch_size, drop_remainder=True)
  dataset = dataset.prefetch(tf_data.experimental.AUTOTUNE)
  return env, dataset
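The snippet assumes an `Inputs` container defined elsewhere; below is a minimal stand-in plus hypothetical usage (the task name is one standard d4rl id, not taken from the example):

import collections

# Minimal stand-in for the undefined `Inputs` container: a namedtuple whose
# single `data` field carries the transition tensors. tf.data slices
# namedtuples elementwise, so each batch comes back as an `Inputs`.
Inputs = collections.namedtuple('Inputs', ['data'])

env, dataset = create_d4rl_env_and_dataset(
    'halfcheetah-medium-v0', batch_size=256)
states, actions, rewards, discounts, next_states = next(iter(dataset)).data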
Example #2
def make_environment(suite: str, task: str) -> dm_env.Environment:
    """Makes the requested continuous control environment.

  Args:
    suite: One of 'gym' or 'control'.
    task: Task to load. If `suite` is 'control', the task must be formatted as
      f'{domain_name}:{task_name}'

  Returns:
    An environment satisfying the dm_env interface expected by Acme agents.
  """

    if suite not in _VALID_TASK_SUITES:
        raise ValueError(
            f'Unsupported suite: {suite}. Expected one of {_VALID_TASK_SUITES}'
        )

    if suite == 'gym':
        env = gym.make(task)
        # Make sure the environment obeys the dm_env.Environment interface.
        env = wrappers.GymWrapper(env)

    elif suite == 'control':
        # Load dm_suite lazily to avoid requiring a MuJoCo license when the
        # control suite is not used.
        from dm_control import suite as dm_suite  # pylint: disable=g-import-not-at-top
        domain_name, task_name = task.split(':')
        env = dm_suite.load(domain_name, task_name)
        env = wrappers.ConcatObservationWrapper(env)

    # Wrap the environment so the expected continuous action spec is [-1, 1].
    # Note: this is a no-op on 'control' tasks.
    env = wrappers.CanonicalSpecWrapper(env, clip=True)
    env = wrappers.SinglePrecisionWrapper(env)
    return env
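Hypothetical usage of the two suites (the task ids below are standard examples, not taken from this snippet; the 'control' id follows the required 'domain:task' format):

gym_env = make_environment(suite='gym', task='MountainCarContinuous-v0')
control_env = make_environment(suite='control', task='cartpole:swingup')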
Example #3
def make_environment_atari(env_name, seed):
    """Creates a partially observable Atari Gym environment."""
    env = gym.make(env_name)
    env = AtariWrapper(env,
                       partial_observation_wrapper=QuadrantObservationWrapper,
                       partial_percentage=1.0,
                       seed=seed)
    env = wrappers.GymWrapper(env)

    return env
Example #4
def make_environment(
        name: str,
        seed: Optional[int] = None,
) -> dm_env.Environment:
    """Creates a seeded OpenAI Gym environment."""
    env = gym.make(name)
    env.seed(seed)
    env = wrappers.GymWrapper(env)
    
    return env
Example #5
def make_gym_environment(
        task_name: str = 'MountainCarContinuous-v0') -> dm_env.Environment:
    """Creates an OpenAI Gym environment."""

    # Load the gym environment.
    environment = gym.make(task_name)

    # Make sure the environment obeys the dm_env.Environment interface.
    environment = wrappers.GymWrapper(environment)
    environment = wrappers.SinglePrecisionWrapper(environment)

    return environment
Example #6
def create_d4rl_env(
    task_name,
):
  """Create the environment for the d4rl task.

  Args:
    task_name: Name of d4rl task.
  Returns:
    A Gym environment wrapped to satisfy the dm_env interface.
  """
  env = gym.make(task_name)
  env = wrappers.GymWrapper(env)

  return env
Example #7
def make_environment(
    task: str = 'MountainCarContinuous-v0') -> dm_env.Environment:
  """Creates an OpenAI Gym environment."""

  # Load the gym environment.
  environment = gym.make(task)

  # Make sure the environment obeys the dm_env.Environment interface.
  environment = wrappers.GymWrapper(environment)
  # Clip the action returned by the agent to the environment spec.
  environment = wrappers.CanonicalSpecWrapper(environment, clip=True)
  environment = wrappers.SinglePrecisionWrapper(environment)

  return environment
Example #8
def make_environment(evaluation: bool = False,
                     task: str = 'HalfCheetah-v3') -> dm_env.Environment:
    """Creates an OpenAI Gym environment."""
    del evaluation

    # Load the gym environment.
    environment = gym.make(task)
    # Make sure the environment obeys the dm_env.Environment interface.
    environment = wrappers.GymWrapper(environment)
    # Clip the action returned by the agent to the environment spec.
    environment = wrappers.CanonicalSpecWrapper(environment, clip=True)
    environment = wrappers.SinglePrecisionWrapper(environment)

    return environment
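A quick way to confirm what CanonicalSpecWrapper does in Examples #7 and #8 is to inspect the wrapped action spec (a sketch using the Example #8 variant; the task is passed explicitly since the 'HalfCheetah-v3' default requires MuJoCo):

from acme import specs

env = make_environment(task='MountainCarContinuous-v0')
spec = specs.make_environment_spec(env)
# CanonicalSpecWrapper rescales the continuous action spec to [-1, 1].
print(spec.actions.minimum, spec.actions.maximum)  # -> [-1.] [1.]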
Example #9
def make_single_agent_env(scenario: str, render=False):
    scenario = SingleAgentScenario.from_spec(scenario, rendering=render)
    env = VectorizedSingleAgentRaceEnv(scenarios=[scenario])
    env = wrap_env(env=env, wrapper_configs='single_agent_wrappers.yml')
    env = wrappers.GymWrapper(environment=env)
    env = wrappers.SinglePrecisionWrapper(env)
    return env

# def make_multi_agent_env(scenario: str, render=False, test=False):
#     scenario = MultiAgentScenario.from_spec(scenario, rendering=render)
#     env = VectorizedMultiAgentRaceEnv(scenarios=[scenario])
#     if test:
#         env = wrap_env(env=env, wrapper_configs='multi_agent_test_wrappers.yml')
#     else:
#         env = wrap_env(env=env, wrapper_configs='multi_agent_wrappers.yml')
#
#     env = MultiAgentGymWrapper(environment=env)
#     env = wrappers.SinglePrecisionWrapper(env)
#     return env
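Hypothetical usage of the single-agent builder (the scenario spec string and 'single_agent_wrappers.yml' come from the surrounding racing project, not from Acme; the file name below is illustrative only):

env = make_single_agent_env('scenarios/austria.yml', render=False)
timestep = env.reset()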
Example #10
def make_environment(task, evaluation=False):
  """Creates an OpenAI Gym environment."""

  # Load the gym environment.
  environment = gym.make(task)

  environment = env_wrappers.AdroitSparseRewardWrapper(environment)

  # Make sure the environment obeys the dm_env.Environment interface.
  environment = wrappers.GymWrapper(environment)
  # Clip the action returned by the agent to the environment spec.
  environment = wrappers.CanonicalSpecWrapper(environment, clip=True)
  environment = wrappers.SinglePrecisionWrapper(environment)

  if evaluation:
    environment = env_wrappers.SuccessRewardWrapper(environment,
                                                    success_threshold=1.)

  return environment
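Hypothetical usage of the evaluation path ('door-v0' is one Adroit task id assumed for illustration; AdroitSparseRewardWrapper and SuccessRewardWrapper are project-specific wrappers):

train_environment = make_environment('door-v0')
eval_environment = make_environment('door-v0', evaluation=True)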
Example #11
from env.RSEnv import RSEnv
from env.TestRSEnv import TestRSEnv
from acme import environment_loop
from acme import specs
from acme import wrappers
from acme.agents.tf import d4pg
from acme.tf import networks
from acme.tf import utils as tf2_utils
from acme.utils import loggers
import numpy as np
import sonnet as snt

import gym

environment = RSEnv()
environment = wrappers.GymWrapper(environment)  # To dm_env interface.

# Make sure the environment outputs single-precision floats.
environment = wrappers.SinglePrecisionWrapper(environment)

# Grab the spec of the environment.
environment_spec = specs.make_environment_spec(environment)

# Build the D4PG agent networks.

# Get total number of action dimensions from action spec.
num_dimensions = np.prod(environment_spec.actions.shape, dtype=int)

# Create the shared observation network; here simply a state-less operation.
observation_network = tf2_utils.batch_concat
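The excerpt stops after the observation network. Below is a sketch of the remaining D4PG networks, following the layer sizes and value-distribution bounds used in the standard Acme D4PG tutorial (these are tutorial defaults assumed here, not values from this snippet):

# Create the deterministic policy network, mapping observations to actions
# scaled into the environment's action spec.
policy_network = snt.Sequential([
    networks.LayerNormMLP((256, 256, 256), activate_final=True),
    networks.NearZeroInitializedLinear(num_dimensions),
    networks.TanhToSpec(environment_spec.actions),
])

# Create the distributional critic network; the multiplexer concatenates
# the observation and action inputs before the MLP torso.
critic_network = snt.Sequential([
    networks.CriticMultiplexer(),
    networks.LayerNormMLP((512, 512, 256), activate_final=True),
    networks.DiscreteValuedHead(vmin=-150., vmax=150., num_atoms=51),
])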