Example #1
def make_environment(env_name, start_index, end_index,
                     seed):
  """Creates the environment.

  Args:
    env_name: name of the environment
    start_index: first index of the observation to use in the goal.
    end_index: final index of the observation to use in the goal. The goal
      is then obs[start_index:end_index].
    seed: random seed.
  Returns:
    env: the environment
    obs_dim: integer specifying the size of the observations, before
      the start_index/end_index is applied.
  """
  np.random.seed(seed)
  gym_env, obs_dim, max_episode_steps = env_utils.load(env_name)
  goal_indices = obs_dim + obs_to_goal_1d(np.arange(obs_dim), start_index,
                                          end_index)
  indices = np.concatenate([
      np.arange(obs_dim),
      goal_indices
  ])
  env = gym_wrapper.GymWrapper(gym_env)
  env = step_limit.StepLimitWrapper(env, step_limit=max_episode_steps)
  env = ObservationFilterWrapper(env, indices)
  if env_name.startswith('ant_'):
    env = canonical_spec.CanonicalSpecWrapper(env)
  return env, obs_dim
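A minimal call sketch for the factory above, assuming the project-level helpers it relies on (env_utils.load, obs_to_goal_1d, ObservationFilterWrapper) are importable from the surrounding codebase; the environment name and the 0:3 goal slice are illustrative assumptions, not values taken from the example.

# Hedged usage sketch; 'sawyer_push' and the goal slice are placeholders.
env, obs_dim = make_environment('sawyer_push', start_index=0, end_index=3, seed=0)
timestep = env.reset()
# The wrapped observation is the raw observation followed by the selected goal slice.
print(obs_dim, timestep.observation.shape)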
Example #2
  def test_gym_cartpole(self):
    env = gym_wrapper.GymWrapper(gym.make('CartPole-v0'))

    # Test converted observation spec.
    observation_spec: specs.BoundedArray = env.observation_spec()
    self.assertEqual(type(observation_spec), specs.BoundedArray)
    self.assertEqual(observation_spec.shape, (4,))
    self.assertEqual(observation_spec.minimum.shape, (4,))
    self.assertEqual(observation_spec.maximum.shape, (4,))
    self.assertEqual(observation_spec.dtype, np.dtype('float32'))

    # Test converted action spec.
    action_spec: specs.BoundedArray = env.action_spec()
    self.assertEqual(type(action_spec), specs.DiscreteArray)
    self.assertEqual(action_spec.shape, ())
    self.assertEqual(action_spec.minimum, 0)
    self.assertEqual(action_spec.maximum, 1)
    self.assertEqual(action_spec.num_values, 2)
    self.assertEqual(action_spec.dtype, np.dtype('int64'))

    # Test step.
    timestep = env.reset()
    self.assertTrue(timestep.first())
    timestep = env.step(1)
    self.assertEqual(timestep.reward, 1.0)
    self.assertEqual(timestep.observation.shape, (4,))
    env.close()
Example #3
  def test_early_truncation(self):
    # Pendulum has no early termination condition.
    gym_env = gym.make('Pendulum-v0')
    env = gym_wrapper.GymWrapper(gym_env)
    ts = env.reset()
    while not ts.last():
      ts = env.step(env.action_spec().generate_value())
    self.assertEqual(ts.discount, 1.0)
    env.close()
Example #4
def _build_environment(name, n_actions=3, max_steps=500):
  """Builds a fully observable MiniGrid-style environment and its acme spec."""
  raw_env = gym.make(name)
  # Override the action-space size and episode length of the raw environment.
  raw_env.action_space.n = n_actions
  raw_env.max_steps = max_steps
  # Expose the full grid as a flat image observation, then adapt to dm_env.
  env = ImgFlatObsWrapper(FullyObsWrapper(raw_env))
  env = gym_wrapper.GymWrapper(env)
  env = CustomSinglePrecisionWrapper(env)
  spec = specs.make_environment_spec(env)
  return env, spec
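A hedged usage sketch; 'MiniGrid-Empty-5x5-v0' is an assumed gym-minigrid task id, and ImgFlatObsWrapper / CustomSinglePrecisionWrapper are taken to be the project-specific wrappers referenced above.

# Illustrative only; the task id is an assumption, not part of the snippet above.
env, spec = _build_environment('MiniGrid-Empty-5x5-v0', n_actions=3, max_steps=500)
timestep = env.reset()
print(spec.actions.num_values, timestep.observation.shape)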
Example #5
  def test_early_truncation(self):
    # Pendulum has no early termination condition. Recent versions of gym force
    # us to use v1. We try both in case an earlier version is installed.
    try:
      gym_env = gym.make('Pendulum-v1')
    except:  # pylint: disable=bare-except
      gym_env = gym.make('Pendulum-v0')
    env = gym_wrapper.GymWrapper(gym_env)
    ts = env.reset()
    while not ts.last():
      ts = env.step(env.action_spec().generate_value())
    self.assertEqual(ts.discount, 1.0)
    self.assertTrue(np.isscalar(ts.reward))
    env.close()
Example #6
  def test_basic(self):
    env = GymEnvWithInfo()
    env = gym_wrapper.GymWrapper(env)
    observer = env_info.EnvInfoObserver()
    timestep = env.reset()
    observer.observe_first(env, timestep)
    for _ in range(20):
      action = np.zeros((3,))
      timestep = env.step(action)
      observer.observe(env, timestep, action)
    metrics = observer.get_metrics()
    self.assertLen(metrics, 3)
    np.testing.assert_equal(metrics['found_checkpoint'], 2)
    np.testing.assert_equal(metrics['picked_up_an_apple'], 1)
    np.testing.assert_equal(metrics['survival_bonus'], 20)
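The GymEnvWithInfo fixture is not shown above. A minimal stand-in sketch, assuming EnvInfoObserver simply sums the scalar entries of the gym info dict over an episode; the step indices chosen here are only meant to reproduce the counts asserted in the test.

class GymEnvWithInfo(gym.Env):
  """Hypothetical stand-in that emits an info dict for the observer to aggregate."""

  def __init__(self):
    self.observation_space = gym.spaces.Box(low=-1., high=1., shape=(10,), dtype=np.float32)
    self.action_space = gym.spaces.Box(low=-1., high=1., shape=(3,), dtype=np.float32)
    self._step = 0

  def reset(self):
    self._step = 0
    return self.observation_space.sample()

  def step(self, action):
    self._step += 1
    # Every step yields a survival bonus; checkpoints and the apple are sparse.
    info = {'survival_bonus': 1}
    if self._step in (7, 14):
      info['found_checkpoint'] = 1
    if self._step == 10:
      info['picked_up_an_apple'] = 1
    return self.observation_space.sample(), 0.0, False, info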
Example #7
def make_environment(task,
                     end_on_success,
                     max_episode_steps,
                     distance_fn,
                     goal_image,
                     baseline_distance=None,
                     eval_mode=False,
                     logdir=None,
                     counter=None,
                     record_every=100,
                     num_episodes_to_record=3):
    """Create the environment and its wrappers."""
    env = gym.make(task)
    env = gym_wrapper.GymWrapper(env)
    if end_on_success:
        env = env_wrappers.EndOnSuccessWrapper(env)
    env = wrappers.StepLimitWrapper(env, max_episode_steps)

    env = env_wrappers.ReshapeImageWrapper(env)
    if distance_fn.history_length > 1:
        env = wrappers.FrameStackingWrapper(env, distance_fn.history_length)
    env = env_wrappers.GoalConditionedWrapper(env, goal_image)
    env = env_wrappers.DistanceModelWrapper(
        env,
        distance_fn,
        max_episode_steps,
        baseline_distance,
        distance_reward_weight=FLAGS.distance_reward_weight,
        environment_reward_weight=FLAGS.environment_reward_weight)
    if FLAGS.use_true_distance:
        env = env_wrappers.RewardWrapper(env)
    if logdir:
        env = env_wrappers.RecordEpisodesWrapper(
            env,
            counter,
            logdir,
            record_every=record_every,
            num_to_record=num_episodes_to_record,
            eval_mode=eval_mode)
    env = env_wrappers.VisibleStateWrapper(env, eval_mode)

    return single_precision.SinglePrecisionWrapper(env)
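A hedged call sketch for the factory above; the task id, distance_fn object, and goal_image array are placeholders for whatever the surrounding project supplies, and the FLAGS referenced inside the function must already be parsed.

# Illustrative only; 'PushCube-v0', my_distance_fn and goal_img are hypothetical.
env = make_environment(
    task='PushCube-v0',
    end_on_success=True,
    max_episode_steps=200,
    distance_fn=my_distance_fn,
    goal_image=goal_img,
    eval_mode=False)
environment_spec = specs.make_environment_spec(env)  # acme.specs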
Example #8
import acme
import gym

from acme import specs
from acme import wrappers
from acme.utils import loggers
from acme.wrappers import gym_wrapper

from agents.dqn_agent import DQNAgent
from networks.models import Models

from tensorflow.python.client import device_lib

print(device_lib.list_local_devices())


def render(env):
    return env.environment.render(mode='rgb_array')


environment = gym_wrapper.GymWrapper(gym.make('LunarLander-v2'))
environment = wrappers.SinglePrecisionWrapper(environment)
environment_spec = specs.make_environment_spec(environment)

model = Models.sequential_model(
    input_shape=environment_spec.observations.shape,
    num_outputs=environment_spec.actions.num_values,
    hidden_layers=3,
    layer_size=300)

agent = DQNAgent(environment_spec=environment_spec, network=model)

logger = loggers.TerminalLogger(time_delta=10.)
loop = acme.EnvironmentLoop(environment=environment, actor=agent, logger=logger)
loop.run()
Example #9
from acme import specs
from acme import wrappers
from acme.utils import loggers
from acme.wrappers import gym_wrapper

import gym
import dm_env
import matplotlib.pyplot as plt
import numpy as np
import reverb
import sonnet as snt
import tensorflow as tf

from IPython.display import clear_output
clear_output()

# Load Environment:

env = gym_wrapper.GymWrapper(gym.make('MountainCarContinuous-v0'))
env = wrappers.SinglePrecisionWrapper(env)

env.environment.render(mode='rgb_array')


def render(env):
    return env.environment.render(mode='rgb_array')


environment_spec = specs.make_environment_spec(env)

# Create D4PG Agent:

# Get total number of action dimensions from action spec
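The snippet breaks off here; a hedged guess at the next step in this tutorial-style setup, included only to illustrate how the action dimensionality is usually read off the spec.

# Assumed continuation, not part of the original snippet.
num_dimensions = np.prod(environment_spec.actions.shape, dtype=int)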