Esempio n. 1
0
def generate_rollout_hash(spec):
    """Run a seeded random-action rollout of `spec` and hash what it produced.

    Both `spaces` and the environment are seeded with 0. Actions are drawn
    from `env.action_space.sample()` for at most `episodes` episodes of at
    most `steps` steps each, and the rollout stops as soon as
    `ROLLOUT_STEPS` steps have been taken in total.

    Only the values returned by `env.step` are recorded; the observation
    returned by `env.reset()` is not part of any hash.

    Args:
        spec: environment spec; `spec.make()` must return the environment.

    Returns:
        A 4-tuple `(observations_hash, actions_hash, rewards_hash,
        dones_hash)`, each produced by `hash_object` over the corresponding
        list of per-step values.
    """
    spaces.seed(0)
    env = spec.make()

    observation_list = []
    action_list = []
    reward_list = []
    done_list = []

    # Release the environment even if seeding, resetting or stepping raises.
    try:
        env.seed(0)

        total_steps = 0
        for _episode in range(episodes):
            if total_steps >= ROLLOUT_STEPS:
                break
            env.reset()

            for _step in range(steps):
                action = env.action_space.sample()
                observation, reward, done, _info = env.step(action)

                action_list.append(action)
                observation_list.append(observation)
                reward_list.append(reward)
                done_list.append(done)

                total_steps += 1
                # Stop this episode when it ends or the step budget is spent;
                # the outer loop re-checks the budget before starting another.
                if done or total_steps >= ROLLOUT_STEPS:
                    break
    finally:
        env.close()

    observations_hash = hash_object(observation_list)
    actions_hash = hash_object(action_list)
    rewards_hash = hash_object(reward_list)
    dones_hash = hash_object(done_list)

    return observations_hash, actions_hash, rewards_hash, dones_hash
Esempio n. 2
0
def test_env(spec):
    """Check that two identically seeded instances of `spec` behave the same.

    The environment is built twice. Each time `spaces` and the environment
    are seeded with 0, four actions and four observations are sampled from
    the spaces, the environment is reset, and the sampled actions are
    stepped through. Sampled actions and observations must match between
    the two runs. Unless `spec.nondeterministic` is set, the initial
    observation and every step response must match as well.

    Args:
        spec: environment spec to test. Nothing is checked when
            `should_skip_env_spec_for_tests(spec)` is true.
    """
    if should_skip_env_spec_for_tests(spec):
        return

    # Note that this precludes running this test in multiple
    # threads. However, we probably already can't do multithreading
    # due to some environments.
    spaces.seed(0)

    env1 = spec.make()
    env1.seed(0)
    action_samples1 = [env1.action_space.sample() for i in range(4)]
    observation_samples1 = [env1.observation_space.sample() for i in range(4)]
    initial_observation1 = env1.reset()
    step_responses1 = [env1.step(action) for action in action_samples1]
    env1.close()

    spaces.seed(0)

    env2 = spec.make()
    env2.seed(0)
    action_samples2 = [env2.action_space.sample() for i in range(4)]
    observation_samples2 = [env2.observation_space.sample() for i in range(4)]
    initial_observation2 = env2.reset()
    step_responses2 = [env2.step(action) for action in action_samples2]
    env2.close()

    for i, (action_sample1,
            action_sample2) in enumerate(zip(action_samples1,
                                             action_samples2)):
        # Hand the context to assert_equals as its third argument, as the
        # step-response checks below do. Writing the message after a comma
        # would only build a throwaway tuple and never report it.
        assert_equals(
            action_sample1, action_sample2,
            '[{}] action_sample1: {}, action_sample2: {}'.format(
                i, action_sample1, action_sample2))

    for (observation_sample1,
         observation_sample2) in zip(observation_samples1,
                                     observation_samples2):
        assert_equals(observation_sample1, observation_sample2)

    # Don't check rollout equality if it's a nondeterministic
    # environment.
    if spec.nondeterministic:
        return

    assert_equals(initial_observation1, initial_observation2)

    for i, ((o1, r1, d1, i1),
            (o2, r2, d2,
             i2)) in enumerate(zip(step_responses1, step_responses2)):
        assert_equals(o1, o2, '[{}] '.format(i))
        assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2)
        assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2)

        # Go returns a Pachi game board in info, which doesn't
        # properly check equality. For now, we hack around this by
        # just skipping Go.
        if spec.id not in ['Go9x9-v0', 'Go19x19-v0']:
            assert_equals(i1, i2, '[{}] '.format(i))
Esempio n. 3
0
def test_env(spec):
    """Verify that `spec` yields identical results across two seeded runs.

    MuJoCo environments are skipped when neither the `MUJOCO_KEY_BUNDLE`
    environment variable nor a `~/.mujoco` directory is present, and box2d
    environments are always skipped (with a warning). Otherwise the
    environment is built and exercised twice under seed 0. Sampled actions
    and observations must agree, and unless `spec.nondeterministic` is set,
    so must the initial observation and each step response.

    Args:
        spec: environment spec to test.
    """
    entry_point = spec._entry_point

    # Skip mujoco tests for pull request CI
    key_missing = not os.environ.get('MUJOCO_KEY_BUNDLE')
    skip_mujoco = key_missing and not os.path.exists(os.path.expanduser('~/.mujoco'))
    if skip_mujoco and entry_point.startswith('gym.envs.mujoco:'):
        return

    # TODO(jonas 2016-05-11): Re-enable these tests after fixing box2d-py
    if entry_point.startswith('gym.envs.box2d:'):
        logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
        return

    def run_once():
        # Seeding `spaces` here precludes running this test in multiple
        # threads. However, we probably already can't do multithreading
        # due to some environments.
        spaces.seed(0)

        env = spec.make()
        env.seed(0)
        actions = [env.action_space.sample() for _ in range(4)]
        observations = [env.observation_space.sample() for _ in range(4)]
        first_observation = env.reset()
        responses = [env.step(action) for action in actions]
        env.close()
        return actions, observations, first_observation, responses

    action_samples1, observation_samples1, initial_observation1, step_responses1 = run_once()
    action_samples2, observation_samples2, initial_observation2, step_responses2 = run_once()

    for i, pair in enumerate(zip(action_samples1, action_samples2)):
        action_sample1, action_sample2 = pair
        assert np.array_equal(action_sample1, action_sample2), '[{}] action_sample1: {}, action_sample2: {}'.format(i, action_sample1, action_sample2)

    for observation_sample1, observation_sample2 in zip(observation_samples1, observation_samples2):
        # Allows for NaNs
        np.testing.assert_array_equal(observation_sample1, observation_sample2)

    # Rollout equality is only meaningful for deterministic environments.
    if spec.nondeterministic:
        return

    assert np.array_equal(initial_observation1, initial_observation2), 'initial_observation1: {}, initial_observation2: {}'.format(initial_observation1, initial_observation2)

    for i, (first, second) in enumerate(zip(step_responses1, step_responses2)):
        o1, r1, d1, i1 = first
        o2, r2, d2, i2 = second
        assert_equals(o1, o2, '[{}] '.format(i))
        assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2)
        assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2)

        # Go returns a Pachi game board in info, which doesn't
        # properly check equality. For now, we hack around this by
        # just skipping Go.
        if spec.id in ['Go9x9-v0', 'Go19x19-v0']:
            continue
        assert_equals(i1, i2, '[{}] '.format(i))
def test_env(spec):
    """Check that two identically seeded instances of `spec` behave the same.

    The environment is built twice. Each time `spaces` and the environment
    are seeded with 0, four actions and four observations are sampled from
    the spaces, the environment is reset, and the sampled actions are
    stepped through. Sampled actions and observations must match between
    the two runs. Unless `spec.nondeterministic` is set, the initial
    observation and every step response must match as well.

    Args:
        spec: environment spec to test. Nothing is checked when
            `should_skip_env_spec_for_tests(spec)` is true.
    """
    if should_skip_env_spec_for_tests(spec):
        return

    # Note that this precludes running this test in multiple
    # threads. However, we probably already can't do multithreading
    # due to some environments.
    spaces.seed(0)

    env1 = spec.make()
    env1.seed(0)
    action_samples1 = [env1.action_space.sample() for i in range(4)]
    observation_samples1 = [env1.observation_space.sample() for i in range(4)]
    initial_observation1 = env1.reset()
    step_responses1 = [env1.step(action) for action in action_samples1]
    env1.close()

    spaces.seed(0)

    env2 = spec.make()
    env2.seed(0)
    action_samples2 = [env2.action_space.sample() for i in range(4)]
    observation_samples2 = [env2.observation_space.sample() for i in range(4)]
    initial_observation2 = env2.reset()
    step_responses2 = [env2.step(action) for action in action_samples2]
    env2.close()

    for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
        # Hand the context to assert_equals as its third argument, as the
        # step-response checks below do. Writing the message after a comma
        # would only build a throwaway tuple and never report it.
        assert_equals(
            action_sample1,
            action_sample2,
            "[{}] action_sample1: {}, action_sample2: {}".format(i, action_sample1, action_sample2),
        )

    for (observation_sample1, observation_sample2) in zip(observation_samples1, observation_samples2):
        assert_equals(observation_sample1, observation_sample2)

    # Don't check rollout equality if it's a nondeterministic
    # environment.
    if spec.nondeterministic:
        return

    assert_equals(initial_observation1, initial_observation2)

    for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)):
        assert_equals(o1, o2, "[{}] ".format(i))
        assert r1 == r2, "[{}] r1: {}, r2: {}".format(i, r1, r2)
        assert d1 == d2, "[{}] d1: {}, d2: {}".format(i, d1, d2)

        # Go returns a Pachi game board in info, which doesn't
        # properly check equality. For now, we hack around this by
        # just skipping Go.
        if spec.id not in ["Go9x9-v0", "Go19x19-v0"]:
            assert_equals(i1, i2, "[{}] ".format(i))
Esempio n. 5
0
 def __init__(self,n=10,space_seed=0):
   """Build an n-bit environment with a sampled start state and goal.

   Args:
     n: number of bits in the observation; the action space has n + 1
       entries.
     space_seed: value passed to `spaces.seed` before the start state and
       the goal are sampled.
   """
   self.n = n
   # One action per bit, plus index n which means "flip no bit".
   self.action_space = spaces.Discrete(self.n + 1)
   self.observation_space = spaces.MultiBinary(self.n)
   self.reward_range = (-1, 0)

   # Seed first, then draw the start state and the goal in that order.
   spaces.seed(space_seed)
   start = self.observation_space.sample()
   target = self.observation_space.sample()
   self.initial_state = start
   self.goal = target
   # The current state starts out as the very same object as initial_state.
   self.state = start
   self.envstepcount = 0

   if np.array_equal(target, start):
      # Start already equals the goal.
      self.reward_max = 0
   else:
      # One minus the number of bit positions where start and goal differ.
      self.reward_max = -np.sum(np.bitwise_xor(start, target)) + 1
Esempio n. 6
0
 def __init__(self, rows=8, cols=8, mines=10):
     """Create the bookkeeping for a rows x cols board holding `mines` mines.

     Args:
         rows: number of board rows.
         cols: number of board columns.
         mines: number of mines on the board.
     """
     seed()  # Initialize RNG

     # An action is a (row, column) pair.
     self.action_space = Tuple((Discrete(rows), Discrete(cols)))

     self.rows, self.cols, self.mines = rows, cols, mines
     self.nonMines = rows * cols - mines

     self.clickedCoords = set()
     self.chosenCoords = []
     # Axis labels: the twenty letters 'a' through 't'.
     self.letter_Axis = list('abcdefghijklmnopqrst')

     # Every cell starts out as Minefield.UNKNOWN.
     self.state = np.full([self.rows, self.cols], Minefield.UNKNOWN)
     self.neighboring_mines = 0
Esempio n. 7
0
    def __init__(self):
        """Load the image and its mask, define the spaces, and reset.

        The image and mask are read from fixed absolute paths; the mask
        array is printed once it has been loaded.
        """
        seed()
        self.seed()

        # NOTE(review): machine-specific absolute paths. The "Ori" files are
        # used here; 012.bmp / 012.png in the same folder are the alternative.
        image_path = r"E:\gym\gym\envs\pap\012Ori.bmp"
        mask_path = r"E:\gym\gym\envs\pap\012Ori.png"

        self.img = Image.open(image_path)
        mask_image = Image.open(mask_path)
        self.mask = np.asarray(mask_image)
        print(self.mask)

        self.width, self.height = self.img.size

        self.action_space = spaces.Discrete(256)
        self.observation_space = spaces.Discrete(4)

        # Array copy of the image converted to PIL mode 'L'.
        grey_image = self.img.convert('L')
        self.img_array = np.asarray(grey_image)

        self.guess_count = 0
        self.guess_max = 100
        self.observation = 0

        self.reset()
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    """Check that the VNC-backed and raw environments behave identically.

    Builds the environment from `spec` (configured with one remote when its
    metadata asks for it), wraps it with `wrapper` and
    `wrappers.Unvectorize`, and builds the raw environment named by
    `spec._kwargs['gym_core_id']`. Both are seeded identically and then
    driven through `reset` / `rollout`, which compare them using `matcher`.

    Args:
        spec: spec of the VNC environment.
        matcher: forwarded to `reset` and `rollout`.
        wrapper: callable applied to the VNC environment before it is
            unvectorized.
    """
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    spaces.seed(0)

    remote_env = spec.make()
    needs_configure = remote_env.metadata.get('configure.required', False)
    if needs_configure:
        remote_env.configure(remotes=1)
    remote_env = wrappers.Unvectorize(wrapper(remote_env))

    raw_env = gym.make(spec._kwargs['gym_core_id'])

    # Raw environment first, then the VNC one.
    raw_env.seed(0)
    remote_env.seed(0)

    # Reset observations must agree.
    reset(matcher, raw_env, remote_env, stage='initial reset')

    # A full rollout must agree.
    rollout(matcher, raw_env, remote_env, timestep_limit=50, stage='50 steps')

    # Start a new episode.
    reset(matcher, raw_env, remote_env, stage='reset to new episode')

    # One step into that new episode must agree.
    rollout(matcher, raw_env, remote_env, timestep_limit=1,
            stage='1 step in new episode')

    # Both environments must accept a new seed.
    raw_env.seed(1)
    remote_env.seed(1)
    reset(matcher, raw_env, remote_env, 'reseeded reset')
    rollout(matcher, raw_env, remote_env, timestep_limit=1, stage='reseeded step')
Esempio n. 9
0
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    """Compare a VNC-wrapped environment against its raw counterpart.

    The VNC environment comes from `spec.make()`, is configured with one
    remote if its metadata sets 'configure.required', and is then passed
    through `wrapper` and `wrappers.Unvectorize`. The raw environment comes
    from `gym.make` on `spec._kwargs['gym_core_id']`. Both receive the same
    seeds, and `reset` / `rollout` compare them with `matcher` at each
    stage.

    Args:
        spec: spec of the VNC environment.
        matcher: forwarded to `reset` and `rollout`.
        wrapper: callable applied to the VNC environment before it is
            unvectorized.
    """
    gym.undo_logger_setup()
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    spaces.seed(0)

    vnc_side = spec.make()
    if vnc_side.metadata.get('configure.required', False):
        vnc_side.configure(remotes=1)
    vnc_side = wrapper(vnc_side)
    vnc_side = wrappers.Unvectorize(vnc_side)

    core_id = spec._kwargs['gym_core_id']
    core_side = gym.make(core_id)

    core_side.seed(0)
    vnc_side.seed(0)

    # Initial reset, then a full 50-step rollout.
    reset(matcher, core_side, vnc_side, stage='initial reset')
    rollout(matcher, core_side, vnc_side, timestep_limit=50, stage='50 steps')

    # New episode: reset, then a single step into it.
    reset(matcher, core_side, vnc_side, stage='reset to new episode')
    rollout(matcher, core_side, vnc_side, timestep_limit=1, stage='1 step in new episode')

    # Reseed both sides and repeat the reset / single-step check.
    core_side.seed(1)
    vnc_side.seed(1)
    reset(matcher, core_side, vnc_side, 'reseeded reset')
    rollout(matcher, core_side, vnc_side, timestep_limit=1, stage='reseeded step')
    def __init__(self):
        """Announce the training files, load them, define the spaces, reset.

        The image and mask named by `TRAIN_IMAGE` and `TRAIN_MASK` are read
        from the `data` directory that sits next to this file's directory.
        """
        banner_rule = "--------------------------------------"
        print(banner_rule)
        print("| IMAGE: ", TRAIN_IMAGE, "MASK: ", TRAIN_MASK, "    |")
        print(banner_rule)

        seed()
        self.seed()

        script_dir = dirname(__file__)
        image_path = abspath(join(script_dir, "..", "data", TRAIN_IMAGE))
        mask_path = abspath(join(script_dir, "..", "data", TRAIN_MASK))

        self.img = Image.open(image_path)
        self.mask = np.asarray(Image.open(mask_path))
        # Number of mask entries whose index-0 component on the last axis is zero.
        first_component_is_zero = self.mask[..., 0] == 0
        self.mask_zero_count = np.count_nonzero(first_component_is_zero)

        self.width, self.height = self.img.size

        self.action_space = spaces.Discrete(256)
        self.observation_space = spaces.Discrete(4)

        # Array copy of the image converted to PIL mode 'L'.
        grey_image = self.img.convert('L')
        self.img_array = np.asarray(grey_image)

        self.guess_count = 0
        self.guess_max = 100
        self.observation = 0

        self.reset()
Esempio n. 11
0
 def __init__(self, rows=8, cols=8, mines=10):
     """Record the board dimensions and mine count and define the actions.

     Args:
         rows: number of board rows.
         cols: number of board columns.
         mines: number of mines on the board.
     """
     seed()  # Initialize RNG

     # An action is a (row, column) pair.
     row_choice = Discrete(rows)
     col_choice = Discrete(cols)
     self.action_space = Tuple((row_choice, col_choice))

     self.rows, self.cols, self.mines = rows, cols, mines
Esempio n. 12
0
 def seed(self, seed):
     """Seed random number generation by forwarding `seed` to `spaces.seed`.

     Args:
         seed: value passed unchanged to `spaces.seed`.

     NOTE(review): `spaces.seed` is called on the `spaces` module, not on
     this instance, so the seed presumably affects state shared with other
     users of `spaces` -- confirm.
     """
     spaces.seed(seed)