def generate_rollout_hash(spec):
    """Roll out a seeded random policy on ``spec`` and hash what happened.

    The space sampler and the environment are both seeded with 0, then
    random actions are stepped for at most ``ROLLOUT_STEPS`` total steps,
    spread over at most ``episodes`` episodes of at most ``steps`` steps
    each (all three are module-level names defined outside this function).

    Args:
        spec: an environment spec exposing ``make()``.

    Returns:
        A 4-tuple ``(observations_hash, actions_hash, rewards_hash,
        dones_hash)`` produced by ``hash_object`` over the per-step lists.
    """
    spaces.seed(0)
    env = spec.make()
    try:
        env.seed(0)

        observation_list = []
        action_list = []
        reward_list = []
        done_list = []

        total_steps = 0
        for _episode in range(episodes):
            if total_steps >= ROLLOUT_STEPS:
                break
            # The reset observation is intentionally not recorded; only
            # observations returned by step() go into the hash.
            env.reset()

            for _step in range(steps):
                action = env.action_space.sample()
                observation, reward, done, _info = env.step(action)

                action_list.append(action)
                observation_list.append(observation)
                reward_list.append(reward)
                done_list.append(done)

                total_steps += 1
                if total_steps >= ROLLOUT_STEPS:
                    break
                if done:
                    break
    finally:
        # Release the environment even if the rollout raises, matching
        # the explicit close() calls made by the other env tests.
        env.close()

    observations_hash = hash_object(observation_list)
    actions_hash = hash_object(action_list)
    rewards_hash = hash_object(reward_list)
    dones_hash = hash_object(done_list)
    return observations_hash, actions_hash, rewards_hash, dones_hash
def test_env(spec):
    """Check that two identically seeded instances of ``spec`` agree.

    Two environments are created from ``spec`` with the same space and
    environment seeds. Their sampled actions and sampled observations must
    match; unless ``spec.nondeterministic`` is set, the initial observation
    and every step response must match as well.

    Args:
        spec: an environment spec exposing ``make()``, ``nondeterministic``
            and ``id``.
    """
    if should_skip_env_spec_for_tests(spec):
        return

    # Note that this precludes running this test in multiple
    # threads. However, we probably already can't do multithreading
    # due to some environments.
    spaces.seed(0)

    env1 = spec.make()
    env1.seed(0)
    action_samples1 = [env1.action_space.sample() for i in range(4)]
    observation_samples1 = [env1.observation_space.sample() for i in range(4)]
    initial_observation1 = env1.reset()
    step_responses1 = [env1.step(action) for action in action_samples1]
    env1.close()

    spaces.seed(0)

    env2 = spec.make()
    env2.seed(0)
    action_samples2 = [env2.action_space.sample() for i in range(4)]
    observation_samples2 = [env2.observation_space.sample() for i in range(4)]
    initial_observation2 = env2.reset()
    step_responses2 = [env2.step(action) for action in action_samples2]
    env2.close()

    for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
        # The sample index goes in as assert_equals' third argument, the
        # same way the rollout checks below pass it. Previously the message
        # was written after a comma, which only built a discarded tuple, so
        # it never appeared in any failure output.
        assert_equals(action_sample1, action_sample2, '[{}] '.format(i))

    for (observation_sample1, observation_sample2) in zip(observation_samples1, observation_samples2):
        assert_equals(observation_sample1, observation_sample2)

    # Don't check rollout equality if it's a nondeterministic
    # environment.
    if spec.nondeterministic:
        return

    assert_equals(initial_observation1, initial_observation2)

    for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)):
        assert_equals(o1, o2, '[{}] '.format(i))
        assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2)
        assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2)

        # Go returns a Pachi game board in info, which doesn't
        # properly check equality. For now, we hack around this by
        # just skipping Go.
        if spec.id not in ['Go9x9-v0', 'Go19x19-v0']:
            assert_equals(i1, i2, '[{}] '.format(i))
def test_env(spec):
    """Verify that ``spec`` gives reproducible samples and rollouts.

    MuJoCo environments are skipped when neither the ``MUJOCO_KEY_BUNDLE``
    environment variable nor a ``~/.mujoco`` path is present; box2d
    environments are always skipped, with a warning. Otherwise two
    identically seeded environments are built and compared.
    """
    # Skip mujoco tests for pull request CI
    mujoco_available = (os.environ.get('MUJOCO_KEY_BUNDLE')
                        or os.path.exists(os.path.expanduser('~/.mujoco')))
    if not mujoco_available and spec._entry_point.startswith('gym.envs.mujoco:'):
        return

    # TODO(jonas 2016-05-11): Re-enable these tests after fixing box2d-py
    if spec._entry_point.startswith('gym.envs.box2d:'):
        logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
        return

    def seeded_run():
        # Note that this precludes running this test in multiple
        # threads. However, we probably already can't do multithreading
        # due to some environments.
        spaces.seed(0)

        env = spec.make()
        env.seed(0)
        actions = [env.action_space.sample() for _ in range(4)]
        observations = [env.observation_space.sample() for _ in range(4)]
        first_observation = env.reset()
        responses = [env.step(action) for action in actions]
        env.close()
        return actions, observations, first_observation, responses

    action_samples1, observation_samples1, initial_observation1, step_responses1 = seeded_run()
    action_samples2, observation_samples2, initial_observation2, step_responses2 = seeded_run()

    for idx, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
        assert np.array_equal(action_sample1, action_sample2), \
            '[{}] action_sample1: {}, action_sample2: {}'.format(idx, action_sample1, action_sample2)

    for observation_sample1, observation_sample2 in zip(observation_samples1, observation_samples2):
        # Allows for NaNs
        np.testing.assert_array_equal(observation_sample1, observation_sample2)

    # Rollout equality is only meaningful for deterministic environments.
    if spec.nondeterministic:
        return

    assert np.array_equal(initial_observation1, initial_observation2), \
        'initial_observation1: {}, initial_observation2: {}'.format(initial_observation1, initial_observation2)

    # Go returns a Pachi game board in info, which doesn't
    # properly check equality. For now, we hack around this by
    # just skipping Go.
    compare_info = spec.id not in ['Go9x9-v0', 'Go19x19-v0']

    for idx, (first, second) in enumerate(zip(step_responses1, step_responses2)):
        o1, r1, d1, i1 = first
        o2, r2, d2, i2 = second
        assert_equals(o1, o2, '[{}] '.format(idx))
        assert r1 == r2, '[{}] r1: {}, r2: {}'.format(idx, r1, r2)
        assert d1 == d2, '[{}] d1: {}, d2: {}'.format(idx, d1, d2)
        if compare_info:
            assert_equals(i1, i2, '[{}] '.format(idx))
def test_env(spec):
    """Check that two identically seeded instances of ``spec`` agree.

    Sampled actions and sampled observations of both instances must match.
    Unless ``spec.nondeterministic`` is set, the initial observation and
    each step response (observation, reward, done, info) must match too.

    Args:
        spec: an environment spec exposing ``make()``, ``nondeterministic``
            and ``id``.
    """
    if should_skip_env_spec_for_tests(spec):
        return

    def _seeded_run():
        """Build one env from ``spec`` under seed 0 and collect its outputs."""
        # Note that this precludes running this test in multiple
        # threads. However, we probably already can't do multithreading
        # due to some environments.
        spaces.seed(0)

        env = spec.make()
        env.seed(0)
        action_samples = [env.action_space.sample() for _ in range(4)]
        observation_samples = [env.observation_space.sample() for _ in range(4)]
        initial_observation = env.reset()
        step_responses = [env.step(action) for action in action_samples]
        env.close()
        return action_samples, observation_samples, initial_observation, step_responses

    action_samples1, observation_samples1, initial_observation1, step_responses1 = _seeded_run()
    action_samples2, observation_samples2, initial_observation2, step_responses2 = _seeded_run()

    for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
        # Pass the index prefix into assert_equals itself, as the rollout
        # checks below do. The old form put the message after a comma,
        # producing a throwaway tuple, so the message was never reported.
        assert_equals(action_sample1, action_sample2, "[{}] ".format(i))

    for (observation_sample1, observation_sample2) in zip(observation_samples1, observation_samples2):
        assert_equals(observation_sample1, observation_sample2)

    # Don't check rollout equality if it's a nondeterministic
    # environment.
    if spec.nondeterministic:
        return

    assert_equals(initial_observation1, initial_observation2)

    for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)):
        assert_equals(o1, o2, "[{}] ".format(i))
        assert r1 == r2, "[{}] r1: {}, r2: {}".format(i, r1, r2)
        assert d1 == d2, "[{}] d1: {}, d2: {}".format(i, d1, d2)

        # Go returns a Pachi game board in info, which doesn't
        # properly check equality. For now, we hack around this by
        # just skipping Go.
        if spec.id not in ["Go9x9-v0", "Go19x19-v0"]:
            assert_equals(i1, i2, "[{}] ".format(i))
def __init__(self, n=10, space_seed=0):
    """Set up an ``n``-bit environment with a sampled start state and goal.

    Args:
        n: number of bits in the observation.
        space_seed: value passed to ``spaces.seed`` before the initial
            state and the goal are sampled.
    """
    self.n = n
    # One action per bit, plus action index n which flips nothing.
    self.action_space = spaces.Discrete(self.n + 1)
    self.observation_space = spaces.MultiBinary(self.n)
    self.reward_range = (-1, 0)

    spaces.seed(space_seed)
    start = self.observation_space.sample()
    target = self.observation_space.sample()
    self.initial_state = start
    self.goal = target
    self.state = start
    self.envstepcount = 0

    # reward_max is 0 when start and goal already coincide; otherwise it
    # is one minus the number of bit positions where they differ.
    if np.array_equal(target, start):
        self.reward_max = 0
    else:
        self.reward_max = -np.sum(np.bitwise_xor(start, target)) + 1
def __init__(self, rows=8, cols=8, mines=10):
    """Initialise the board dimensions, bookkeeping and the unknown grid.

    Args:
        rows: number of board rows.
        cols: number of board columns.
        mines: number of mines on the board.
    """
    seed()  # Initialize RNG

    # An action is a (row, column) pair.
    self.action_space = Tuple((Discrete(rows), Discrete(cols)))

    self.rows, self.cols, self.mines = rows, cols, mines
    self.nonMines = rows * cols - mines

    self.clickedCoords = set()
    # Axis labels 'a' through 't' (20 entries).
    self.letter_Axis = list('abcdefghijklmnopqrst')
    self.chosenCoords = []

    # Every cell starts out as Minefield.UNKNOWN.
    self.state = np.full([rows, cols], Minefield.UNKNOWN)
    self.neighboring_mines = 0
def __init__(self):
    """Load the image and mask, define the spaces and reset the env.

    NOTE(review): the image and mask are read from hard-coded absolute
    Windows paths, so this only works on a machine that has them.
    """
    seed()
    self.seed()

    # The non-"Ori" assets (012.bmp / 012.png) were used by earlier,
    # now-disabled versions of these two loads.
    image = Image.open(r"E:\gym\gym\envs\pap\012Ori.bmp")
    self.img = image
    mask_image = Image.open(r"E:\gym\gym\envs\pap\012Ori.png")
    self.mask = np.asarray(mask_image)
    print(self.mask)

    self.width, self.height = image.size

    self.action_space = spaces.Discrete(256)
    self.observation_space = spaces.Discrete(4)

    # Grayscale ('L' mode) copy of the image as an array.
    grayscale = image.convert('L')
    self.img_array = np.asarray(grayscale)

    self.guess_count, self.guess_max = 0, 100
    self.observation = 0

    self.reset()
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    """Check that the VNC-backed env and the raw gym env behave identically.

    Both environments are seeded the same way, then compared through
    ``reset`` and ``rollout`` across an initial reset, a 50-step rollout,
    a new episode, and a reseed.
    """
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    spaces.seed(0)

    # Build the VNC side: make, optionally configure, wrap, unvectorize.
    remote_env = spec.make()
    needs_configure = remote_env.metadata.get('configure.required', False)
    if needs_configure:
        remote_env.configure(remotes=1)
    vnc_env = wrappers.Unvectorize(wrapper(remote_env))

    # Build the raw side from the core gym id stored on the spec.
    core_id = spec._kwargs['gym_core_id']
    env = gym.make(core_id)

    env.seed(0)
    vnc_env.seed(0)

    # Check that reset observations work
    reset(matcher, env, vnc_env, stage='initial reset')

    # Check a full rollout
    rollout(matcher, env, vnc_env, timestep_limit=50, stage='50 steps')

    # Reset to start a new episode
    reset(matcher, env, vnc_env, stage='reset to new episode')

    # Check that a step into the next episode works
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='1 step in new episode')

    # Make sure env can be reseeded
    for target in (env, vnc_env):
        target.seed(1)
    reset(matcher, env, vnc_env, 'reseeded reset')
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='reseeded step')
def __init__(self):
    """Announce the training assets, load them, and reset the env.

    The image and mask named by ``TRAIN_IMAGE`` and ``TRAIN_MASK`` are
    read from the ``data`` directory one level above this file's directory.
    """
    rule = "--------------------------------------"
    print(rule)
    print("| IMAGE: ", TRAIN_IMAGE, "MASK: ", TRAIN_MASK, " |")
    print(rule)

    seed()
    self.seed()

    data_dir = join(dirname(__file__), "..", "data")
    image = Image.open(abspath(join(data_dir, TRAIN_IMAGE)))
    self.img = image
    mask_image = Image.open(abspath(join(data_dir, TRAIN_MASK)))
    self.mask = np.asarray(mask_image)

    # Number of entries equal to 0 at index 0 of the mask's last axis.
    first_channel = self.mask[..., 0]
    self.mask_zero_count = np.count_nonzero(first_channel == 0)

    self.width, self.height = image.size

    self.action_space = spaces.Discrete(256)
    self.observation_space = spaces.Discrete(4)

    # Grayscale ('L' mode) copy of the image as an array.
    self.img_array = np.asarray(image.convert('L'))

    self.guess_count, self.guess_max = 0, 100
    self.observation = 0

    self.reset()
def __init__(self, rows=8, cols=8, mines=10):
    """Record the board dimensions and define the (row, column) action space.

    Args:
        rows: number of board rows.
        cols: number of board columns.
        mines: number of mines on the board.
    """
    seed()  # Initialize RNG

    row_choice = Discrete(rows)
    col_choice = Discrete(cols)
    self.action_space = Tuple((row_choice, col_choice))

    self.rows, self.cols, self.mines = rows, cols, mines
def seed(self, seed):
    """Seed the random number generator(s) used by this env.

    The given value is forwarded unchanged to ``spaces.seed``.

    Args:
        seed: the seed value to apply.
    """
    spaces.seed(seed)