def test_invalid_seeds():
    """Check that ``seeding.np_random`` raises ``error.Error`` for bad seeds."""
    for bad_seed in (-1, 'test'):
        try:
            seeding.np_random(bad_seed)
        except error.Error:
            # Rejection is the expected outcome; move on to the next candidate.
            continue
        # Reaching this line means the call above did not raise.
        assert False, 'Invalid seed {} passed validation'.format(bad_seed)
def _seed(self, seed=None):
    """Reseed the generator and rebuild the action and observation spaces.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    obs_bound = np.array([1., 1., self.max_speed])
    self.action_space = spaces.Box(
        low=-self.max_torque,
        high=self.max_torque,
        shape=(1,),
        np_random=rng,
    )
    self.observation_space = spaces.Box(
        low=-obs_bound,
        high=obs_bound,
        np_random=rng,
    )
    return [used_seed]
def _seed(self, seed=None):
    """Reseed the generator and recreate both discrete spaces with it.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng
    self.action_space = spaces.Discrete(self.n, np_random=rng)
    self.observation_space = spaces.Discrete(1, np_random=rng)
    return [used_seed]
def _seed(self, seed=None):
    """Reseed the generator and rebuild the action and observation spaces.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    # The angle limit is doubled (2 * theta_threshold_radians) so that an
    # observation taken at the moment of failure is still within bounds.
    obs_bound = np.array([
        self.x_threshold,
        np.inf,
        self.theta_threshold_radians * 2,
        np.inf,
    ])
    self.action_space = spaces.Discrete(2, np_random=rng)
    self.observation_space = spaces.Box(-obs_bound, obs_bound, np_random=rng)
    return [used_seed]
def _seed(self, seed=None):
    """Reseed the generator and rebuild the action and observation spaces.

    The observation space is a tuple of three discrete spaces of sizes
    32, 11 and 2, each sharing the freshly seeded generator.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng
    self.action_space = spaces.Discrete(2, np_random=rng)
    components = tuple(
        spaces.Discrete(size, np_random=rng) for size in (32, 11, 2)
    )
    self.observation_space = spaces.Tuple(components)
    return [used_seed]
def _seed(self, seed=None):
    """Reseed the generator and rebuild the observation and action spaces.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    upper = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
    lower = -upper
    self.observation_space = spaces.Box(lower, upper, np_random=rng)
    self.action_space = spaces.Discrete(3, np_random=rng)
    return [used_seed]
def _seed(self, seed=None):
    """Reseed the generator and rebuild the action and observation spaces.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    # Bounds are infinite, although the useful range is -1 .. +1.
    obs_bound = np.array([np.inf] * 8)

    # Actions: nop, fire left engine, main engine, right engine.
    self.action_space = spaces.Discrete(4, np_random=rng)
    self.observation_space = spaces.Box(-obs_bound, obs_bound, np_random=rng)
    return [used_seed]
def _seed(self, seed=None):
    """Reseed the generator, seed the emulator and load the ROM.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A two-element list: the seed handed back by ``seeding.np_random``
        followed by the derived seed given to the emulator.
    """
    self.np_random, primary_seed = seeding.np_random(seed)

    # The derived seed is passed as a uint but is checked as an int
    # elsewhere, so it has to stay below 2**31.
    derived_seed = seeding.hash_seed(primary_seed + 1) % 2**31

    # Empirically, the seed must be set before the ROM is loaded.
    self.ale.setInt(b'random_seed', derived_seed)
    self.ale.loadROM(self.game_path)
    return [primary_seed, derived_seed]
def _seed(self, seed=None):
    """Seed the game and rebuild the action and observation spaces.

    The generator is only handed to the spaces; this method does not store
    it on the instance.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A two-element list: the seed handed back by ``seeding.np_random``
        followed by the derived seed given to the game.
    """
    rng, primary_seed = seeding.np_random(seed)

    # Derive a second seed for the game itself.
    derived_seed = seeding.hash_seed(primary_seed + 1) % 2**32
    self.game.set_seed(derived_seed)

    # 3 allowed actions [12, 13, 14] (must match .cfg file)
    action_rows = np.matrix([[0, 1, 0]] * 3)
    self.action_space = spaces.HighLow(action_rows, np_random=rng)

    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(
        low=0,
        high=255,
        shape=frame_shape,
        np_random=rng,
    )
    return [primary_seed, derived_seed]
def _seed(self, seed=None):
    """Seed the game and rebuild the action and observation spaces.

    The generator is only handed to the spaces; this method does not store
    it on the instance.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A two-element list: the seed handed back by ``seeding.np_random``
        followed by the derived seed given to the game.
    """
    rng, primary_seed = seeding.np_random(seed)

    # Derive a second seed for the game itself.
    derived_seed = seeding.hash_seed(primary_seed + 1) % 2**32
    self.game.set_seed(derived_seed)

    # action indexes are [0, 9, 10, 12, 13, 14]
    action_rows = np.matrix([[0, 1, 0]] * 6)
    self.action_space = spaces.HighLow(action_rows, np_random=rng)

    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(
        low=0,
        high=255,
        shape=frame_shape,
        np_random=rng,
    )
    return [primary_seed, derived_seed]
def _seed(self, seed=None):
    """Reseed the generator and pachi, then rebuild both spaces.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A two-element list: the seed handed back by ``seeding.np_random``
        followed by the derived seed given to ``pachi_py.pachi_srand``.
    """
    self.np_random, primary_seed = seeding.np_random(seed)

    # Derive a second seed for pachi.
    derived_seed = seeding.hash_seed(primary_seed + 1) % 2**32
    pachi_py.pachi_srand(derived_seed)

    # The observation bounds take their shape from an encoded fresh board.
    board_shape = pachi_py.CreateBoard(self.board_size).encode().shape
    self.observation_space = spaces.Box(
        np.zeros(board_shape),
        np.ones(board_shape),
        np_random=self.np_random,
    )

    # One action for each board position, pass, and resign
    action_count = self.board_size**2 + 2
    self.action_space = spaces.Discrete(action_count, np_random=self.np_random)
    return [primary_seed, derived_seed]
def _seed(self, seed=None):
    """Reseed the generator and rebuild the action and observation spaces.

    Action bounds come from the two columns of a copy of
    ``self.model.actuator_ctrlrange``; observation bounds are infinite with
    length ``self.obs_dim``.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    ctrl_range = self.model.actuator_ctrlrange.copy()
    act_low = ctrl_range[:, 0]
    act_high = ctrl_range[:, 1]
    self.action_space = spaces.Box(act_low, act_high, np_random=rng)

    obs_high = np.inf * np.ones(self.obs_dim)
    obs_low = -obs_high
    self.observation_space = spaces.Box(obs_low, obs_high, np_random=rng)
    return [used_seed]
def _seed(self, seed=None):
    """Configure the emulator, reseed the generator and load the ROM.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A two-element list: the seed handed back by ``seeding.np_random``
        followed by the derived seed given to the emulator.
    """
    self.ale.setFloat(
        b'repeat_action_probability',
        options.repeat_action_probability,
    )

    from gym.utils import seeding
    self.np_random, primary_seed = seeding.np_random(seed)

    # The derived seed is passed as a uint but is checked as an int
    # elsewhere, so it has to stay below 2**31.
    derived_seed = seeding.hash_seed(primary_seed + 1) % 2 ** 31

    # Empirically, the seed must be set before the ROM is loaded.
    self.ale.setInt(b'random_seed', derived_seed)
    self.ale.loadROM(self.game_path)
    return [primary_seed, derived_seed]
def _seed(self, seed=None):
    """Reseed the generator and refresh the opponent policy.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.

    Raises:
        error.Error: If ``self.opponent`` is a string other than
            ``'random'``.
    """
    self.np_random, used_seed = seeding.np_random(seed)

    # Update the random policy if needed
    if not isinstance(self.opponent, str):
        # A non-string opponent is used directly as the policy.
        self.opponent_policy = self.opponent
    elif self.opponent == 'random':
        self.opponent_policy = make_random_policy(self.np_random)
    else:
        raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))
    return [used_seed]
def _seed(self, seed=None):
    """Reseed the generator, rebuild both spaces and refresh the opponent.

    The observation space takes its shape from the value returned by
    ``self.reset()``, which is called after the action space is set and
    before the opponent policy is updated.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.

    Raises:
        error.Error: If ``self.opponent`` is a string other than
            ``'random'``.
    """
    self.np_random, used_seed = seeding.np_random(seed)

    # One action for each board position and resign
    action_count = self.board_size ** 2 + 1
    self.action_space = spaces.Discrete(action_count, np_random=self.np_random)

    first_obs = self.reset()
    self.observation_space = spaces.Box(
        np.zeros(first_obs.shape),
        np.ones(first_obs.shape),
        np_random=self.np_random,
    )

    # Update the random policy if needed
    if not isinstance(self.opponent, str):
        # A non-string opponent is used directly as the policy.
        self.opponent_policy = self.opponent
    elif self.opponent == 'random':
        self.opponent_policy = make_random_policy(self.np_random)
    else:
        raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))
    return [used_seed]
def main():
    """Train the exploiting agent from episodes simulated on parallel engines.

    Steps:
      1. Derive a generator from ``BASE_SEED`` and print both seeds.
      2. Push the shared configuration to every engine of the client.
      3. Build and seed the local environment and the exploiting agent.
      4. For each epoch in ``range(1, EPOCHS)``: scatter fresh base seeds,
         run ``simulate_episodes`` on the engines, and train the exploiting
         agent on every returned episode. At the configured intervals the
         agent is rendered and/or saved.
      5. Close the environment.
    """
    np_random, seed = seeding.np_random(BASE_SEED)
    print("Base seed: {}, derived seed: {}".format(BASE_SEED, seed))

    rc = Client()
    dview = rc[:]
    nb_nodes = len(rc.ids)

    # Names the engines need in their namespace to simulate episodes.
    dview.push({
        "simulate_episode": simulate_episode,
        "SIZE": SIZE,
        "AGENT_STARTS": AGENT_STARTS,
        "AGENT_TAG": AGENT_TAG,
    })

    env = PentagoEnv(SIZE, None, agent_starts=AGENT_STARTS, to_win=SIZE)
    env.seed(np_random.randint(maxint))

    exploiting_agent = TabularQAgent(
        env, AGENT_TAG, None, load_model=True, userconfig={"eps": 1.0})
    exploiting_agent.seed(np_random.randint(maxint))

    #env.monitor.start(AGENT_TAG)

    # Explicit floor division: the per-node count is used as an episode
    # count on the engines, so it must stay an integer.
    episodes_pr_node = EPISODES_PR_EPOCH // nb_nodes

    for epoch in range(1, EPOCHS):
        verbose = (epoch >= EPOCHS_VERBOSE_INTERVAL
                   and epoch % EPOCHS_VERBOSE_INTERVAL == 0)
        if verbose:
            print("\nEpoch {}".format(epoch))

        # Give every engine its own base seed for this epoch.
        dview.scatter('base_seeds', np_random.randint(maxint, size=nb_nodes))
        result = dview.apply_async(simulate_episodes, episodes_pr_node)

        #epi = 1
        for episodes in result:
            for episode in episodes:
                #if verbose: print("\r episode: {}".format(epi))
                train_agent(exploiting_agent, episode.reset_obs, episode.feedbacks)
                #epi += 1

        if verbose:
            exploiting_agent.render()

        save = (epoch >= EPOCHS_SAVE_MODEL_INTERVAL
                and epoch % EPOCHS_SAVE_MODEL_INTERVAL == 0)
        if save:
            # The original message ended in a "{}" placeholder that was
            # never formatted and so was printed literally.
            print("\n Saving model...")
            exploiting_agent.save()

    #env.monitor.close()
    env.close()
def _seed(self, seed=None):
    """Reseed the generator, reload the ROM and rebuild both spaces.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A two-element list: the seed handed back by ``seeding.np_random``
        followed by the derived seed given to the emulator.

    Raises:
        error.Error: If ``self._obs_type`` is neither ``'ram'`` nor
            ``'image'``.
    """
    self.np_random, primary_seed = seeding.np_random(seed)

    # The derived seed is passed as a uint but is checked as an int
    # elsewhere, so it has to stay below 2**31.
    derived_seed = seeding.hash_seed(primary_seed + 1) % 2**31

    # Empirically, the seed must be set before the ROM is loaded.
    self.ale.setInt(b'random_seed', derived_seed)
    self.ale.loadROM(self.game_path)

    self._action_set = self.ale.getMinimalActionSet()
    self.action_space = spaces.Discrete(
        len(self._action_set),
        np_random=self.np_random,
    )

    screen_width, screen_height = self.ale.getScreenDims()
    obs_type = self._obs_type
    if obs_type == 'ram':
        self.observation_space = spaces.Box(
            low=np.zeros(128),
            high=np.zeros(128) + 255,
            np_random=self.np_random,
        )
    elif obs_type == 'image':
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(screen_height, screen_width, 3),
            np_random=self.np_random,
        )
    else:
        raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
    return [primary_seed, derived_seed]
def simulate_episodes(nb_episodes):
    """Simulate ``nb_episodes`` episodes with a freshly seeded exploring agent.

    Reads ``base_seeds``, ``SIZE``, ``AGENT_STARTS``, ``AGENT_TAG`` and
    ``simulate_episode`` from the surrounding namespace; imports are done
    locally inside the function.

    Args:
        nb_episodes: Number of episodes to simulate.

    Returns:
        A list with one ``simulate_episode`` result per episode.
    """
    from sys import maxint
    from gym.utils import seeding
    from pentago_ai import RandomAgent, PentagoEnv, TabularQAgent, Episode

    rng, _ = seeding.np_random(base_seeds[0])

    # Seeds are drawn in a fixed order: opponent, environment, exploration
    # policy, exploring agent.
    opponent_policy = RandomAgent("Player 2 Random")
    opponent_policy.seed(rng.randint(maxint))

    env = PentagoEnv(SIZE, opponent_policy, agent_starts=AGENT_STARTS, to_win=SIZE)
    env.seed(rng.randint(maxint))

    exploration_policy = RandomAgent("Player 1 Random")
    exploration_policy.seed(rng.randint(maxint))

    exploring_agent = TabularQAgent(
        env,
        AGENT_TAG,
        exploration_policy,
        load_model=True,
        userconfig={"eps": 0.3},
    )
    exploring_agent.seed(rng.randint(maxint))

    return [simulate_episode(env, exploring_agent) for _ in range(nb_episodes)]
def seed(self, seed=None):
    """Reseed this object's random number generator.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    # Produce the seed needed for randomization and, at the same time, get
    # an np_random object that supports later random generation.
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng
    return [used_seed]
def seed(self, seed=None):
    """Seed ``gym.spaces.prng`` and this object's own generator.

    Args:
        seed: Value forwarded to both ``gym.spaces.prng.seed`` and
            ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    gym.spaces.prng.seed(seed)
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng
    return [used_seed]
def _seed(self, seed=None):
    """Reseed this object's generator and pachi's random source.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A two-element list: the seed handed back by ``seeding.np_random``
        followed by the derived seed given to ``pachi_py.pachi_srand``.
    """
    self.np_random, primary_seed = seeding.np_random(seed)

    # Derive a second seed for pachi.
    derived_seed = seeding.hash_seed(primary_seed + 1) % 2 ** 32
    pachi_py.pachi_srand(derived_seed)
    return [primary_seed, derived_seed]
def _seed(self, seed=0):
    """Reseed this object's random number generator.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``0``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng
    return [used_seed]
def test_valid_seeds():
    """Check that ``seeding.np_random`` hands back seeds 0 and 1 unchanged."""
    for seed in (0, 1):
        # Only the returned seed matters here; the generator is discarded.
        _, returned_seed = seeding.np_random(seed)
        assert seed == returned_seed
def _seed(self, seed=None):
    """Reseed the generator and pass it on to the wrapped game.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    self.np_random, used_seed = seeding.np_random(seed)
    # The game receives the generator object itself, not the seed value.
    self.game._seed(self.np_random)
    return [used_seed]
def seed(self, seed=None):
    """Reseed the generator and derive a secondary seed.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A two-element list: the seed handed back by ``seeding.np_random``
        followed by a derived seed reduced modulo ``2 ** 31``.
    """
    self.np_random, primary_seed = seeding.np_random(seed)
    derived_seed = seeding.hash_seed(primary_seed + 1) % 2 ** 31
    return [primary_seed, derived_seed]
def seed(self, seed=1337):
    """Seed the random number generator.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``1337``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    # Report the seed that was actually used rather than echoing the
    # argument. The two only differ when the helper picks or normalises a
    # seed itself (presumably for ``seed=None`` -- confirm against the gym
    # seeding documentation); previously the caller got ``[None]`` back.
    self.np_random, used_seed = seeding.np_random(seed)
    return [used_seed]
def _seed(self, seed=None):
    """Reseed the generator and rebuild the action and observation spaces.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    obs_bound = np.array([np.inf] * 24)
    act_low = np.array([-1, -1, -1, -1])
    act_high = np.array([+1, +1, +1, +1])
    self.action_space = spaces.Box(act_low, act_high, np_random=rng)
    self.observation_space = spaces.Box(-obs_bound, obs_bound, np_random=rng)
    return [used_seed]
def seed(self, seed=None):
    """Reseed this object's random number generator.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng
    return [used_seed]
def _seed(self, seed=None):
    """Reseed the generator and rebuild the action and observation spaces.

    Observation bounds are taken from ``self.low`` and ``self.high``.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng
    self.action_space = spaces.Discrete(3, np_random=rng)
    self.observation_space = spaces.Box(self.low, self.high, np_random=rng)
    return [used_seed]
def _seed(self, seed=None):
    """Reseed the generator and rebuild the action and observation spaces.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    # Action components, in order: steer, gas, brake
    act_low = np.array([-1, 0, 0])
    act_high = np.array([+1, +1, +1])
    self.action_space = spaces.Box(act_low, act_high, np_random=rng)

    self.observation_space = spaces.Box(
        low=0,
        high=255,
        shape=(STATE_H, STATE_W, 3),
        np_random=rng,
    )
    return [used_seed]
def seed(self, seed=None):
    """Reseed this object's random number generator.

    Args:
        seed: Value forwarded to ``seeding.np_random`` (default ``None``).

    Returns:
        A one-element list holding the seed handed back by
        ``seeding.np_random``.
    """
    # ``seeding.np_random`` hands back a (generator, seed) pair. Unpack it
    # so ``self.np_random`` holds the generator itself rather than the
    # whole pair, and report the seed to the caller.
    self.np_random, used_seed = seeding.np_random(seed)
    return [used_seed]