class Minatar(BaseEnvironment):
    def __init__(self, name, seed):
        self.env = Environment(name, random_seed=seed)

    def start(self):
        self.env.reset()
        s = self.env.state()
        return s.astype('float32')

    def step(self, a):
        r, t = self.env.act(a)
        sp = self.env.state().astype('float32')
        return (r, sp, t)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--alpha", "-a", type=float, default=ALPHA)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    # If there's an output specified, then use the user specified output. Otherwise, create file in the current
    # directory with the game's name.
    if args.output:
        file_name = args.output
    else:
        file_name = os.getcwd() + "/" + args.game

    load_file_path = None
    if args.loadfile:
        load_file_path = args.loadfile

    env = Environment(args.game)

    print('Cuda available?: ' + str(torch.cuda.is_available()))
    AC_lambda(env, file_name, args.save, load_file_path, alpha=args.alpha)
def __init__(self, game, **kwargs):
    self._env = env = Environment(game, **kwargs)
    self._action_space = Discrete(env.num_actions())
    # np.bool was removed from recent NumPy releases; the builtin bool dtype is equivalent here
    self._observation_space = Box(low=False, high=True, shape=env.state_shape(), dtype=bool)
class Minatar(BaseEnvironment):
    def __init__(self, name, seed):
        self.env = Environment(name, random_seed=seed)

    def start(self):
        self.env.reset()
        s = self.env.state()
        # MinAtar states are (height, width, channels); transpose to channels-first
        s = s.transpose(2, 0, 1)
        return s

    def step(self, a):
        r, t = self.env.act(a)
        sp = self.env.state()
        sp = sp.transpose(2, 0, 1)
        return (r, sp, t)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    parser.add_argument("--ramp-difficulty", default=False, action="store_true")
    parser.add_argument("--sticky-actions", default=False, action="store_true")
    parser.add_argument("--save-dataset", default=False, action="store_true")
    parser.add_argument("--num-frames", type=int, default=5000000)
    args = parser.parse_args()

    env = Environment(args.game,
                      sticky_action_prob=0.1 if args.sticky_actions else 0.0,
                      difficulty_ramping=args.ramp_difficulty)

    num_episodes = 100
    num_actions = env.num_actions()

    reward_per_episode = []
    episode_rewards = []

    env.reset()
    for i in range(10000000):
        s = env.state()
        action = random.randrange(num_actions)
        reward, terminated = env.act(action)
        episode_rewards.append(reward)

        if terminated:
            reward_per_episode.append(numpy.sum(episode_rewards))
            episode_rewards = []
            if len(reward_per_episode) == num_episodes:
                break
            env.reset()

    print(numpy.mean(reward_per_episode))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--agent", "-a", type=str, default="DQN")
    parser.add_argument("--filename", "-f", type=str)
    parser.add_argument("--windowsize", "-w", type=str)
    parser.add_argument("--numruns", "-n", type=str)
    args = parser.parse_args()

    env = Environment(args.game)
    network_param = find_best_run(args.filename, int(args.numruns), int(args.windowsize), args.agent)
    run_agent(env, network_param, args.agent)
def run_abstract_agent_(self):
    def classify(state):
        _, log_probs = self.model.encode(
            state[np.newaxis], hand_states=np.zeros((1, 1), dtype=np.int32))
        log_probs = log_probs[0]

        if self.hard_abstract_state:
            idx = np.argmax(log_probs)
            output = np.zeros_like(log_probs)
            output[idx] = 1.0
        else:
            output = np.exp(log_probs)

        return output

    self.game_env = Environment(self.game, difficulty_ramping=False, sticky_action_prob=0.0)

    q_values = self.cluster_q_values
    rewards_name = "rewards_q_values"
    gifs_name = "gifs_q_values"

    self.abstract_agent = QuotientMDPNBisim(classify, self.game_env, q_values, minatar=True)

    if self.save_gifs:
        gifs_path = self.saver.get_new_dir(gifs_name)
    else:
        gifs_path = None

    solver = SolverMinAtar(
        self.game_env, self.abstract_agent, int(1e+7), int(1e+7), 0,
        max_episodes=100, train=False, gif_save_path=gifs_path,
        rewards_file=self.saver.get_save_file(rewards_name, "dat"))
    solver.run()
class MiniAtariTask:
    def __init__(self, env_id, seed=np.random.randint(int(1e5)), sticky_action_prob=0.0):
        random_seed(seed)
        # TODO: Allow sticky_action_prob and difficulty_ramping to be set by the configuration file
        # Pass the constructor's sticky_action_prob through (defaults to 0.0)
        self.env = Environment(env_id, random_seed=seed,
                               sticky_action_prob=sticky_action_prob,
                               difficulty_ramping=False)
        self.name = env_id
        self.state_dim = self.env.state_shape()
        self.action_set = self.env.minimal_action_set()
        self.action_dim = len(self.action_set)

    def reset(self):
        self.env.reset()
        return self.env.state().flatten()

    def step(self, actions):
        rew, done = self.env.act(self.action_set[actions[0]])
        obs = self.reset() if done else self.env.state()
        return obs.flatten(), np.asarray(rew), np.asarray(done), ""
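A minimal usage sketch for the task wrapper above; the game id, seed, and rollout length are illustrative assumptions, and it presumes the `random_seed` helper used by the snippet's module is importable.

task = MiniAtariTask("breakout", seed=0)
obs = task.reset()
for _ in range(1000):
    # step() takes a list of action indices and looks them up in the minimal action set
    action = np.random.randint(task.action_dim)
    obs, reward, done, _ = task.step([action])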
                    default=1)
params = parser.parse_args()

if params.game == "all":
    games = ["seaquest", "asterix", "breakout", "freeway", "space_invaders"]
else:
    games = [params.game]

for i in range(params.n_seeds):
    for game in games:
        # random.randint expects integer bounds, so cast the upper bound
        seed = random.randint(0, int(1e6))
        notes = "DQN"

        env = Environment(game, random_seed=seed)
        env = MinatarWrapper(env)

        nb_steps = params.nb_steps

        agent = DQN(env,
                    CNNMinAtar,
                    replay_start_size=5000,
                    replay_buffer_size=100000,
                    gamma=0.99,
                    update_target_frequency=1000,
                    minibatch_size=32,
                    learning_rate=1e-4,
                    initial_exploration_rate=1,
                    final_exploration_rate=0.03,
                    final_exploration_step=100000,
                    adam_epsilon=1e-8,
class BaseEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, display_time=50, **kwargs):
        self.game_name = 'Game Name'
        self.display_time = display_time
        self.init(**kwargs)

    def init(self, **kwargs):
        self.game = Environment(env_name=self.game_name, **kwargs)
        self.action_set = self.game.env.action_map
        self.action_space = spaces.Discrete(self.game.num_actions())
        self.observation_space = spaces.Box(0.0, 1.0, shape=self.game.state_shape(), dtype=np.float32)

    def step(self, action):
        reward, done = self.game.act(action)
        return (self.game.state(), reward, done, {})

    def reset(self):
        self.game.reset()
        return self.game.state()

    def seed(self, seed=None):
        self.game = Environment(env_name=self.game_name, random_seed=seed)
        return seed

    def render(self, mode='human'):
        if mode == 'rgb_array':
            return self.game.state()
        elif mode == 'human':
            self.game.display_state(self.display_time)

    def close(self):
        if self.game.visualized:
            self.game.close_display()
        return 0
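BaseEnv leaves game_name as a placeholder, so a concrete game is wired in by subclassing; a hedged sketch of that pattern follows (the subclass name and the 'breakout' game id are illustrative assumptions, not part of the original).

class BreakoutEnv(BaseEnv):
    def __init__(self, display_time=50, **kwargs):
        # Set the real MinAtar game id before init() constructs the Environment
        self.game_name = 'breakout'
        self.display_time = display_time
        self.init(**kwargs)

env = BreakoutEnv()
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())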
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--alpha", "-a", type=float, default=STEP_SIZE)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    parser.add_argument("--ramp-difficulty", default=False, action="store_true")
    parser.add_argument("--sticky-actions", default=False, action="store_true")
    parser.add_argument("--save-dataset", default=False, action="store_true")
    parser.add_argument("--num-frames", type=int, default=5000000)
    args = parser.parse_args()

    global NUM_FRAMES
    NUM_FRAMES = args.num_frames

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    # If there's an output specified, then use the user specified output. Otherwise, create file in the current
    # directory with the game's name.
    if args.output:
        file_name = args.output
    else:
        file_name = os.getcwd() + "/" + args.game

    load_file_path = None
    if args.loadfile:
        load_file_path = args.loadfile

    env = Environment(args.game,
                      sticky_action_prob=0.1 if args.sticky_actions else 0.0,
                      difficulty_ramping=args.ramp_difficulty)

    print('Cuda available?: ' + str(torch.cuda.is_available()))
    policy_net = dqn(env, args.replayoff, args.targetoff, file_name, args.save, load_file_path, args.alpha)

    if args.save_dataset:
        epsilon = 0.1
        num_steps = 100000
        num_actions = env.num_actions()

        transitions = []
        env.reset()

        for i in range(num_steps):
            if i % 1000 == 0:
                logging.info("data collection step {:d}".format(i))

            s = env.state()
            s_t = get_state(s)

            with torch.no_grad():
                q_values = policy_net(s_t)

            if numpy.random.uniform(0, 1) < epsilon:
                action = torch.tensor([[random.randrange(num_actions)]], device=device)
            else:
                action = q_values.max(1)[1].view(1, 1)

            reward, terminated = env.act(action)

            s_prime = env.state()
            s_prime_t = get_state(s_prime)

            with torch.no_grad():
                q_values_prime = policy_net(s_prime_t)

            t = Transition(s, int(action.cpu().numpy()[0, 0]), float(reward), s_prime, False, bool(terminated),
                           q_values=q_values.cpu().numpy(), next_q_values=q_values_prime.cpu().numpy())
            transitions.append(t)

            if terminated:
                env.reset()

        file_name = os.path.join("dataset", "{:s}.pickle".format(args.game))
        with open(file_name, "wb") as file:
            pickle.dump(transitions, file)
def run_abstract_agent_(self, cluster_q_values=False, cluster_q_values_from_model=False,
                        learned_cluster_q_values=False, new_transitions=False, rewards_no_actions=False,
                        failure_rewards=False, soft_failure_rewards=False):
    def classify(state):
        _, log_probs = self.model.encode(
            state[np.newaxis], hand_states=np.zeros((1, 1), dtype=np.int32))
        log_probs = log_probs[0]

        if self.hard_abstract_state:
            idx = np.argmax(log_probs)
            output = np.zeros_like(log_probs)
            output[idx] = 1.0
        elif self.sample_abstract_state:
            idx = np.random.choice(list(range(len(log_probs))), p=np.exp(log_probs))
            output = np.zeros_like(log_probs)
            output[idx] = 1.0
        else:
            output = np.exp(log_probs)

        return output

    self.game_env = Environment(self.game, difficulty_ramping=False, sticky_action_prob=0.0)

    if cluster_q_values:
        if cluster_q_values_from_model:
            q_values = self.model.session.run(self.model.cluster_qs_v)
            rewards_name = "rewards_q_values_from_model"
            gifs_name = "gifs_q_values_from_model"
        else:
            q_values = self.cluster_q_values
            rewards_name = "rewards_q_values"
            gifs_name = "gifs_q_values"
    elif learned_cluster_q_values:
        q_values = self.learned_qs
        rewards_name = "rewards_learned_q_values"
        gifs_name = "gifs_learned_q_values"
    else:
        q_values = self.abstract_mdp.state_action_values
        rewards_name = "rewards"
        gifs_name = "gifs"

    if new_transitions:
        rewards_name += "_new_t"
        gifs_name += "_new_t"

    if rewards_no_actions:
        rewards_name += "_r_no_a"
        gifs_name += "_r_no_a"

    if failure_rewards:
        rewards_name += "_failure_r"
        gifs_name += "_failure_r"

    if soft_failure_rewards:
        rewards_name += "_soft_failure_r"
        gifs_name += "_soft_failure_r"

    self.abstract_agent = QuotientMDPNBisim(
        classify, self.game_env, q_values, minatar=True,
        softmax_policy=self.softmax_policy, softmax_policy_temp=self.softmax_policy_temp)

    if self.save_gifs:
        gifs_path = self.saver.get_new_dir(gifs_name)
    else:
        gifs_path = None

    solver = SolverMinAtar(
        self.game_env, self.abstract_agent, int(1e+7), int(1e+7), 0,
        max_episodes=self.eval_episodes, train=False, gif_save_path=gifs_path,
        rewards_file=self.saver.get_save_file(rewards_name, "dat"))
    solver.run()
# Authors:
# Kenny Young ([email protected])
# Tian Tian ([email protected])
#
# python3 random_play.py -g <game>
#
################################################################################################################
import random, numpy, argparse

from minatar import Environment

NUM_EPISODES = 1000

parser = argparse.ArgumentParser()
parser.add_argument("--game", "-g", type=str)
args = parser.parse_args()

env = Environment(args.game)

e = 0
returns = []
num_actions = env.num_actions()

# Run NUM_EPISODES episodes and log all returns
while e < NUM_EPISODES:
    # Initialize the return for every episode
    G = 0.0

    # Initialize the environment
    env.reset()
    terminated = False

    # Obtain first state, unused by random agent, but included for illustration
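    # Sketch of a plausible continuation of the episode loop the excerpt cuts off
    # above (inferred from the setup; not taken verbatim from the original script).
    s = env.state()

    # Play one episode with uniformly random actions
    while not terminated:
        action = random.randrange(num_actions)
        reward, terminated = env.act(action)
        G += reward

    # Record the return and move on to the next episode
    returns.append(G)
    e += 1

print("Average return over " + str(NUM_EPISODES) + " episodes: " + str(numpy.mean(returns)))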
import argparse
import tkinter as Tk

from minatar import Environment, GUI

################################################################################################################
# Script that allows a human to play any of the MinAtar games. Use arrow keys to move and space to fire.
# Pressing q will exit the game, r will restart.
#
################################################################################################################

parser = argparse.ArgumentParser()
parser.add_argument("--game", "-g", type=str)
args = parser.parse_args()

# Setup game environment and GUI
env = Environment(args.game)
gui = GUI(env.game_name(), env.n_channels)

# Thread safe variables for use with GUI
action = Tk.IntVar()
action.set(0)
action_taken = Tk.BooleanVar()
action_taken.set(False)
action_released = Tk.BooleanVar()
action_released.set(False)
G = Tk.DoubleVar()
G.set(0.0)
is_terminate = Tk.BooleanVar()
is_terminate.set(False)

# Map input keys to agent actions
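# Sketch of what the key-to-action mapping could look like; the dictionary name,
# Tk keysym strings, and action indices are illustrative assumptions rather than
# the original script's values.
key_action_map = {
    'Left': 1,   # move left
    'Up': 2,     # move up
    'Right': 3,  # move right
    'Down': 4,   # move down
    'space': 5,  # fire
}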