Example #1
 def init(self, **kwargs):
     self.game = Environment(env_name=self.game_name, **kwargs)
     self.action_set = self.game.env.action_map
     self.action_space = spaces.Discrete(self.game.num_actions())
     self.observation_space = spaces.Box(0.0,
                                         1.0,
                                         shape=self.game.state_shape(),
                                         dtype=np.float32)
Example #2
 def __init__(self, env_id, seed=np.random.randint(int(1e5)), sticky_action_prob=0.0):
     random_seed(seed)
     # TODO: Allow sticky_action_prob and difficulty_ramping to be set by the configuration file
     self.env = Environment(env_id, random_seed=seed, sticky_action_prob=0.0, difficulty_ramping=False)
     self.name = env_id
     self.state_dim = self.env.state_shape()
     self.action_set = self.env.minimal_action_set()
     self.action_dim = len(self.action_set)
Example #3
class Minatar(BaseEnvironment):
    def __init__(self, name, seed):
        self.env = Environment(name, random_seed=seed)

    def start(self):
        self.env.reset()
        s = self.env.state()
        return s.astype('float32')

    def step(self, a):
        r, t = self.env.act(a)
        sp = self.env.state().astype('float32')

        return (r, sp, t)
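A wrapper like this is typically driven from an agent loop. The rollout below is an illustrative sketch only: the game name 'breakout' and the uniformly random policy are assumptions, not part of the example, and it relies on the Minatar class defined above plus MinAtar's num_actions() on the wrapped environment.

import numpy as np

env = Minatar('breakout', seed=0)
num_actions = env.env.num_actions()  # query the underlying MinAtar Environment

s = env.start()
terminal = False
episode_return = 0.0
while not terminal:
    a = np.random.randint(num_actions)  # random action index
    r, s, terminal = env.step(a)
    episode_return += r
print(episode_return)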
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--alpha", "-a", type=float, default=ALPHA)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    # If there's an output specified, then use the user specified output.  Otherwise, create file in the current
    # directory with the game's name.
    if args.output:
        file_name = args.output
    else:
        file_name = os.getcwd() + "/" + args.game

    load_file_path = None
    if args.loadfile:
        load_file_path = args.loadfile

    env = Environment(args.game)

    print('Cuda available?:' + str(torch.cuda.is_available()))
    AC_lambda(env, file_name, args.save, load_file_path, alpha=args.alpha)
Example #5
 def __init__(self, game, **kwargs):
     self._env = env = Environment(game, **kwargs)
     self._action_space = Discrete(env.num_actions())
     self._observation_space = Box(low=False,
                                   high=True,
                                   shape=env.state_shape(),
                                   dtype=np.bool_)
Example #6
class Minatar(BaseEnvironment):
    def __init__(self, name, seed):
        self.env = Environment(name, random_seed=seed)

    def start(self):
        self.env.reset()
        s = self.env.state()
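        # MinAtar states are height x width x channels; transpose to channels-first
        # (C, H, W) so the observation matches the layout expected by conv nets.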
        s = s.transpose(2, 0, 1)
        return s

    def step(self, a):
        r, t = self.env.act(a)
        sp = self.env.state()
        sp = sp.transpose(2, 0, 1)

        return (r, sp, t)
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    parser.add_argument("--ramp-difficulty",
                        default=False,
                        action="store_true")
    parser.add_argument("--sticky-actions", default=False, action="store_true")
    parser.add_argument("--save-dataset", default=False, action="store_true")
    parser.add_argument("--num-frames", type=int, default=5000000)
    args = parser.parse_args()

    env = Environment(args.game,
                      sticky_action_prob=0.1 if args.sticky_actions else 0.0,
                      difficulty_ramping=args.ramp_difficulty)

    num_episodes = 100
    num_actions = env.num_actions()

    reward_per_episode = []
    episode_rewards = []

    env.reset()

    for i in range(10000000):

        s = env.state()

        action = random.randrange(num_actions)
        reward, terminated = env.act(action)

        episode_rewards.append(reward)

        if terminated:
            reward_per_episode.append(numpy.sum(episode_rewards))
            episode_rewards = []

            if len(reward_per_episode) == num_episodes:
                break

            env.reset()

    print(numpy.mean(reward_per_episode))
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--agent", "-a", type=str, default="DQN")
    parser.add_argument("--filename", "-f", type=str)
    parser.add_argument("--windowsize", "-w", type=str)
    parser.add_argument("--numruns", "-n", type=str)
    args = parser.parse_args()

    env = Environment(args.game)

    network_param = find_best_run(args.filename, int(args.numruns), int(args.windowsize), args.agent)
    run_agent(env, network_param, args.agent)
Example #9
    def run_abstract_agent_(self):
        def classify(state):

            _, log_probs = self.model.encode(state[np.newaxis],
                                             hand_states=np.zeros(
                                                 (1, 1), dtype=np.int32))
            log_probs = log_probs[0]

            if self.hard_abstract_state:
                idx = np.argmax(log_probs)
                output = np.zeros_like(log_probs)
                output[idx] = 1.0
            else:
                output = np.exp(log_probs)

            return output

        self.game_env = Environment(self.game,
                                    difficulty_ramping=False,
                                    sticky_action_prob=0.0)

        q_values = self.cluster_q_values
        rewards_name = "rewards_q_values"
        gifs_name = "gifs_q_values"

        self.abstract_agent = QuotientMDPNBisim(classify,
                                                self.game_env,
                                                q_values,
                                                minatar=True)

        if self.save_gifs:
            gifs_path = self.saver.get_new_dir(gifs_name)
        else:
            gifs_path = None

        solver = SolverMinAtar(self.game_env,
                               self.abstract_agent,
                               int(1e+7),
                               int(1e+7),
                               0,
                               max_episodes=100,
                               train=False,
                               gif_save_path=gifs_path,
                               rewards_file=self.saver.get_save_file(
                                   rewards_name, "dat"))
        solver.run()
Example #10
class MiniAtariTask:
    def __init__(self, env_id, seed=np.random.randint(int(1e5)), sticky_action_prob=0.0):
        random_seed(seed)
        # TODO: Allow sticky_action_prob and difficulty_ramping to be set by the configuration file
        self.env = Environment(env_id, random_seed=seed, sticky_action_prob=0.0, difficulty_ramping=False)
        self.name = env_id
        self.state_dim = self.env.state_shape()
        self.action_set = self.env.minimal_action_set()
        self.action_dim = len(self.action_set)

    def reset(self):
        self.env.reset()
        return self.env.state().flatten()

    def step(self, actions):
        rew, done = self.env.act(self.action_set[actions[0]])
        obs = self.reset() if done else self.env.state()
        return obs.flatten(), np.asarray(rew), np.asarray(done), ""
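A short rollout sketch for MiniAtariTask (illustrative only: 'breakout' is just an example game id, and constructing the task assumes the random_seed helper referenced in __init__ is importable from the surrounding code base):

import numpy as np

task = MiniAtariTask('breakout', seed=42)
obs = task.reset()                          # flattened MinAtar state
for _ in range(100):
    a = np.random.randint(task.action_dim)
    obs, rew, done, info = task.step([a])   # step expects a sequence of actions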
Example #11
                    default=1)
params = parser.parse_args()

if params.game == "all":
    games = ["seaquest", "asterix", "breakout", "freeway", "space_invaders"]
else:
    games = [params.game]

for i in range(params.n_seeds):

    for game in games:

        seed = random.randint(0, 1e6)

        notes = "DQN"
        env = Environment(game, random_seed=seed)
        env = MinatarWrapper(env)
        nb_steps = params.nb_steps

        agent = DQN(env,
                    CNNMinAtar,
                    replay_start_size=5000,
                    replay_buffer_size=100000,
                    gamma=0.99,
                    update_target_frequency=1000,
                    minibatch_size=32,
                    learning_rate=1e-4,
                    initial_exploration_rate=1,
                    final_exploration_rate=0.03,
                    final_exploration_step=100000,
                    adam_epsilon=1e-8,
Example #12
class BaseEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, display_time=50, **kwargs):
        self.game_name = 'Game Name'
        self.display_time = display_time
        self.init(**kwargs)

    def init(self, **kwargs):
        self.game = Environment(env_name=self.game_name, **kwargs)
        self.action_set = self.game.env.action_map
        self.action_space = spaces.Discrete(self.game.num_actions())
        self.observation_space = spaces.Box(0.0,
                                            1.0,
                                            shape=self.game.state_shape(),
                                            dtype=np.float32)

    def step(self, action):
        reward, done = self.game.act(action)
        return (self.game.state(), reward, done, {})

    def reset(self):
        self.game.reset()
        return self.game.state()

    def seed(self, seed=None):
        self.game = Environment(env_name=self.game_name, random_seed=seed)
        return seed

    def render(self, mode='human'):
        if mode == 'rgb_array':
            return self.game.state()
        elif mode == 'human':
            self.game.display_state(self.display_time)

    def close(self):
        if self.game.visualized:
            self.game.close_display()
        return 0
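BaseEnv only stores a placeholder game_name, so a concrete environment would normally subclass it and supply a real MinAtar game id before init() constructs the Environment. A minimal sketch; the BreakoutEnv name is illustrative and not part of the example:

class BreakoutEnv(BaseEnv):
    def __init__(self, display_time=50, **kwargs):
        self.game_name = 'breakout'  # real MinAtar game id
        self.display_time = display_time
        self.init(**kwargs)

env = BreakoutEnv()
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())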
Example #13
 def seed(self, seed=None):
     self.game = Environment(env_name=self.game_name, random_seed=seed)
     return seed
Example #14
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--alpha", "-a", type=float, default=STEP_SIZE)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    parser.add_argument("--ramp-difficulty",
                        default=False,
                        action="store_true")
    parser.add_argument("--sticky-actions", default=False, action="store_true")
    parser.add_argument("--save-dataset", default=False, action="store_true")
    parser.add_argument("--num-frames", type=int, default=5000000)
    args = parser.parse_args()

    global NUM_FRAMES
    NUM_FRAMES = args.num_frames

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    # If there's an output specified, then use the user specified output.  Otherwise, create file in the current
    # directory with the game's name.
    if args.output:
        file_name = args.output
    else:
        file_name = os.getcwd() + "/" + args.game

    load_file_path = None
    if args.loadfile:
        load_file_path = args.loadfile

    env = Environment(args.game,
                      sticky_action_prob=0.1 if args.sticky_actions else 0.0,
                      difficulty_ramping=args.ramp_difficulty)

    print('Cuda available?: ' + str(torch.cuda.is_available()))
    policy_net = dqn(env, args.replayoff, args.targetoff, file_name, args.save,
                     load_file_path, args.alpha)

    if args.save_dataset:

        epsilon = 0.1
        num_steps = 100000
        num_actions = env.num_actions()
        transitions = []

        env.reset()

        for i in range(num_steps):

            if i % 1000 == 0:
                logging.info("data collection step {:d}".format(i))

            s = env.state()
            s_t = get_state(s)

            with torch.no_grad():
                q_values = policy_net(s_t)

            if numpy.random.uniform(0, 1) < epsilon:
                action = torch.tensor([[random.randrange(num_actions)]],
                                      device=device)
            else:
                action = q_values.max(1)[1].view(1, 1)

            reward, terminated = env.act(action)

            s_prime = env.state()
            s_prime_t = get_state(s_prime)

            with torch.no_grad():
                q_values_prime = policy_net(s_prime_t)

            t = Transition(s,
                           int(action.cpu().numpy()[0, 0]),
                           float(reward),
                           s_prime,
                           False,
                           bool(terminated),
                           q_values=q_values.cpu().numpy(),
                           next_q_values=q_values_prime.cpu().numpy())
            transitions.append(t)

            if terminated:
                env.reset()

        file_name = os.path.join("dataset", "{:s}.pickle".format(args.game))
        with open(file_name, "wb") as file:
            pickle.dump(transitions, file)
Example #15
 def __init__(self, name, seed):
     self.env = Environment(name, random_seed=seed)
Example #16
    def run_abstract_agent_(self,
                            cluster_q_values=False,
                            cluster_q_values_from_model=False,
                            learned_cluster_q_values=False,
                            new_transitions=False,
                            rewards_no_actions=False,
                            failure_rewards=False,
                            soft_failure_rewards=False):
        def classify(state):

            _, log_probs = self.model.encode(state[np.newaxis],
                                             hand_states=np.zeros(
                                                 (1, 1), dtype=np.int32))
            log_probs = log_probs[0]

            if self.hard_abstract_state:
                idx = np.argmax(log_probs)
                output = np.zeros_like(log_probs)
                output[idx] = 1.0
            elif self.sample_abstract_state:
                idx = np.random.choice(list(range(len(log_probs))),
                                       p=np.exp(log_probs))
                output = np.zeros_like(log_probs)
                output[idx] = 1.0
            else:
                output = np.exp(log_probs)

            return output

        self.game_env = Environment(self.game,
                                    difficulty_ramping=False,
                                    sticky_action_prob=0.0)

        if cluster_q_values:
            if cluster_q_values_from_model:
                q_values = self.model.session.run(self.model.cluster_qs_v)
                rewards_name = "rewards_q_values_from_model"
                gifs_name = "gifs_q_values_from_model"
            else:
                q_values = self.cluster_q_values
                rewards_name = "rewards_q_values"
                gifs_name = "gifs_q_values"
        elif learned_cluster_q_values:
            q_values = self.learned_qs
            rewards_name = "rewards_learned_q_values"
            gifs_name = "gifs_learned_q_values"
        else:
            q_values = self.abstract_mdp.state_action_values
            rewards_name = "rewards"
            gifs_name = "gifs"

        if new_transitions:
            rewards_name += "_new_t"
            gifs_name += "_new_t"

        if rewards_no_actions:
            rewards_name += "_r_no_a"
            gifs_name += "_r_no_a"

        if failure_rewards:
            rewards_name += "_failure_r"
            gifs_name += "_failure_r"

        if soft_failure_rewards:
            rewards_name += "_soft_failure_r"
            gifs_name += "_soft_failure_r"

        self.abstract_agent = QuotientMDPNBisim(
            classify,
            self.game_env,
            q_values,
            minatar=True,
            softmax_policy=self.softmax_policy,
            softmax_policy_temp=self.softmax_policy_temp)

        if self.save_gifs:
            gifs_path = self.saver.get_new_dir(gifs_name)
        else:
            gifs_path = None

        solver = SolverMinAtar(self.game_env,
                               self.abstract_agent,
                               int(1e+7),
                               int(1e+7),
                               0,
                               max_episodes=self.eval_episodes,
                               train=False,
                               gif_save_path=gifs_path,
                               rewards_file=self.saver.get_save_file(
                                   rewards_name, "dat"))
        solver.run()
Example #17
# Authors:                                                                                                     #
# Kenny Young ([email protected])                                                                            #
# Tian Tian([email protected])                                                                                 #
#                                                                                                              #
# python3 random_play.py -g <game>                                                                             #
################################################################################################################
import random, numpy, argparse
from minatar import Environment

NUM_EPISODES = 1000

parser = argparse.ArgumentParser()
parser.add_argument("--game", "-g", type=str)
args = parser.parse_args()

env = Environment(args.game)

e = 0
returns = []
num_actions = env.num_actions()

# Run NUM_EPISODES episodes and log all returns
while e < NUM_EPISODES:
    # Initialize the return for every episode
    G = 0.0

    # Initialize the environment
    env.reset()
    terminated = False

    # Obtain first state, unused by random agent, but included for illustration
Example #18
import argparse
import tkinter as Tk
from minatar import Environment, GUI

################################################################################################################
# Script that allows a human to play any of the MinAtar games. Use arrow keys to move and space to fire.
# Pressing q will exit the game, r will restart.
#
################################################################################################################
parser = argparse.ArgumentParser()
parser.add_argument("--game", "-g", type=str)
args = parser.parse_args()

# Setup game environment and GUI
env = Environment(args.game)
gui = GUI(env.game_name(), env.n_channels)

# Thread safe variables for use with GUI
action = Tk.IntVar()
action.set(0)
action_taken = Tk.BooleanVar()
action_taken.set(False)
action_released = Tk.BooleanVar()
action_released.set(False)
G = Tk.DoubleVar()
G.set(0.0)
is_terminate = Tk.BooleanVar()
is_terminate.set(False)

# Map input keys to agent actions
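The example is cut off here. Purely as an illustration of the comment above, a key-to-action mapping could look like the dictionary below; the Tkinter keysyms are standard, but the action names and the idea of resolving them to indices through the environment's action set are assumptions rather than code taken from the script:

# Hypothetical key bindings: map pressed keys to MinAtar action names.
# In the real script the chosen action is written into the Tk variables above.
key_to_action_name = {
    'Left': 'l',
    'Up': 'u',
    'Right': 'r',
    'Down': 'd',
    'space': 'f',  # fire
}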