Example 1
import argparse
import random

import numpy

from minatar import Environment


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    parser.add_argument("--ramp-difficulty",
                        default=False,
                        action="store_true")
    parser.add_argument("--sticky-actions", default=False, action="store_true")
    parser.add_argument("--save-dataset", default=False, action="store_true")
    parser.add_argument("--num-frames", type=int, default=5000000)
    args = parser.parse_args()

    env = Environment(args.game,
                      sticky_action_prob=0.1 if args.sticky_actions else 0.0,
                      difficulty_ramping=args.ramp_difficulty)

    num_episodes = 100
    num_actions = env.num_actions()

    reward_per_episode = []
    episode_rewards = []

    env.reset()

    # Act uniformly at random until num_episodes episodes have completed
    # (the large range is only an upper bound on the total number of frames)
    for i in range(10000000):

        s = env.state()

        action = random.randrange(num_actions)
        reward, terminated = env.act(action)

        episode_rewards.append(reward)

        if terminated:
            reward_per_episode.append(numpy.sum(episode_rewards))
            episode_rewards = []

            if len(reward_per_episode) == num_episodes:
                break

            env.reset()

    print(numpy.mean(reward_per_episode))
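The excerpt ends inside main() without the usual script entry point. A minimal way to run it, assuming it is saved as a standalone script (the file name random_baseline.py below is only a placeholder):

if __name__ == "__main__":
    main()

# e.g. python3 random_baseline.py -g breakout --sticky-actions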
Example 2
import gym
import numpy as np
from gym import spaces

from minatar import Environment


class BaseEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, display_time=50, **kwargs):
        # Subclasses are expected to override this with a MinAtar game name
        self.game_name = 'Game Name'
        self.display_time = display_time
        self.init(**kwargs)

    def init(self, **kwargs):
        self.game = Environment(env_name=self.game_name, **kwargs)
        self.action_set = self.game.env.action_map
        self.action_space = spaces.Discrete(self.game.num_actions())
        self.observation_space = spaces.Box(0.0,
                                            1.0,
                                            shape=self.game.state_shape(),
                                            dtype=np.float32)

    def step(self, action):
        reward, done = self.game.act(action)
        return (self.game.state(), reward, done, {})

    def reset(self):
        self.game.reset()
        return self.game.state()

    def seed(self, seed=None):
        # Re-create the underlying environment with the requested random seed
        self.game = Environment(env_name=self.game_name, random_seed=seed)
        return seed

    def render(self, mode='human'):
        if mode == 'rgb_array':
            return self.game.state()
        elif mode == 'human':
            self.game.display_state(self.display_time)

    def close(self):
        if self.game.visualized:
            self.game.close_display()
        return 0
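BaseEnv leaves game_name as a placeholder, so it is intended to be subclassed once per MinAtar game. A minimal sketch of such a subclass and a random rollout, assuming the class above is in scope and that 'breakout' is the target game (the BreakoutEnv name is only illustrative):

class BreakoutEnv(BaseEnv):
    def __init__(self, display_time=50, **kwargs):
        self.game_name = 'breakout'
        self.display_time = display_time
        self.init(**kwargs)


env = BreakoutEnv()
obs = env.reset()
done = False
while not done:
    # Sample a random action from the discrete action space and step
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()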
Example 3
# Usage: python3 random_play.py -g <game>
import random, numpy, argparse
from minatar import Environment

NUM_EPISODES = 1000

parser = argparse.ArgumentParser()
parser.add_argument("--game", "-g", type=str)
args = parser.parse_args()

env = Environment(args.game)

e = 0
returns = []
num_actions = env.num_actions()

# Run NUM_EPISODES episodes and log all returns
while e < NUM_EPISODES:
    # Initialize the return for every episode
    G = 0.0

    # Initialize the environment
    env.reset()
    terminated = False

    # Obtain the first state; it is unused by the random agent but included for illustration
    s = env.state()
    while not terminated:
        # Select an action uniformly at random
        action = random.randrange(num_actions)

        # Act on the environment and observe the reward and termination signal
        reward, terminated = env.act(action)

        # Accumulate the episodic return and advance to the next state
        G += reward
        s = env.state()

    # Log the return for this episode and move on to the next one
    returns.append(G)
    e += 1

# Report the average return over all episodes
print(numpy.mean(returns))
Example 4
import argparse
import logging
import os
import pickle
import random

import numpy
import torch

from minatar import Environment

# STEP_SIZE, NUM_FRAMES, device, dqn, get_state and Transition are defined
# elsewhere in the surrounding training script and are not shown here.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--alpha", "-a", type=float, default=STEP_SIZE)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    parser.add_argument("--ramp-difficulty",
                        default=False,
                        action="store_true")
    parser.add_argument("--sticky-actions", default=False, action="store_true")
    parser.add_argument("--save-dataset", default=False, action="store_true")
    parser.add_argument("--num-frames", type=int, default=5000000)
    args = parser.parse_args()

    global NUM_FRAMES
    NUM_FRAMES = args.num_frames

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    # If an output path was specified, use it; otherwise create a file in the
    # current directory named after the game.
    if args.output:
        file_name = args.output
    else:
        file_name = os.getcwd() + "/" + args.game

    load_file_path = None
    if args.loadfile:
        load_file_path = args.loadfile

    env = Environment(args.game,
                      sticky_action_prob=0.1 if args.sticky_actions else 0.0,
                      difficulty_ramping=args.ramp_difficulty)

    print('Cuda available?: ' + str(torch.cuda.is_available()))
    policy_net = dqn(env, args.replayoff, args.targetoff, file_name, args.save,
                     load_file_path, args.alpha)

    # Optionally roll out an epsilon-greedy policy using the trained network and
    # save the collected transitions (annotated with Q-values) as a pickled dataset.
    if args.save_dataset:

        epsilon = 0.1
        num_steps = 100000
        num_actions = env.num_actions()
        transitions = []

        env.reset()

        for i in range(num_steps):

            if i % 1000 == 0:
                logging.info("data collection step {:d}".format(i))

            s = env.state()
            s_t = get_state(s)

            with torch.no_grad():
                q_values = policy_net(s_t)

            if numpy.random.uniform(0, 1) < epsilon:
                action = torch.tensor([[random.randrange(num_actions)]],
                                      device=device)
            else:
                action = q_values.max(1)[1].view(1, 1)

            reward, terminated = env.act(action)

            s_prime = env.state()
            s_prime_t = get_state(s_prime)

            with torch.no_grad():
                q_values_prime = policy_net(s_prime_t)

            t = Transition(s,
                           int(action.cpu().numpy()[0, 0]),
                           float(reward),
                           s_prime,
                           False,
                           bool(terminated),
                           q_values=q_values.cpu().numpy(),
                           next_q_values=q_values_prime.cpu().numpy())
            transitions.append(t)

            if terminated:
                env.reset()

        file_name = os.path.join("dataset", "{:s}.pickle".format(args.game))
        with open(file_name, "wb") as file:
            pickle.dump(transitions, file)
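The dataset written at the end of this example is simply a pickled list of Transition records, so reading it back is symmetric. A minimal sketch, assuming the game was breakout and that the same Transition definition is importable when unpickling:

import pickle

with open("dataset/breakout.pickle", "rb") as file:
    transitions = pickle.load(file)

# Each entry carries the state, action, reward, next state and Q-value annotations
print(len(transitions), "transitions loaded")
print(transitions[0])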