Example #1
class Minatar(BaseEnvironment):
    def __init__(self, name, seed):
        self.env = Environment(name, random_seed=seed)

    def start(self):
        self.env.reset()
        s = self.env.state()
        return s.astype('float32')

    def step(self, a):
        r, t = self.env.act(a)
        sp = self.env.state().astype('float32')

        return (r, sp, t)
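A minimal sketch of how a wrapper like this might be driven; the episode loop and random action choice are illustrative and assume `from minatar import Environment` plus the `BaseEnvironment` interface are already in scope.

import numpy as np

env = Minatar('breakout', seed=0)        # 'breakout' is just an example game name
s = env.start()                          # float32 observation, shape (10, 10, n_channels)
terminated = False
G = 0.0
while not terminated:
    a = np.random.randint(env.env.num_actions())   # uniform random action for illustration
    r, sp, terminated = env.step(a)
    G += r
print(G)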
Example #2
class Minatar(BaseEnvironment):
    def __init__(self, name, seed):
        self.env = Environment(name, random_seed=seed)

    def start(self):
        self.env.reset()
        s = self.env.state()
        s = s.transpose(2, 0, 1)
        return s

    def step(self, a):
        r, t = self.env.act(a)
        sp = self.env.state()
        sp = sp.transpose(2, 0, 1)

        return (r, sp, t)
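The only difference from Example #1 is the transpose to channel-first ordering, which matches the (C, H, W) layout expected by PyTorch convolution layers. A small illustrative sketch of feeding such a state to torch (the tensor wrapping is an assumption, not part of the example):

import torch

env = Minatar('breakout', seed=0)
s = env.start()                                           # bool array, shape (n_channels, 10, 10)
s_t = torch.tensor(s, dtype=torch.float32).unsqueeze(0)   # add batch dim -> (1, C, 10, 10)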
Example #3
class MiniAtariTask:
    def __init__(self, env_id, seed=np.random.randint(int(1e5)), sticky_action_prob=0.0):
        random_seed(seed)
        # TODO: Allow sticky_action_prob and difficulty_ramping to be set by the configuration file
        self.env = Environment(env_id, random_seed=seed, sticky_action_prob=0.0, difficulty_ramping=False)
        self.name = env_id
        self.state_dim = self.env.state_shape()
        self.action_set = self.env.minimal_action_set()
        self.action_dim = len(self.action_set)

    def reset(self):
        self.env.reset()
        return self.env.state().flatten()

    def step(self, actions):
        rew, done = self.env.act(self.action_set[actions[0]])
        obs = self.reset() if done else self.env.state()
        return obs.flatten(), np.asarray(rew), np.asarray(done), ""
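A sketch of stepping this task class, assuming the `random_seed` helper used in `__init__` is importable; note that `step` expects a sequence of actions (only the first is used) and resets the underlying environment itself when an episode ends.

import numpy as np

task = MiniAtariTask('breakout', seed=0)        # game name is illustrative
obs = task.reset()                              # flattened state vector
for _ in range(1000):
    a = np.random.randint(task.action_dim)      # index into the minimal action set
    obs, rew, done, info = task.step([a])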
Example #4
class BaseEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, display_time=50, **kwargs):
        self.game_name = 'Game Name'
        self.display_time = display_time
        self.init(**kwargs)

    def init(self, **kwargs):
        self.game = Environment(env_name=self.game_name, **kwargs)
        self.action_set = self.game.env.action_map
        self.action_space = spaces.Discrete(self.game.num_actions())
        self.observation_space = spaces.Box(0.0,
                                            1.0,
                                            shape=self.game.state_shape(),
                                            dtype=np.float32)

    def step(self, action):
        reward, done = self.game.act(action)
        return (self.game.state(), reward, done, {})

    def reset(self):
        self.game.reset()
        return self.game.state()

    def seed(self, seed=None):
        self.game = Environment(env_name=self.game_name, random_seed=seed)
        return seed

    def render(self, mode='human'):
        if mode == 'rgb_array':
            return self.game.state()
        elif mode == 'human':
            self.game.display_state(self.display_time)

    def close(self):
        if self.game.visualized:
            self.game.close_display()
        return 0
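`game_name` is a placeholder in `BaseEnv`; one way to specialize it per game and drive it with the classic 4-tuple gym API is sketched below (the subclass and the loop are illustrative, not part of the original wrapper).

class BreakoutEnv(BaseEnv):
    def __init__(self, display_time=50, **kwargs):
        # Set the game name before building the underlying MinAtar Environment
        self.game_name = 'breakout'
        self.display_time = display_time
        self.init(**kwargs)

env = BreakoutEnv()
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())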
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    parser.add_argument("--ramp-difficulty",
                        default=False,
                        action="store_true")
    parser.add_argument("--sticky-actions", default=False, action="store_true")
    parser.add_argument("--save-dataset", default=False, action="store_true")
    parser.add_argument("--num-frames", type=int, default=5000000)
    args = parser.parse_args()

    env = Environment(args.game,
                      sticky_action_prob=0.1 if args.sticky_actions else 0.0,
                      difficulty_ramping=args.ramp_difficulty)

    num_episodes = 100
    num_actions = env.num_actions()

    reward_per_episode = []
    episode_rewards = []

    env.reset()

    for i in range(10000000):

        s = env.state()

        action = random.randrange(num_actions)
        reward, terminated = env.act(action)

        episode_rewards.append(reward)

        if terminated:
            reward_per_episode.append(numpy.sum(episode_rewards))
            episode_rewards = []

            if len(reward_per_episode) == num_episodes:
                break

            env.reset()

    print(numpy.mean(reward_per_episode))
Example #6
e = 0
returns = []
num_actions = env.num_actions()

# Run NUM_EPISODES episodes and log all returns
while e < NUM_EPISODES:
    # Initialize the return for every episode
    G = 0.0

    # Initialize the environment
    env.reset()
    terminated = False

    # Obtain first state, unused by random agent, but included for illustration
    s = env.state()
    while (not terminated):
        # Select an action uniformly at random
        action = random.randrange(num_actions)

        # Act according to the action and observe the transition and reward
        reward, terminated = env.act(action)

        # Obtain s_prime, unused by random agent, but included for illustration
        s_prime = env.state()

        G += reward

    # Log the return for this episode and increment the episode count
    returns.append(G)
    e += 1
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--alpha", "-a", type=float, default=STEP_SIZE)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    parser.add_argument("--ramp-difficulty",
                        default=False,
                        action="store_true")
    parser.add_argument("--sticky-actions", default=False, action="store_true")
    parser.add_argument("--save-dataset", default=False, action="store_true")
    parser.add_argument("--num-frames", type=int, default=5000000)
    args = parser.parse_args()

    global NUM_FRAMES
    NUM_FRAMES = args.num_frames

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    # If there's an output specified, then use the user specified output.  Otherwise, create file in the current
    # directory with the game's name.
    if args.output:
        file_name = args.output
    else:
        file_name = os.getcwd() + "/" + args.game

    load_file_path = None
    if args.loadfile:
        load_file_path = args.loadfile

    env = Environment(args.game,
                      sticky_action_prob=0.1 if args.sticky_actions else 0.0,
                      difficulty_ramping=args.ramp_difficulty)

    print('Cuda available?: ' + str(torch.cuda.is_available()))
    policy_net = dqn(env, args.replayoff, args.targetoff, file_name, args.save,
                     load_file_path, args.alpha)

    if args.save_dataset:

        epsilon = 0.1
        num_steps = 100000
        num_actions = env.num_actions()
        transitions = []

        env.reset()

        for i in range(num_steps):

            if i % 1000 == 0:
                logging.info("data collection step {:d}".format(i))

            s = env.state()
            s_t = get_state(s)

            with torch.no_grad():
                q_values = policy_net(s_t)

            if numpy.random.uniform(0, 1) < epsilon:
                action = torch.tensor([[random.randrange(num_actions)]],
                                      device=device)
            else:
                action = q_values.max(1)[1].view(1, 1)

            reward, terminated = env.act(action)

            s_prime = env.state()
            s_prime_t = get_state(s_prime)

            with torch.no_grad():
                q_values_prime = policy_net(s_prime_t)

            t = Transition(s,
                           int(action.cpu().numpy()[0, 0]),
                           float(reward),
                           s_prime,
                           False,
                           bool(terminated),
                           q_values=q_values.cpu().numpy(),
                           next_q_values=q_values_prime.cpu().numpy())
            transitions.append(t)

            if terminated:
                env.reset()

        file_name = os.path.join("dataset", "{:s}.pickle".format(args.game))
        with open(file_name, "wb") as file:
            pickle.dump(transitions, file)
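A short sketch of loading the saved dataset back; unpickling assumes the same `Transition` type is importable in the reading process, and the file name below just follows the "{game}.pickle" pattern used above.

import pickle

with open("dataset/breakout.pickle", "rb") as f:    # illustrative game name
    transitions = pickle.load(f)

print(len(transitions))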
Example #8
transitions = []
num_actions = env.num_actions()

# Run NUM_EPISODES episodes and log all returns
while e < NUM_EPISODES:
    # Initialize the return for every episode
    G = 0.0
    T = 0

    # Initialize the environment
    env.reset(seed=e)
    terminated = False

    while (not terminated):
        # env.display_state()
        s = env.state()
        paddle_pos = s[9, :, 0].argmax()
        ball_pos_x = s[:,:,1].argmax() % 10
        ball_pos_y = s[:,:,1].argmax() // 10
        last_pos_x = s[:,:,2].argmax() % 10
        last_pos_y = s[:,:,2].argmax() // 10

        if paddle_pos == ball_pos_x:
            if last_pos_x < ball_pos_x: # moving right
                if ball_pos_x == 9: action = 0
                else: action = 3 # right
            elif last_pos_x > ball_pos_x: # moving left
                if ball_pos_x == 0: action = 0
                else: action = 1 # left
            else: # very start of game
                if ball_pos_x == 0: action = 3