Example #1
# Assumed imports for this snippet (legacy ALE Python bindings);
# get_feature is a project-specific helper defined elsewhere.
import sys
from ale_python_interface import ALEInterface


class game(object):
    def __init__(self, display):
        self.ale = ALEInterface()

        # Get & Set the desired settings
        self.ale.setInt('random_seed', 123)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = display
        if USE_SDL:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Load the ROM file
        self.ale.loadROM("ms_pacman.bin")

    def act(self, action):
        return self.ale.act(action)

    def getState(self):
        return get_feature(self.ale.getScreen())

    def getScreen(self):
        return self.ale.getScreen()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def game_over(self):
        return self.ale.game_over()
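
A minimal driver for the wrapper above might look like this (a sketch: it assumes the class and its ALE/get_feature dependencies are importable, and uses only methods defined above):

# Hypothetical usage sketch for the `game` wrapper above.
from random import randrange

g = game(display=False)
actions = g.ale.getMinimalActionSet()
while not g.game_over():
    reward = g.act(actions[randrange(len(actions))])
print 'lives left:', g.lives()
g.reset_game()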
Example #2
#     ale.setBool('display_screen', True)

# Load the ROM file
ale.loadROM('Breakout.bin')

# Get the list of legal actions
# legal_actions = ale.getLegalActionSet()
legal_actions = ale.getMinimalActionSet()
print legal_actions

# (screen_width,screen_height) = ale.getScreenDims()
# screen_data = np.zeros(screen_width*screen_height,dtype=np.uint32)
# ale.getScreenRGB(screen_data)

(screen_width, screen_height) = ale.getScreenDims()
screen_data = np.zeros(screen_width * screen_height, dtype=np.uint8)
print type(ale.getScreen(screen_data))

# Play 10 episodes
for episode in xrange(10):

    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        print reward
        total_reward += reward
    print 'Episode', episode, 'ended with score:', total_reward
    ale.reset_game()
Example #3
    cv2.imshow("ImageGray", observation)
    cv2.waitKey(10)

while frameCount != maxFrame:

    ale.reset_game()
    score = 0
    cost_average = 0.0
    frameCountLast = frameCount
    t0 = time.time()
    t1s = t2s = t3s = t4s = t5s = t6s = t7s = t8s = t9s = 0
    while not ale.game_over():

        t00 = time.time()

        imgBinary = Scale(ale.getScreen())

        t1 = time.time()
        t1s += t1 - t00

        if np.random.rand(1) > explorationRate:
            [actionIndex, actionValue] = forward([imgBinary],
                                                 Q_train,
                                                 all=False)
        else:
            actionIndex = randrange(len(legal_actions))  # get action

        t2 = time.time()
        t2s += t2 - t1

        reward = ale.act(legal_actions[actionIndex])  # reward
Example #4
State1 = np.zeros([batchSize, network_size])
Action0 = np.zeros([batchSize])
Reward0 = np.zeros([batchSize])


for episode in xrange(maxEpisode):

    ale.reset_game()
    score = 0
    cost_average = 0.0
    frameCountLast = frameCount
    t0 = time.time()

    while not ale.game_over():

        imgBinary = Scale(ale.getScreen())
        if np.random.rand(1) > explorationRate:
            [actionIndex, actionValue] = forward([imgBinary], Q_train, all=False)
        else:
            actionIndex = randrange(len(legal_actions))  # get action
        reward = ale.act(legal_actions[actionIndex])  # reward
        memory.append([imgBinary, actionIndex, reward])
        score += reward

        if frameCount >= startLearningFrame - 1:
            index = np.random.permutation(len(memory) - 1)[0:batchSize]

            for i in xrange(batchSize):
                State0[i, :] = memory[index[i]][0]
                State1[i, :] = memory[index[i] + 1][0]
                Action0[i] = memory[index[i]][1]
Example #5
def forward(input, all=False):
    actionValues = sess.run(y, feed_dict={x: input})
    if all is True:
        return actionValues
    actionValue_max = np.max(actionValues)
    index = np.argmax(actionValues, axis=1)
    return [index, actionValue_max]





ale = ALEInterface()
ale.loadROM("Breakout.A26")
legal_actions = ale.getLegalActionSet()
img = ale.getScreen()
actionIndex, actionValue = forward([img])
reward = ale.act(legal_actions[int(actionIndex)])

# Get & set the desired settings (note: ALE settings generally must be
# set *before* loadROM to take effect)
ale.setInt('random_seed', 123)
ale.setInt("frame_skip", frameSkip)


# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = True
if USE_SDL:
  if sys.platform == 'darwin':
    import pygame
Example #6
    f = open(file_name, "r")
    bg = []
    while True:
        c = f.read(1)
        if not c:
            break
        if c == '*':
            bg.append(0)
        elif c == ';':
            bg.append(144)
        elif c == '|':
            bg.append(74)
    f.close()
    return bg
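
For reference, a tiny round-trip of the reader above (hypothetical: the enclosing def line is cut off in this snippet, so assume it is named read_bg):

# Hypothetical round-trip: '*', ';' and '|' map to pixel values 0, 144 and 74.
with open('bg.txt', 'w') as f:
    f.write('*;|')
print read_bg('bg.txt')  # would print [0, 144, 74]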


for episode in xrange(200):
    total_reward = 0
    k = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        if reward > 10:
            print "happened"
            screen = ale.getScreen()
            printScreen4(screen)
        total_reward += reward
    print 'Episode', episode, 'ended with score:', total_reward
    ale.reset_game()
Example #7
terminal = 1

# t0s = t1s = t2s = t3s = t4s =t5s =t6s=t7s= 0

ale.reset_game()

trainThread = threading.Thread(target=train)
trainThread.start()


for frameCount in xrange(maxFrame):

    t00 = time.time()

    lives = ale.lives()
    observe = Scale(ale.getScreen())  # 0.08s


    if terminal:
        actionIndex = 1
        # # a random start
        # ale.act(1)
        # for i in xrange(np.random.randint(0,maxRandomStartFrame)):
        #     ale.act(np.random.randint(len(legal_actions)))
        # ale.act(0)
        # actionIndex = 0
    else:
        if np.random.rand(1) > explorationRate:
            [actionIndex, actionValue] = forward(
                [np.transpose(memory.History, [1, 2, 0])], Q_train, all=False)
        else:
            actionIndex = np.random.randint(len(legal_actions))  # get action
Example #8
    saver.restore(sess, loadModelPath)


def forward(input, all=False):
    actionValues = sess.run(y, feed_dict={x: input})
    if all is True:
        return actionValues
    actionValue_max = np.max(actionValues)
    index = np.argmax(actionValues, axis=1)
    return [index, actionValue_max]


ale = ALEInterface()
ale.loadROM("Breakout.A26")
legal_actions = ale.getLegalActionSet()
img = ale.getScreen()
actionIndex, actionValue = forward([img])
reward = ale.act(legal_actions[int(actionIndex)])

# Get & set the desired settings (note: ALE settings generally must be
# set *before* loadROM to take effect)
ale.setInt('random_seed', 123)
ale.setInt("frame_skip", frameSkip)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
Example #9
scoreEpisode = 0.0
cost_average = 0.0
frameCount = 0
frameCountLast = frameCount
terminal = 1

t0s = t1s = t2s = t3s = t4s = t5s = t6s = t7s = 0

ale.reset_game()

for frameCount in xrange(maxFrame):

    # t00 = time.time()

    lives = ale.lives()
    observe = Scale(ale.getScreen())  # 0.08s

    # t1 = time.time()
    # t1s += t1 - t00

    if terminal:
        actionIndex = 1
    else:
        if np.random.rand(1) > explorationRate:
            [actionIndex, actionValue] = forward(
                [np.transpose(memory.History, [1, 2, 0])], Q_train, all=False)
        else:
            actionIndex = np.random.randint(len(legal_actions))  # get action

    # t2 = time.time()
    # t2s += t2 - t1
Example #10
# Assumed imports for this snippet: numpy, scipy.misc (imresize, imsave),
# pickle, and the ALE Python bindings (ALEInterface).
class Game:
    def __init__(self, state_height, state_width, display_screen=False):
        self.ale = ALEInterface()
        self.ale.setInt("frame_skip", 4)
        self.ale.setInt("random_seed", 123)
        self.ale.setBool("display_screen", display_screen)
        self.ale.loadROM("roms/breakout.bin")
        self.actions = self.ale.getMinimalActionSet()
        self.score = 0
        self.actions_len = len(self.actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.state_width = state_width
        self.state_height = state_height
        self.state_len = self.state_width * self.state_height
        self.make_move(self.actions[0])
        self.make_move(self.actions[1])

    def get_state(self):
        screen_data = np.zeros(
            self.screen_width * self.screen_height, dtype=np.uint8)
        self.ale.getScreen(screen_data)
        screen_data_2D = np.reshape(screen_data, (self.screen_height, self.screen_width))
        resized_screen_data_2D = imresize(
            screen_data_2D, (self.state_height, self.state_width))
        resized_screen_data = np.reshape(
            resized_screen_data_2D, self.state_width * self.state_height)
        return resized_screen_data.astype(dtype=np.float32) / 255.0

    def get_state_dims(self):
        return (self.state_width, self.state_height, 1)

    def save_state_to_img(self, fn):
        screen_data = np.zeros(
            self.screen_width * self.screen_height, dtype=np.uint8)
        self.ale.getScreen(screen_data)
        screen_data_2D = np.reshape(screen_data, (self.screen_height, self.screen_width))
        resized_screen_data_2D = imresize(
            screen_data_2D, (self.state_height, self.state_width))
        imsave(fn, resized_screen_data_2D)
        
    def make_move(self, action):
        r = self.ale.act(action)
        self.score += r
        return r

    def reset_game(self):
        self.ale.reset_game()
        self.score = 0
        self.make_move(self.actions[0])

    def game_over(self):
        return self.ale.game_over()

    def play(self):
        while True: 
            while not self.game_over():
                self.make_move(self.actions[np.random.randint(0, len(self.actions))])
            print("Game Over! Score: %s" % self.score)
            self.reset_game()

    def play_interactive(self):
        """
        play using 0,1,2,3
        save using 8
        """
        buf = []
        while True:
            S = self.get_state()
            try:
                a = int(raw_input())
            except ValueError:
                continue  # ignore non-numeric input
            if a == 8:
                with open("data.pickle", "w") as f:
                    pickle.dump(buf, f)
                break
            if a > 3:
                continue
            r = self.make_move(self.actions[a])
            S_ = self.get_state()
            terminal = self.game_over()
            if terminal:
                self.reset_game()
            buf.append((S, a, r, S_, terminal))
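
A short driver for the Game class above (a sketch: it assumes the ROM at roms/breakout.bin plus scipy's imresize/imsave are available, exactly as the class itself does):

# Hypothetical usage sketch for the Game class above.
g = Game(state_height=84, state_width=84, display_screen=False)
print(g.get_state_dims())           # (84, 84, 1)
g.save_state_to_img('frame0.png')   # dump the downscaled screen
g.play()                            # random policy; loops over episodes forever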
Example #11
#     ale.setBool('display_screen', True)

# Load the ROM file
ale.loadROM('Breakout.bin')

# Get the list of legal actions
# legal_actions = ale.getLegalActionSet()
legal_actions = ale.getMinimalActionSet()
print legal_actions

# (screen_width,screen_height) = ale.getScreenDims()
# screen_data = np.zeros(screen_width*screen_height,dtype=np.uint32)
# ale.getScreenRGB(screen_data)

(screen_width, screen_height) = ale.getScreenDims()
screen_data = np.zeros(screen_width * screen_height, dtype=np.uint8)
print type(ale.getScreen(screen_data))

# Play 10 episodes
for episode in xrange(10):

    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        print reward
        total_reward += reward
    print 'Episode', episode, 'ended with score:', total_reward
    ale.reset_game()
Example #12
def train_agent(gamepath, agent, n_episodes, display_screen, record_weights,
                reduce_exploration_prob_amount, n_frames_to_skip):
    """
    :description: trains an agent to play a game

    :type gamepath: string
    :param gamepath: path to the binary of the game to be played

    :type agent: subclass RLAlgorithm
    :param agent: the algorithm/agent that learns to play the game

    :type n_episodes: int
    :param n_episodes: number of episodes of the game on which to train

    :type display_screen: bool
    :param display_screen: whether to render the game screen via SDL

    :type record_weights: bool
    :param record_weights: whether to periodically save weights and video

    :type reduce_exploration_prob_amount: float
    :param reduce_exploration_prob_amount: per-episode decrement of the
        agent's exploration probability

    :type n_frames_to_skip: int
    :param n_frames_to_skip: ALE frame-skip setting
    """

    # load the ale interface to interact with
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    # display/recording settings, doesn't seem to work currently
    #recordings_dir = './recordings/breakout/'
    # previously "USE_SDL"
    if display_screen:
        if sys.platform == 'darwin':
            print 'darwin'
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(gamepath)
    ale.setInt("frame_skip", n_frames_to_skip)

    screen_preprocessor = screen_utils.RGBScreenPreprocessor()
    screen_dims = ale.getScreenDims()
    print screen_dims

    rewards = []
    best_reward = 0
    print('starting training...')
    for episode in xrange(n_episodes):
        action = 0
        reward = 0
        newAction = None

        total_reward = 0
        counter = 0
        lives = ale.lives()

        screen = np.zeros((160 * 210),
                          dtype=np.int8)  #np.zeros((32, 32, 3), dtype=np.int8)
        state = {
            "screen": screen,
            #"objects" : None,
            #"prev_objects": None,
            #"prev_action": 0,
            "action": 0
        }
        if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
            video = cv2.VideoWriter(
                'video/episode-{}-{}-video.avi'.format(episode, agent.name),
                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 24, screen_dims)

        start = time.time()

        while not ale.game_over():
            # if newAction is None then we are training an off-policy algorithm
            # otherwise, we are training an on policy algorithm
            if newAction is None:
                action = agent.getAction(state)
            else:
                action = newAction
            reward += ale.act(action)

            if ale.lives() < lives:
                lives = ale.lives()
                #reward -= 1
            total_reward += reward

            new_screen = ale.getScreen()  #getScreenRGB()
            #print screen.shape, new_screen.shape
            if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
                video.write(ale.getScreenRGB())
            #new_screen = screen_preprocessor.preprocess(new_screen)
            new_state = {
                "screen": new_screen,
                #"objects": None,
                #"prev_objects": state["objects"],
                #"prev_action": state["action"],
                "action": action
            }
            if counter % (n_frames_to_skip + 1) == 0:
                newAction = agent.incorporateFeedback(state, action, reward,
                                                      new_state)

            state = new_state
            reward = 0
            counter += 1

        end = time.time()
        rewards.append(total_reward)

        if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON:
            agent.explorationProb -= reduce_exploration_prob_amount

        print('episode: {}, score: {}, number of frames: {}, time: {:.4f}m'.
              format(episode, total_reward, counter, (end - start) / 60))

        if total_reward > best_reward and record_weights:
            best_reward = total_reward
            print("Best reward: {}".format(total_reward))

        if episode % PRINT_TRAINING_INFO_PERIOD == 0:
            print '\n############################'
            print '### training information ###'
            print("Average reward: {}".format(np.mean(rewards)))
            print("Last 50: {}".format(
                np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:])))
            print("Exploration probability: {}".format(agent.explorationProb))
            #print('action: {}'.format(action))
            print('size of weights dict: {}'.format(len(agent.weights)))
            #print('current objects: {}'.format(state['objects']))
            #print('previous objects: {}'.format(state['prev_objects']))
            weights = [v for k, v in agent.weights.iteritems()]
            min_feat_weight = min(weights)
            max_feat_weight = max(weights)
            avg_feat_weight = np.mean(weights)
            print('min feature weight: {}'.format(min_feat_weight))
            print('max feature weight: {}'.format(max_feat_weight))
            print('average feature weight: {}'.format(avg_feat_weight))
            print '############################'
            print '############################\n'

        if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
            file_utils.save_rewards(rewards,
                                    filename='{}-rewards'.format(agent.name))
            file_utils.save_weights(agent.weights,
                                    filename='episode-{}-{}-weights'.format(
                                        episode, agent.name))
            video.release()

        ale.reset_game()
    return rewards
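
A hedged invocation sketch for train_agent above; my_agent is a placeholder for a project-specific RLAlgorithm subclass, and all argument values are illustrative:

# Hypothetical call; `my_agent` must implement getAction/incorporateFeedback.
rewards = train_agent(gamepath='roms/breakout.bin',
                      agent=my_agent,
                      n_episodes=1000,
                      display_screen=False,
                      record_weights=True,
                      reduce_exploration_prob_amount=1e-5,
                      n_frames_to_skip=3)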
Example #13
# Assumed imports for this snippet: random, sys, the ALE Python bindings,
# and the project-local GameMap, GameMapObjects, SlicedGameMap, Ghost, Fruit.
class MsPacManGame(object):
    """Ms. Pac-Man Arcade Learning Environment wrapper class."""
    def __init__(self, seed, display):
        """Constructs a MsPacManGame.

        Args:
            seed: Initial random seed, randomized when None.
            display: Whether to display onto the screen or not.
        """
        self._ale = ALEInterface()

        if seed is None:
            seed = random.randint(0, 255)
        self._ale.setInt("random_seed", seed)

        if display:
            if sys.platform == "darwin":
                # Use PyGame on macOS.
                import pygame
                pygame.init()

                # Sound doesn't work on macOS.
                self._ale.setBool("sound", False)
            elif sys.platform.startswith("linux"):
                self._ale.setBool("sound", True)

            self._ale.setBool("display_screen", True)

        self._ale.loadROM("MS_PACMAN.BIN")

        self._reward = 0
        self._raw_ms_pacman_position = (0, 0)

        self.__screen = self._ale.getScreen()
        self.__ram = self._ale.getRAM()

        self._lives = self._ale.lives()

        self._update_state()

        self._go_to((94, 98), 3)

    @property
    def lives(self):
        """Current lives remaining."""
        return self._lives

    @property
    def reward(self):
        """Current total reward."""
        return self._reward

    @property
    def map(self):
        """Current game map."""
        return self._map

    @property
    def sliced_map(self):
        """Current game slice map."""
        return self._sliced_map

    @property
    def ms_pacman_position(self):
        """Ms. PacMan's position as a map index."""
        return self._ms_pacman_position

    @property
    def fruit(self):
        """Fruit."""
        return self._fruit

    @property
    def ghosts(self):
        """List of ghosts."""
        return self._ghosts

    def available_actions(self):
        """Returns a list of available actions to consider."""
        actions = []

        for action, move in [
            (2, (-1, 0)),  # up
            (3, (0, 1)),  # right
            (4, (0, -1)),  # left
            (5, (1, 0))  # down
        ]:
            new_pos = self.get_next_position(self._ms_pacman_position, move)
            if 0 <= new_pos[0] < GameMap.HEIGHT:
                if self._map.map[new_pos] != GameMapObjects.WALL:
                    actions.append(action)
        return actions

    def action_to_move(self, action):
        return [(-1, 0), (0, 1), (0, -1), (1, 0)][action - 2]

    def get_next_position(self, curr_position, move):
        new_pos = (curr_position[0] + move[0], curr_position[1] + move[1])
        if new_pos[1] < 0:
            new_pos = (new_pos[0], new_pos[1] + GameMap.WIDTH)
        elif new_pos[1] >= GameMap.WIDTH:
            new_pos = (new_pos[0], new_pos[1] - GameMap.WIDTH)
        return new_pos

    def act(self, action):
        """Plays a given action in the game.

        Args:
            action: Action to play.

        Returns:
            Partial reward gained since last action.
        """
        m = self.action_to_move(action)
        next_pos = self.get_next_position(self._ms_pacman_position, m)
        old_reward = self._reward
        old_lives = self._lives

        expected_reward = GameMapObjects.to_reward(self._map.map[next_pos])

        MAX_ACTION_COUNT = 20
        for _ in range(MAX_ACTION_COUNT):
            if expected_reward <= 0:
                if self._ms_pacman_position == next_pos:
                    break
            elif self._reward != old_reward:
                break

            if self.game_over() or self._lives < old_lives:
                return GameMapObjects.to_reward(GameMapObjects.BAD_GHOST)

            self._reward += self._ale.act(action)
            self._update_state()

        self._update_map()
        return self._reward - old_reward

    def _go_to(self, raw_pos, action):
        """Goes to a given position."""
        while (abs(self._raw_ms_pacman_position[0] - raw_pos[0]) > 1
               or abs(self._raw_ms_pacman_position[1] - raw_pos[1]) > 1):
            self._ale.act(action)
            self._update_state()
        self._update_map()

    def game_over(self):
        """Returns whether the game reached a terminal state or not."""
        return self._ale.game_over()

    def reset_game(self):
        """Resets the game to the initial state."""
        self._reward = 0
        return self._ale.reset_game()

    def _to_map_position(self, pos):
        """Converts a RAM coordinate into a map coordinate.

        Args:
            pos: (x, y) coordinates from RAM.

        Returns:
            Map index coordinate.
        """
        x, y = pos
        i = round((y - 2) / 12.0)
        if x < 83:
            j = round((x - 18) / 8.0 + 1)
        elif 93 < x < 169:
            j = round((x - 22) / 8.0 + 1)
        elif x > 169:
            j = 0
        elif x < 88:
            j = 9
        else:
            j = 10
        return i, j

    def _to_raw_position(self, pos):
        i, j = pos
        y = i * 12 + 2
        if j == 0:
            x = 12
        elif j <= 9:
            x = (j - 1) * 8 + 18
        else:
            x = (j - 1) * 8 + 22
        return x, y

    def _update_state(self):
        """Updates the internal state of the game."""
        # Get new states from RAM.
        self._ale.getRAM(self.__ram)
        new_ms_pacman_position = (int(self.__ram[10]), int(self.__ram[16]))
        new_ghosts_ram = [
            ((int(self.__ram[6]), int(self.__ram[12])), int(self.__ram[1])),
            ((int(self.__ram[7]), int(self.__ram[13])), int(self.__ram[2])),
            ((int(self.__ram[8]), int(self.__ram[14])), int(self.__ram[3])),
            ((int(self.__ram[9]), int(self.__ram[15])), int(self.__ram[4]))
        ]
        fruit = (int(self.__ram[11]), int(self.__ram[17])), int(self.__ram[5])
        self._fruit = Fruit.from_ram(self._to_map_position(fruit[0]), fruit[1],
                                     fruit[0][0] != 0)

        # Update positions.
        self._raw_ms_pacman_position = new_ms_pacman_position
        self._ms_pacman_position = self._to_map_position(
            new_ms_pacman_position)
        self._ghosts = [
            Ghost.from_ram(self._to_map_position(pos), ram)
            for pos, ram in new_ghosts_ram
        ]

        # Update lives.
        self._lives = self._ale.lives()

    def _update_map(self):
        # Get new map from screen.
        self._ale.getScreen(self.__screen)
        self._map = GameMap(self.__screen.reshape(210, 160))
        self._blank_map = GameMap.from_map(self._map.map.copy())
        self._map.map[self._ms_pacman_position] = GameMapObjects.MS_PACMAN
        if self._fruit.exists:
            self._map.map[self._fruit.position] = GameMapObjects.FRUIT
        for ghost in self._ghosts:
            if ghost.state == Ghost.GOOD:
                self._map.map[ghost.position] = GameMapObjects.GOOD_GHOST
            elif ghost.state == Ghost.BAD:
                self._map.map[ghost.position] = GameMapObjects.BAD_GHOST
        self._sliced_map = SlicedGameMap(self._map, self._ms_pacman_position)
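
A minimal episode loop for MsPacManGame above (a sketch: it assumes the ROM and the GameMap/Ghost/Fruit helpers the class relies on are importable):

# Hypothetical usage sketch for the MsPacManGame wrapper above.
import random

game = MsPacManGame(seed=123, display=False)
while not game.game_over():
    actions = game.available_actions()
    if not actions:
        break  # boxed in; available_actions() is normally non-empty
    game.act(random.choice(actions))
print 'final score:', game.reward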
Example #14
ram_size = ale.getRAMSize()
ram = np.zeros(ram_size, dtype=np.uint8)
ale.getRAM(ram)
print ram[54:108]



(screen_width, screen_height) = ale.getScreenDims()
screen_data = np.zeros(screen_width * screen_height, dtype=np.uint32)

legal_actions = ale.getLegalActionSet()

# Play 10 episodes
for episode in xrange(10):
  total_reward = 0
  a = 4
  while not ale.game_over():
    time.sleep(0.1)
    a = legal_actions[randrange(len(legal_actions))]
    reward = ale.act(a)
    total_reward += reward
    temp = [i for i in ram]
    ale.getRAM(ram)

    # print ram
    # print [temp[i] - ram[i] for i in range(len(ram))]
    ale.getScreen(screen_data)
    print screen_data
  print 'Episode', episode, 'ended with score:', total_reward
  ale.reset_game()
Example #15
class agent(object):
    def __init__(self):
        self.ale = ALEInterface()

        # Get & Set the desired settings
        self.ale.setInt('random_seed', 123)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = False
        if USE_SDL:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Load the ROM file
        self.ale.loadROM("ms_pacman.bin")
        #persistent:
        self.tetas = []
        self.Q = self.txtToMap('qvalues.txt')
        # a table of action values indexed by state and action, initially zero
        self.N = self.txtToMap('nvalues.txt')
        # a table of frequencies for state-action pairs, initially zero
        self.s = None
        self.a = None
        self.r = 0
        self.actions = self.ale.getMinimalActionSet()
        print self.actions
        #the previous state, action, and reward, initially null

    def Q_LEARNING_AGENT(self, state, reward):
        if self.ale.game_over():
            self.updateQ(self.s, None, reward)
        if self.s is not None:
            self.incrementN(self.s, self.a)
            val = self.computeNewQ(self.s, self.a, self.r, state)
            self.updateQ(self.s, self.a, val)
        self.s = state
        self.a = self.chooseAct(state)
        self.r = reward
        return self.a

    def computeNewQ(self, s, a, reward, state):
        qsa = self.getQ(s, a)
        maxQ = self.getQ(state, self.actions[0])
        for act in self.actions:
            val = self.getQ(state, act)
            if val > maxQ:
                maxQ = val
        n = self.getN(s, a)
        alp = self.alpha(n)
        v = qsa + alp * (reward + 0.9 * maxQ - qsa)
        return v

    def chooseAct(self, state):
        v = randrange(10)
        if v == 5:  # explore with probability 1/10
            return self.actions[randrange(len(self.actions))]
        a = self.actions[0]
        maxQ = self.getQ(state, self.actions[0])
        for act in self.actions:
            val = self.getQ(state, act)
            if val > maxQ:
                maxQ = val
                a = act
        return a

    def alpha(self, Nsa):
        return 0.9

    def updateQ(self, s, a, value):
        self.Q[str(s) + "/" + str(a)] = value

    def getQ(self, s, a):
        return self.Q.get(str(s) + "/" + str(a), 0)

    def incrementN(self, s, a):
        self.N[str(s) + "/" + str(a)] = self.getN(s, a) + 1

    def getN(self, s, a):
        return self.N.get(str(s) + "/" + str(a), 0)

    def play(self, number):
        for episode in xrange(number):
            total_reward = 0
            self.s = None
            self.a = None
            reward = 0
            while not self.ale.game_over():
                state = hash(get_feature(self.ale.getScreen()))
                action = self.Q_LEARNING_AGENT(state, reward)
                # Apply an action and get the resulting reward
                reward = self.ale.act(action)
                total_reward += reward
            print 'Episode', episode, 'ended with score:', total_reward
            self.ale.reset_game()
        self.mapToTxt(self.Q, 'qvalues.txt')
        self.mapToTxt(self.N, 'nvalues.txt')

    def mapToTxt(self, hMap, filepath):
        f = open(filepath, 'w')  # 'w' truncates; 'r+' would leave stale data past the new end
        for elem in hMap.keys():
            toWrite = str(elem) + " " + str(hMap[elem]) + "\n"
            f.write(toWrite)
        f.close()

    def txtToMap(self, filepath):
        newMap = {}
        f = open(filepath)
        while True:
            string = f.readline()
            if not string: break
            tmp = self.stringSplitter(string)
            newMap[tmp[0]] = float(tmp[1])
        f.close()
        return newMap

    def stringSplitter(self, string):
        i = string.find(' ')
        head = string[:i]
        rest = string[i + 1:len(string) - 1]  # getting rid of the \n's
        return (head, rest)
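
A two-line driver for the Q-learning agent above (a sketch: qvalues.txt and nvalues.txt must already exist, even if empty, because the constructor reads them):

# Hypothetical usage sketch for the agent class above.
pacman = agent()
pacman.play(10)  # run 10 episodes, then persist the Q and N tables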
USE_SDL = True
if USE_SDL:
  if sys.platform == 'darwin':
    import pygame
    pygame.init()
    ale.setBool('sound', False) # Sound doesn't work on OSX
  elif sys.platform.startswith('linux'):
    ale.setBool('sound', True)
  ale.setBool('display_screen', True)

# Load the ROM file
ale.loadROM(sys.argv[1])

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()

# Play 10 episodes
screen = np.reshape(ale.getScreen(), (210, -1))
maze = detect_maze(screen)
image = pacman_image(maze)
# print_maze(maze)

for episode in xrange(1):
  total_reward = 0
  step = 1
  while not ale.game_over():
    # if step == 500:
    screen = np.reshape(ale.getScreen(), (210, -1))
    if step % 3 == 0:
      image.new_image(screen)
    a = legal_actions[randrange(len(legal_actions))]
    step += 1
    # Apply an action and get the resulting reward