Example #1
File: async_rl.py, Project: agajews/tfbrain
# Assumed imports for this example (not shown in the original snippet):
import numpy as np
from scipy import ndimage

from ale_python_interface.ale_python_interface import ALEInterface


class AsyncAtariEnvironment(object):
    def __init__(self, hyperparams, rom_fnm):
        self.hyperparams = hyperparams
        self.show_screen = hyperparams['show_screen']
        self.state_len = hyperparams['state_len']
        # self.screen_resize = hyperparams['screen_resize']
        self.rom_fnm = rom_fnm
        self.init_ale(display=self.show_screen)
        self.actions = self.ale.getMinimalActionSet()
        print('Num possible actions: %d' % len(self.actions))
        self.state_shape = (84, 84, self.state_len)

    def init_ale(self, display=False):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123)
        # self.ale.setInt(b'delay_msec', 0)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setInt(b'frame_skip', self.hyperparams['frame_skip'])
        if display:
            self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode(self.rom_fnm))

    def preprocess_screen(self, screen_rgb):
        # Convert RGB to grayscale with the standard luminance weights
        screen = np.dot(screen_rgb, np.array([.299, .587,
                                              .114])).astype(np.uint8)
        # Downscale the 210x160 ALE screen to 84x84
        screen = ndimage.zoom(screen, (0.4, 0.525))
        screen.resize((84, 84))  # guard against rounding off-by-ones from zoom
        return screen

    def get_state_shape(self):
        return self.state_shape

    def start_episode(self):
        self.ale.reset_game()
        self.episode_reward = 0
        self.states = []
        # Prime the frame stack with state_len copies of the initial screen
        for _ in range(self.state_len):
            self.states.append(self.preprocess_screen(self.ale.getScreenRGB()))

    def get_episode_reward(self):
        return self.episode_reward

    def get_state(self):
        # Stack the state_len most recent frames along the channel axis
        return np.stack(self.states, axis=2)

    def perform_action(self, action_dist):
        # Act greedily with respect to the given action distribution
        action = self.actions[np.argmax(action_dist)]
        self.curr_reward = self.ale.act(action)
        self.episode_reward += self.curr_reward
        # Clip rewards to {-1, 0, 1}, as in the DQN paper
        if self.curr_reward > 0:
            self.curr_reward = 1
        elif self.curr_reward < 0:
            self.curr_reward = -1
        # Drop the oldest frame and push the newest to the front of the stack
        screen = self.preprocess_screen(self.ale.getScreenRGB())
        self.states = self.states[:self.state_len - 1]
        self.states.insert(0, screen)

    def start_eval_mode(self):
        # self.init_ale(display=self.show_screen)
        pass

    def end_eval_mode(self):
        # self.init_ale(display=False)
        pass

    def get_reward(self):
        return self.curr_reward

    def episode_is_over(self):
        return self.ale.game_over()

    def get_actions(self):
        return list(range(len(self.actions)))
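
A minimal driving loop for the class above. This is a hypothetical usage sketch: the hyperparams keys come from __init__ and init_ale, but these particular values are illustrative, and the ROM path is borrowed from the ale_test.py examples below.

# Hypothetical usage sketch, not part of the project
hyperparams = {'show_screen': False, 'state_len': 4, 'frame_skip': 4}
env = AsyncAtariEnvironment(hyperparams, 'data/roms/breakout.bin')

env.start_episode()
while not env.episode_is_over():
    # A uniform random vector stands in for a policy network's output
    action_dist = np.random.rand(len(env.get_actions()))
    env.perform_action(action_dist)
print('Episode reward: %d' % env.get_episode_reward())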
Example #2
File: rl.py, Project: agajews/tfbrain
# Assumed imports for this example (not shown in the original snippet):
import random

import cv2
import numpy as np

from ale_python_interface.ale_python_interface import ALEInterface


class AtariTask(object):
    def __init__(self, hyperparams, rom_fnm):
        self.hyperparams = hyperparams
        self.show_screen = hyperparams['show_screen']
        self.state_len = hyperparams['state_len']
        # self.screen_resize = hyperparams['screen_resize']
        self.rom_fnm = rom_fnm
        self.init_ale(display=self.show_screen)
        self.actions = self.ale.getMinimalActionSet()
        print('Num possible actions: %d' % len(self.actions))
        self.state_shape = (84, 84, self.state_len)

    def init_ale(self, display=False):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', random.randrange(999))
        # self.ale.setInt(b'delay_msec', 0)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setInt(b'frame_skip', self.hyperparams['frame_skip'])
        if display:
            self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode(self.rom_fnm))

    def preprocess_screen(self, screen_rgb):
        # Resize to 110x84 and convert to grayscale (DQN-style preprocessing)
        screen = cv2.cvtColor(cv2.resize(
            screen_rgb, (84, 110)), cv2.COLOR_BGR2GRAY)
        # Crop to the 84x84 playing area
        return screen[26:110, :]

    def get_state_shape(self):
        return self.state_shape

    def get_actions(self):
        return list(range(len(self.actions)))

    def start_episode(self):
        self.ale.reset_game()
        self.episode_reward = 0
        self.states = []
        for _ in range(self.state_len):
            self.states.append(self.preprocess_screen(
                self.ale.getScreenRGB()))

    def get_episode_reward(self):
        return self.episode_reward

    def get_state(self):
        # Stack the state_len most recent frames along the channel axis
        return np.stack(self.states, axis=2)

    def perform_action(self, action_dist):
        # Act greedily with respect to the given action distribution
        action = self.actions[np.argmax(action_dist)]
        self.curr_reward = self.ale.act(action)
        self.episode_reward += self.curr_reward
        # Clip rewards to {-1, 0, 1}, as in the DQN paper
        if self.curr_reward > 0:
            self.curr_reward = 1
        elif self.curr_reward < 0:
            self.curr_reward = -1
        # Drop the oldest frame and push the newest to the front of the stack
        screen = self.preprocess_screen(self.ale.getScreenRGB())
        self.states = self.states[:self.state_len - 1]
        self.states.insert(0, screen)

    def start_eval_mode(self):
        # self.init_ale(display=self.show_screen)
        pass

    def end_eval_mode(self):
        # self.init_ale(display=False)
        pass

    def get_reward(self):
        return self.curr_reward

    def episode_is_over(self):
        return self.ale.game_over()
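
The frame-stacking bookkeeping shared by both classes above can be checked in isolation. A standalone sketch, with random arrays standing in for preprocessed screens:

import numpy as np

state_len = 4
# Random 84x84 frames stand in for preprocessed ALE screens
states = [np.random.randint(0, 256, (84, 84), dtype=np.uint8)
          for _ in range(state_len)]

# perform_action's update: drop the oldest frame, insert the newest at the front
new_frame = np.random.randint(0, 256, (84, 84), dtype=np.uint8)
states = states[:state_len - 1]
states.insert(0, new_frame)

# get_state stacks along the last axis, matching state_shape = (84, 84, state_len)
print(np.stack(states, axis=2).shape)  # (84, 84, 4)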
Example #3
File: ale_test.py, Project: agajews/tfbrain
from ale_python_interface.ale_python_interface import ALEInterface
import numpy as np

ale = ALEInterface()

ale.setInt(b"random_seed", 123)
ale.setBool(b'display_screen', True)
ale.loadROM(str.encode('data/roms/breakout.bin'))

random_seed = ale.getInt(b"random_seed")
print("random_seed: " + str(random_seed))

legal_actions = ale.getMinimalActionSet()

(screen_width, screen_height) = ale.getScreenDims()
print("width/height: " + str(screen_width) + "/" + str(screen_height))

episode = 0
total_reward = 0.0
while episode < 10:
    a = legal_actions[np.random.randint(legal_actions.size)]
    reward = ale.act(a)
    total_reward += reward
    # End of episode: report the score, reset, and move on
    if ale.game_over():
        print("Episode %d ended with score: %.1f" % (episode, total_reward))
        total_reward = 0.0
        ale.reset_game()
        episode += 1
Example #4
# Assumed imports for this example (not shown in the original snippet):
import os
import random

import numpy as np

from ale_python_interface.ale_python_interface import ALEInterface

# 'State' is the project's frame-stack helper; its definition and import
# are not shown in the original snippet.


class AtariEnvironment:
    
    def __init__(self, args, outputDir):
        
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq
        
        self.ale = ALEInterface()
        self.ale.setBool(b'sound', False)
        self.ale.setBool(b'display_screen', False)
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        self.ale.loadROM(b'./breakout.bin')

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state
    
    def getGameNumber(self):
        return self.gameNumber
    
    def getFrameNumber(self):
        return self.ale.getFrameNumber()
    
    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()
    
    def getEpisodeStepNumber(self):
        return self.episodeStepNumber
    
    def getStepNumber(self):
        return self.stepNumber
    
    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1
        
        # Repeat the chosen action for 4 raw frames (manual frame skip)
        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()
    
            # Detect end of episode. I don't think I'm handling this right in
            # terms of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            # Periodically capture this game's frames to disk
            if self.gameNumber % self.screenCaptureFrequency == 0:
                capDir = self.outputDir + '/screen_cap/game-%06d' % self.gameNumber
                if not os.path.isdir(capDir):
                    os.makedirs(capDir)
                self.ale.saveScreenPNG(str.encode(
                    capDir + '/frame-%06d.png' % self.getEpisodeFrameNumber()))

        # Max over the last two raw frames to remove sprite flicker
        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen,
                                                    self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(), self.ale.getFrameNumber())
        self.gameScore = 0
        # Environment steps, not ALE frames: with the 4-frame skip, the ALE
        # frame number will be roughly 4x this step count
        self.episodeStepNumber = 0
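
A sketch of how this environment might be driven. The args object normally comes from argparse; here a hypothetical namedtuple carrying only the screen_capture_freq attribute used above stands in for it, and the loop itself is illustrative:

from collections import namedtuple

# Hypothetical stand-in for the project's argparse namespace
Args = namedtuple('Args', ['screen_capture_freq'])
env = AtariEnvironment(Args(screen_capture_freq=100), outputDir='out')

while not env.isGameOver():
    # A random action index stands in for the agent's choice
    action = np.random.randint(env.getNumActions())
    reward, state, isTerminal = env.step(action)
    if isTerminal:
        break
print('Game score: %d' % env.getGameScore())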