class pyrlcade_environment(object):
    """Thin wrapper around an ALE (Arcade Learning Environment) emulator
    exposing a reset / set_action / step / get_state / get_reward cycle.

    NOTE(review): requires `ALEInterface` and `np` (numpy) to be imported at
    module level -- not visible in this chunk.
    """

    def init(self,rom_file,ale_frame_skip):
        # Deliberately named `init`, not `__init__`: construction and emulator
        # setup are separated by the caller.
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
        # NOTE(review): `ale.set(...)` presumes a generic setter on this ALE
        # binding; newer bindings use setInt/setBool/setFloat -- confirm.
        self.ale.set("random_seed",123)
        self.ale.set("disable_color_averaging",1)
        self.ale.set("frame_skip",ale_frame_skip)
        self.ale.loadROM(rom_file)
        self.legal_actions = self.ale.getMinimalActionSet()
        # Preallocate the RAM buffer; getRAM fills it in place.
        ram_size = self.ale.getRAMSize()
        self.ram = np.zeros((ram_size),dtype=np.uint8)
        self.ale.getRAM(self.ram)
        # NOTE(review): getRAM fills the array and (in the classic binding)
        # returns None, so `self.state` is likely None here -- verify intent.
        self.state = self.ale.getRAM(self.ram)

    def reset_state(self):
        # Restart the current episode.
        self.ale.reset_game()

    def set_action(self,a):
        # Store the action to be applied on the next step().
        self.action = a

    def step(self):
        # Apply the stored action for one (frame-skipped) step.
        # Returns True when the episode has ended.
        self.reward = self.ale.act(self.action)
        is_terminal = self.ale.game_over()
        return is_terminal

    def get_state(self):
        # Refresh and return the 128-byte (typically) RAM snapshot.
        self.ale.getRAM(self.ram)
        return self.ram

    def get_reward(self):
        # Reward earned by the most recent step().
        return self.reward
class pyrlcade_environment(object):
    """Thin wrapper around an ALE (Arcade Learning Environment) emulator
    exposing a reset / set_action / step / get_state / get_reward cycle.

    NOTE(review): requires `ALEInterface` and `np` (numpy) to be imported at
    module level -- not visible in this chunk.
    """

    def init(self, rom_file, ale_frame_skip):
        """Configure the emulator, load the ROM and allocate the RAM buffer.

        rom_file       -- path to the Atari ROM to load.
        ale_frame_skip -- number of emulator frames each act() repeats.
        """
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.set("random_seed", 123)
        self.ale.set("disable_color_averaging", 1)
        self.ale.set("frame_skip", ale_frame_skip)
        self.ale.loadROM(rom_file)
        self.legal_actions = self.ale.getMinimalActionSet()
        # Preallocate the RAM buffer once; getRAM fills it in place.
        ram_size = self.ale.getRAMSize()
        self.ram = np.zeros((ram_size), dtype=np.uint8)
        self.ale.getRAM(self.ram)
        # BUGFIX: the original did `self.state = self.ale.getRAM(self.ram)`,
        # which read RAM a second time and stored getRAM's return value
        # (None in the classic ale_python_interface).  `state` now aliases
        # the live RAM buffer instead.
        self.state = self.ram

    def reset_state(self):
        """Restart the current episode."""
        self.ale.reset_game()

    def set_action(self, a):
        """Store the action to be applied on the next step()."""
        self.action = a

    def step(self):
        """Apply the stored action; return True when the episode ended."""
        self.reward = self.ale.act(self.action)
        is_terminal = self.ale.game_over()
        return is_terminal

    def get_state(self):
        """Refresh and return the RAM snapshot (uint8 array)."""
        self.ale.getRAM(self.ram)
        return self.ram

    def get_reward(self):
        """Reward earned by the most recent step()."""
        return self.reward
#clear screen screen.fill((0,0,0)) #get atari screen pixels and blit them numpy_surface = np.frombuffer(game_surface.get_buffer(),dtype=np.int32) ale.getScreenRGB(numpy_surface) logger.log(a, TYPE_ACTION, cur_time) #if cur_time %2 == 0: logger.log(numpy_surface, TYPE_SCREEN, cur_time) del numpy_surface screen.blit(pygame.transform.scale2x(game_surface),(0,0)) #get RAM ram_size = ale.getRAMSize() ram = np.zeros((ram_size),dtype=np.uint8) ale.getRAM(ram) #Display ram bytes font = pygame.font.SysFont("Ubuntu Mono",32) text = font.render("RAM: " ,1,(255,208,208)) screen.blit(text,(330,10)) font = pygame.font.SysFont("Ubuntu Mono",25) height = font.get_height()*1.2 line_pos = 40 ram_pos = 0 while(ram_pos < 128): ram_string = ''.join(["%02X "%ram[x] for x in range(ram_pos,min(ram_pos+16,128))])
def main():
    """Measure ALE emulator stochasticity: replay one fixed action sequence
    `bunch` times under different reset strategies, group the final frames by
    pixel-identity, and dump state/RAM comparison figures.

    NOTE(review): relies on module-level globals not visible in this chunk
    (`test`, `sequence`, `bunch`, `frame_skip`, `clear_print`, `process_ram`,
    `ALEInterface`, `np`, `copy`, `plt`) -- confirm they exist at call time.
    Indentation below is reconstructed from a whitespace-mangled paste.
    """
    result = {
        'name': [],
        'grouped_num': [],
        'distribution': [],
    }
    result_str = ''
    # Alternate game batches kept for reference ('-n' marks games not in the
    # nature-DQN list; they are skipped below):
    # all_game_list = ['air_raid-n', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis']
    # all_game_list = ['bank_heist', 'battle_zone', 'beam_rider', 'berzerk-n', 'bowling', 'boxing', 'breakout', 'carnival-n']
    # all_game_list = ['centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk']
    # all_game_list = ['elevator_action-n', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar']
    # all_game_list = ['hero', 'ice_hockey', 'jamesbond', 'journey_escape-n', 'kangaroo', 'krull', 'kung_fu_master']
    # all_game_list = ['montezuma_revenge-n', 'ms_pacman', 'name_this_game', 'phoenix-n', 'pitfall-n', 'pong', 'pooyan-n']
    # all_game_list = ['private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing-n']
    # all_game_list = ['solaris-n', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down']
    # all_game_list = ['venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge-n', 'zaxxon']
    # all_game_list = ['pong', 'assault','ms_pacman']
    all_game_list = ['assault']
    for game in all_game_list:
        if '-n' in game:
            '''games that are not in the nature DQN list'''
            continue
        import atari_py
        game_path = atari_py.get_game_path(game)
        game_path = str.encode(game_path)
        # Deterministic emulator: no sticky actions, fixed seed.
        env = ALEInterface()
        env.setFloat('repeat_action_probability'.encode('utf-8'), 0.0)
        env.setInt(b'random_seed', 3)
        env.loadROM(game_path)
        env.reset_game()
        # Capture the post-reset state needed by the chosen reset strategy.
        if test in ['restoreState']:
            state_after_reset = env.cloneState()
        if test in ['restoreSystemState']:
            state_after_reset = env.cloneSystemState()
        if test in ['setRAM']:
            ram_after_reset = env.getRAM()
            state_after_reset = env.cloneSystemState()
            # Mask of RAM bytes to perturb (produced by the mask-discovery run).
            ram_candidate = np.load(
                './stochasticity_ram_mask/{}.npy'.format(game),
            )
        print('=====================================================')
        # Load (or generate once and save) the fixed action sequence.
        try:
            action_sequence = np.load(
                './action_sequence/action_sequence_{}_{}.npy'.format(
                    sequence,
                    game,
                ))
            print('action_sequence loaded')
        except Exception as e:
            '''generate a sequence of actions'''
            action_sequence = np.random.randint(
                len(env.getMinimalActionSet()),
                size=sequence,
            )
            np.save(
                './action_sequence/action_sequence_{}_{}.npy'.format(
                    sequence,
                    game,
                ),
                action_sequence,
            )
            print('action_sequence generated')
        print('=====================================================')
        bunch_obs = []        # unique final observations seen so far
        distribution = []     # how many rollouts ended in each unique obs
        episode_length = -1
        state_metrix = []     # per-rollout screen sequences
        ram_metrix = []       # per-rollout processed RAM sequences
        for bunch_i in range(bunch):
            # Re-seed / restore according to the strategy under test.
            if test in ['loadROM']:
                env.setInt(b'random_seed', bunch_i)
                env.loadROM(game_path)
                env.reset_game()
            elif test in ['restoreState']:
                env.restoreState(state_after_reset)
            elif test in ['restoreSystemState']:
                env.restoreSystemState(state_after_reset)
            elif test in ['setRAM']:
                env.reset_game()
                env.restoreSystemState(state_after_reset)
                env.setRAM(ram_after_reset)
                # Overwrite only the masked bytes with a per-rollout value.
                env.setRAM(env.getRAM() * (1 - ram_candidate) +
                           ram_candidate * (bunch_i % 255))
            state_sequence = []
            ram_sequence = []
            has_terminated = False
            # Replay the fixed action sequence with manual frame skip.
            for sequence_i in range(sequence):
                for frame_skip_i in range(frame_skip):
                    if not has_terminated:
                        env.act(env.getMinimalActionSet()[
                            action_sequence[sequence_i]])
                        if env.game_over():
                            episode_length = sequence_i
                            has_terminated = True
                    if has_terminated:
                        break
                try:
                    clear_print('[{}|{}|{}]'.format(bunch_i, sequence_i,
                                                    episode_length))
                except Exception as e:
                    pass
                state_sequence += [env.getScreenRGB()]
                ram_sequence += [process_ram(env.getRAM())]
                if has_terminated:
                    break
            if sequence > 0:
                if episode_length < 0:
                    # raise Exception('Did not terminated')
                    print('# WARNING: Did not terminated')
            obs = env.getScreenRGB()
            state_metrix += [copy.deepcopy(state_sequence)]
            ram_metrix += [copy.deepcopy(ram_sequence)]
            # Group this rollout's final frame with an existing identical one
            # (max abs pixel diff < 1), else open a new group.
            if_has_identical_one = False
            for bunch_obs_i in range(len(bunch_obs)):
                max_value = np.max(np.abs(obs - bunch_obs[bunch_obs_i]))
                if max_value < 1:
                    if_has_identical_one = True
                    distribution[bunch_obs_i] += 1
                    break
            if if_has_identical_one is False:
                bunch_obs += [obs]
                distribution += [1]
        grouped_num = len(bunch_obs)
        result_str = '{}game:{} grouped_num:{} distribution:{} \n'.format(
            result_str,
            game,
            grouped_num,
            distribution,
        )
        # First iteration: the lists do not exist yet (NameError) -> create.
        try:
            game_list += [game]
        except Exception as e:
            game_list = [game]
        try:
            grouped_num_list += [grouped_num]
        except Exception as e:
            grouped_num_list = [grouped_num]
        # Pad all rollout sequences to the longest episode length.
        max_lenth = 0
        for bunch_i in range(len(state_metrix)):
            if len(state_metrix[bunch_i]) > max_lenth:
                max_lenth = len(state_metrix[bunch_i])
        for bunch_i in range(len(state_metrix)):
            state_metrix[bunch_i] += ([
                np.zeros(shape=state_metrix[0][0].shape,
                         dtype=state_metrix[0][0].dtype)
            ] * (max_lenth - len(state_metrix[bunch_i])))
            # NOTE(review): state_metrix[bunch_i] was just padded above, so
            # this pad count is always 0 -- ram_metrix rows are likely meant
            # to use their pre-padding length.  Looks like a bug; confirm.
            ram_metrix[bunch_i] += ([
                np.zeros(shape=ram_metrix[0][0].shape,
                         dtype=ram_metrix[0][0].dtype)
            ] * (max_lenth - len(state_metrix[bunch_i])))
        # Deduplicate every frame across all rollouts into state_list and
        # record each frame's id in state_metrix_id.
        state_list = []
        state_metrix_id = np.zeros((len(state_metrix), len(state_metrix[0])),
                                   dtype=int)
        for bunch_i in range(len(state_metrix)):
            for sequence_i in range(len(state_metrix[0])):
                found_in_state_list = False
                for state_list_id in range(len(state_list)):
                    if np.max(state_list[state_list_id] -
                              state_metrix[bunch_i][sequence_i]) < 1:
                        state_metrix_id[bunch_i][sequence_i] = state_list_id
                        found_in_state_list = True
                        break
                if not found_in_state_list:
                    state_list += [np.copy(state_metrix[bunch_i][sequence_i])]
                    state_metrix_id[bunch_i][sequence_i] = (len(state_list) -
                                                            1)
        # Sort rollout rows (lexicographically, descending) so similar
        # trajectories are adjacent in the plot; keep the unsorted copy to
        # recover the matching RAM rows.
        state_metrix_id_unsorted = np.copy(state_metrix_id)
        state_metrix_id = state_metrix_id.tolist()
        state_metrix_id.sort(key=lambda row: row[:], reverse=True)
        state_metrix_id = np.array(state_metrix_id)
        fig, ax = plt.subplots()
        im = ax.imshow(state_metrix_id)
        plt.show()
        plt.savefig(
            './results/{}_state_metrix_id.jpg'.format(game),
            dpi=600,
        )
        # Assemble one big image tiling every rollout (rows, with a 10px /
        # 5px separator band) against every step (columns).
        state_metrix_figure = np.zeros(
            ((10 + state_metrix[0][0].shape[0]) * len(state_metrix),
             state_metrix[0][0].shape[1] * len(state_metrix[0]),
             state_metrix[0][0].shape[2]),
            dtype=state_metrix[0][0].dtype)
        ram_metrix_figure = np.zeros(
            ((5 + ram_metrix[0][0].shape[0]) * len(state_metrix),
             ram_metrix[0][0].shape[1] * len(state_metrix[0]),
             ram_metrix[0][0].shape[2]),
            dtype=ram_metrix[0][0].dtype)
        ram_candidate = list(range(env.getRAMSize()))
        # Blue separator band above each RAM row.
        for bunch_i in range(len(state_metrix)):
            ram_metrix_figure[((bunch_i) * (5 + ram_metrix[0][0].shape[0])):(
                5 + (bunch_i) * (5 + ram_metrix[0][0].shape[0])), :, 2] = 255
        for bunch_i in range(len(state_metrix)):
            for sequence_i in range(len(state_metrix[0])):
                state_metrix_figure[
                    (10 + (bunch_i) *
                     (10 + state_metrix[0][0].shape[0])):(bunch_i + 1) *
                    (10 + state_metrix[0][0].shape[0]),
                    (sequence_i) *
                    state_metrix[0][0].shape[1]:(sequence_i + 1) *
                    state_metrix[0][0].shape[1]] = state_list[
                        state_metrix_id[bunch_i][sequence_i]]
                # Find which unsorted rollout this sorted row came from so the
                # RAM tile matches the screen tile.
                for bunch_ii in range(state_metrix_id.shape[0]):
                    if np.max(state_metrix_id_unsorted[bunch_ii] -
                              state_metrix_id[bunch_i]) < 1:
                        at_unsorted_bunch = bunch_ii
                        break
                ram_metrix_figure[(
                    5 + (bunch_i) *
                    (5 + ram_metrix[0][0].shape[0])):(bunch_i + 1) *
                    (5 + ram_metrix[0][0].shape[0]),
                    (sequence_i) *
                    ram_metrix[0][0].shape[1]:(sequence_i + 1) *
                    ram_metrix[0][0].shape[1]] = ram_metrix[
                        at_unsorted_bunch][sequence_i]
        # Mark (in red) the first step where adjacent sorted rollouts diverge.
        for bunch_i in range(len(state_metrix)):
            for sequence_i in range(len(state_metrix[0])):
                if bunch_i > 0:
                    if state_metrix_id[bunch_i][sequence_i] != state_metrix_id[
                            bunch_i - 1][sequence_i]:
                        # draw a line to seperate the bunches
                        previous = ram_metrix_figure[(
                            5 + (bunch_i - 1) *
                            (5 + ram_metrix[0][0].shape[0])):(
                                (bunch_i) * (5 + ram_metrix[0][0].shape[0])),
                            sequence_i, 0]
                        later = ram_metrix_figure[(
                            5 + (bunch_i) *
                            (5 + ram_metrix[0][0].shape[0])):(
                                (bunch_i + 1) *
                                (5 + ram_metrix[0][0].shape[0])),
                            sequence_i, 0]
                        delta = np.abs(previous - later)
                        state_metrix_figure[(
                            (bunch_i) * (10 + state_metrix[0][0].shape[0])):(
                                10 + (bunch_i) *
                                (10 + state_metrix[0][0].shape[0])),
                            (sequence_i) * state_metrix[0][0].shape[1]:,
                            0] = 255
                        ram_metrix_figure[(
                            (bunch_i) * (5 + ram_metrix[0][0].shape[0])):(
                                5 + (bunch_i) *
                                (5 + ram_metrix[0][0].shape[0])),
                            (sequence_i) * ram_metrix[0][0].shape[1]:,
                            0] = 255
                        ram_metrix_figure[(
                            (bunch_i) * (5 + ram_metrix[0][0].shape[0])):(
                                5 + (bunch_i) *
                                (5 + ram_metrix[0][0].shape[0])),
                            (sequence_i) * ram_metrix[0][0].shape[1]:,
                            1:] = 0
        from PIL import Image
        Image.fromarray(state_metrix_figure).save(
            "./results/{}_state_metrix_figure.jpeg".format(game))
        Image.fromarray(ram_metrix_figure.astype(
            state_metrix_figure.dtype)).save(
                "./results/{}_ram_metrix_figure.jpeg".format(game))
    print(result_str)
    print('===============')
    for game_i in range(len(game_list)):
        print(game_list[game_i])
    for grouped_num_i in range(len(grouped_num_list)):
        print(grouped_num_list[grouped_num_i])
# Fragment of a manual-control loop: translate pressed keys into an ALE
# action, act, then redraw the screen and the RAM hex dump.
# NOTE(review): depends on outer-scope names (keys, pressed, ale, screen,
# game_surface, key_action_tform_table, total_reward, np, pygame) and is
# truncated at both ends in this chunk.
keys |= pressed[pygame.K_z] << 4
a = key_action_tform_table[keys]
reward = ale.act(a)
total_reward += reward
#clear screen
screen.fill((0, 0, 0))
#get atari screen pixels and blit them
numpy_surface = np.frombuffer(game_surface.get_buffer(), dtype=np.int32)
ale.getScreenRGB(numpy_surface)
del numpy_surface
screen.blit(pygame.transform.scale2x(game_surface), (0, 0))
#get RAM
ram_size = ale.getRAMSize()
ram = np.zeros((ram_size), dtype=np.uint8)
ale.getRAM(ram)
#Display ram bytes
font = pygame.font.SysFont("Ubuntu Mono", 32)
text = font.render("RAM: ", 1, (255, 208, 208))
screen.blit(text, (330, 10))
font = pygame.font.SysFont("Ubuntu Mono", 25)
height = font.get_height() * 1.2
line_pos = 40
ram_pos = 0
# Render RAM 16 bytes per line as two-digit hex.
# NOTE: chunk truncated here -- the join(...) call continues past this view.
while (ram_pos < 128):
    ram_string = ''.join(
def main():
    """Discover which RAM bytes encode emulator stochasticity: replay one
    fixed action sequence under different seeds and intersect the RAM deltas
    observed at the first frame where the screen diverges from a baseline
    rollout; save the surviving single-byte mask.

    NOTE(review): relies on module-level globals not visible in this chunk
    (`sequence`, `bunch`, `frame_skip`, `ALEInterface`, `np`) -- confirm they
    exist at call time.  Indentation reconstructed from a mangled paste.
    """
    result = {
        'name': [],
        'grouped_num': [],
        'distribution': [],
    }
    result_str = ''
    # Alternate game batches kept for reference ('-n' marks games not in the
    # nature-DQN list; they are skipped below):
    # all_game_list = ['air_raid-n', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis']
    # all_game_list = ['bank_heist', 'battle_zone', 'beam_rider', 'berzerk-n', 'bowling', 'boxing', 'breakout', 'carnival-n']
    # all_game_list = ['centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk']
    # all_game_list = ['elevator_action-n', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar']
    # all_game_list = ['hero', 'ice_hockey', 'jamesbond', 'journey_escape-n', 'kangaroo', 'krull', 'kung_fu_master']
    # all_game_list = ['montezuma_revenge-n', 'ms_pacman', 'name_this_game', 'phoenix-n', 'pitfall-n', 'pong', 'pooyan-n']
    # all_game_list = ['private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing-n']
    # all_game_list = ['solaris-n', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down']
    # all_game_list = ['venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge-n', 'zaxxon']
    all_game_list = ['assault']
    for game in all_game_list:
        if '-n' in game:
            '''games that are not in the nature DQN list'''
            continue
        import atari_py
        game_path = atari_py.get_game_path(game)
        game_path = str.encode(game_path)
        # Deterministic emulator: no sticky actions, fixed seed.
        env = ALEInterface()
        env.setFloat('repeat_action_probability'.encode('utf-8'), 0.0)
        env.setInt(b'random_seed', 3)
        env.loadROM(game_path)
        env.reset_game()
        print('=====================================================')
        # Load (or generate once and save) the fixed action sequence.
        try:
            action_sequence = np.load(
                './action_sequence/action_sequence_{}_{}.npy'.format(
                    sequence,
                    game,
                ))
            print('action_sequence loaded')
        except Exception as e:
            '''generate a sequence of actions'''
            action_sequence = np.random.randint(
                len(env.getMinimalActionSet()),
                size=sequence,
            )
            np.save(
                './action_sequence/action_sequence_{}_{}.npy'.format(
                    sequence,
                    game,
                ),
                action_sequence,
            )
            print('action_sequence generated')
        print('=====================================================')
        # Baseline rollout: record screen + RAM at every step.
        state_sequence_base = []
        ram_sequence_base = []
        has_terminated = False
        for sequence_i in range(sequence):
            state_sequence_base += [env.getScreenRGB()]
            ram_sequence_base += [env.getRAM()]
            for frame_skip_i in range(frame_skip):
                if not has_terminated:
                    env.act(
                        env.getMinimalActionSet()[action_sequence[sequence_i]])
                    if env.game_over():
                        episode_length = sequence_i
                        has_terminated = True
                if has_terminated:
                    break
            if has_terminated:
                break
        if has_terminated in [False]:
            raise Exception('sequence length is not enough')
        # Candidate mask starts as all RAM bytes; it is intersected with the
        # observed deltas every time a rollout diverges from the baseline.
        ram_candidate = np.ones((env.getRAMSize()), dtype=np.uint8)
        state_sequence_branch = []
        ram_sequence_branch = []
        for bunch_i in range(bunch):
            # Re-seed per rollout so the emulator noise differs.
            env.setInt(b'random_seed', bunch_i)
            env.loadROM(game_path)
            env.reset_game()
            has_terminated = False
            for sequence_i in range(sequence):
                state_sequence_branch += [env.getScreenRGB()]
                ram_sequence_branch += [env.getRAM()]
                if sequence_i > 0:
                    max_value = np.max(
                        np.abs(env.getScreenRGB() -
                               state_sequence_base[sequence_i]))
                    if max_value > 0:
                        # First visible divergence: keep only RAM bytes that
                        # also differed on the previous step.
                        delta_ram = np.sign(
                            np.abs(ram_sequence_branch[sequence_i - 1] -
                                   ram_sequence_base[sequence_i - 1]))
                        ram_candidate *= delta_ram
                        remain = np.sum(ram_candidate)
                        print('remain {} bytes'.format(remain))
                        if remain <= 1:
                            if remain == 1:
                                # Exactly one byte left: that is the mask.
                                print(ram_candidate)
                                np.save(
                                    './stochasticity_ram_mask/{}.npy'.format(
                                        game),
                                    ram_candidate,
                                )
                                raise Exception('done')
                            else:
                                raise Exception('error')
                        has_terminated = True
                if has_terminated:
                    break
                for frame_skip_i in range(frame_skip):
                    if not has_terminated:
                        env.act(env.getMinimalActionSet()[
                            action_sequence[sequence_i]])
                        if env.game_over():
                            has_terminated = True
                    if has_terminated:
                        break
                if has_terminated:
                    break
def __init__(self, random_seed, frame_skip, repeat_action_probability, sound, display_screen, block_state_repr=None, enemy_state_repr=None, friendly_state_repr=None): ale = ALEInterface() # Get & Set the desired settings if random_seed is not None: ale.setInt('random_seed', random_seed) ale.setInt('frame_skip', frame_skip) ale.setFloat('repeat_action_probability', repeat_action_probability) if display_screen: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', sound) ale.setBool('display_screen', display_screen) # Load the ROM file ale.loadROM('qbert.bin') # Get the list of legal actions legal_actions = ale.getLegalActionSet() minimal_actions = ale.getMinimalActionSet() logging.debug('Legal actions: {}'.format( [action_number_to_name(a) for a in legal_actions])) logging.debug('Minimal actions: {}'.format( [action_number_to_name(a) for a in minimal_actions])) width, height = ale.getScreenDims() rgb_screen = np.empty([height, width, 3], dtype=np.uint8) ram_size = ale.getRAMSize() ram = np.zeros(ram_size, dtype=np.uint8) # ALE components self.ale = ale self.lives = ale.lives() self.rgb_screen = rgb_screen self.ram_size = ale.getRAMSize() self.ram = ram # Verbose state representation self.desired_color = COLOR_YELLOW self.block_colors = INITIAL_COLORS self.enemies = INITIAL_ENEMY_POSITIONS self.friendlies = INITIAL_FRIENDLY_POSITIONS self.discs = INITIAL_DISCS self.current_row, self.current_col = 0, 0 self.level = 1 self.enemy_present = False self.friendly_present = False self.block_state_repr = block_state_repr self.enemy_state_repr = enemy_state_repr self.friendly_state_repr = friendly_state_repr self.num_colored_blocks = 0
class KungFuMaster(object):
    """RL wrapper around the ALE Kung-Fu Master game with a hand-crafted
    discrete state, shaped rewards from RAM inspection, and an optional
    pygame GUI for manual control / visualisation.

    NOTE(review): relies on module-level names not visible in this chunk
    (ALEInterface, np, os, sys, pygame, STATES, key_action_tform_table) --
    confirm imports at file level.

    BUGFIX: the original `input()` used Python 2 print statements
    (`print "Game paused: ", ...`), which are a SyntaxError under Python 3
    while the rest of the class uses print() calls; converted to print().
    """

    def __init__(
            self,
            rom='/home/josema/AI/ALE/Arcade-Learning-Environment/Roms/kung_fu_master.bin',
            trainsessionname='test'):
        """Store configuration and open the results file; emulator setup is
        deferred to init()."""
        self.agent = None
        self.isAuto = True
        self.gui_visible = False
        self.userquit = False
        self.optimalPolicyUser = False  # optimal policy set by user
        self.trainsessionname = trainsessionname
        self.elapsedtime = 0  # elapsed time for this experiment
        self.keys = 0
        # Configuration
        self.pause = False  # game is paused
        self.debug = False
        self.sleeptime = 0.0
        self.command = 0
        self.iteration = 0
        self.cumreward = 0
        self.cumreward100 = 0  # cum reward for statistics
        self.cumscore100 = 0
        self.ngoalreached = 0
        self.max_level = 1
        self.hiscore = 0
        self.hireward = -1000000
        self.resfile = open("data/" + self.trainsessionname + ".dat", "a+")
        self.legal_actions = 0
        self.rom = rom
        self.key_status = []

    def init(self, agent):
        """Init after creation (uses args set from cli): create the ALE
        emulator, load the ROM, optionally open the pygame window, and
        initialise the agent with the state/action space sizes."""
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        ram_size = self.ale.getRAMSize()
        self.ram = np.zeros((ram_size), dtype=np.uint8)
        if (self.gui_visible):
            os.environ['SDL_VIDEO_CENTERED'] = '1'
        if sys.platform == 'darwin':
            pygame.init()
            self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            pygame.init()
            self.ale.setBool('sound', True)
        self.ale.setBool('display_screen', False)
        self.ale.loadROM(self.rom)
        self.legal_actions = self.ale.getLegalActionSet()
        if (self.gui_visible):
            (self.screen_width, self.screen_height) = self.ale.getScreenDims()
            print("width/height: " + str(self.screen_width) + "/" +
                  str(self.screen_height))
            (display_width, display_height) = (1024, 420)
            self.screen = pygame.display.set_mode(
                (display_width, display_height))
            pygame.display.set_caption(
                "Reinforcement Learning - Sapienza - Jose M Salas")
            self.numpy_surface = np.zeros(
                (self.screen_height, self.screen_width, 3), dtype=np.uint8)
            self.game_surface = pygame.Surface(
                (self.screen_width, self.screen_height))
            pygame.display.flip()
            #init clock
            self.clock = pygame.time.Clock()
        self.agent = agent
        self.nactions = len(
            self.legal_actions
        )  # 0: not moving, 1: left, 2: right, 3: up, 4: down
        for i in range(self.nactions):
            self.key_status.append(False)
        print(self.nactions)
        # ns = 89999 # Number of statuses if we use enemy type ram info without level number
        #FINAL ns = 489999 # Number of statuses if we use enemy type ram info
        ns = 4899999  # Number of statuses if we use enemy type ram info
        # ns = 48999
        print('Number of states: %d' % ns)
        self.agent.init(ns, self.nactions)  # 1 for RA not used here

    def initScreen(self):
        """(Re)open the pygame window; used when the GUI is enabled after
        training has already started."""
        if (self.gui_visible):
            if sys.platform == 'darwin':
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                pygame.init()
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', False)
        if (self.gui_visible):
            (self.screen_width, self.screen_height) = self.ale.getScreenDims()
            print("width/height: " + str(self.screen_width) + "/" +
                  str(self.screen_height))
            (display_width, display_height) = (1024, 420)
            self.screen = pygame.display.set_mode(
                (display_width, display_height))
            pygame.display.set_caption(
                "Reinforcement Learning - Sapienza - Jose M Salas")
            self.numpy_surface = np.zeros(
                (self.screen_height, self.screen_width, 3), dtype=np.uint8)
            self.game_surface = pygame.Surface(
                (self.screen_width, self.screen_height))
            pygame.display.flip()
            #init clock
            self.clock = pygame.time.Clock()

    def reset(self):
        """Reset per-episode bookkeeping and the RAM-derived observations."""
        self.pos_x = 0
        self.pos_y = 0
        # Kung fu master observations
        self.enemy_pos = 0
        self.n_enemies = 0
        self.my_pos = 0
        self.danger_pos = 0
        self.danger_type = 0
        self.enemy_type = 0  # 0, 1, 2, 3, 80, 81, 82, 40
        self.blocked = 0
        self.prev_blocked = 0
        self.hold_hit = 0
        self.time_left1 = 0
        self.time_left2 = 0
        self.my_energy = 39
        self.previous_my_energy = 39
        self.lifes = 3
        self.previous_lifes = 3
        self.got_hit = 0
        self.got_blocked = 0
        self.got_unblocked = 0
        self.still_blocked = False
        self.starting_pos = 0
        self.level = 1
        self.score = 0
        self.cumreward = 0
        self.cumscore = 0
        self.action_reward = 0
        self.current_reward = 0  # accumulate reward over all events happened during this action until next different state
        self.prev_state = None  # previous state
        self.firstAction = True  # first action of the episode
        self.finished = False  # episode finished
        self.newstate = True  # new state reached
        self.numactions = 0  # number of actions in this episode
        self.iteration += 1
        # choose greedy action selection for the entire episode every 100th
        # iteration (or when forced by the user)
        self.agent.optimal = self.optimalPolicyUser or (self.iteration %
                                                        100) == 0

    def pair_function(self):
        """Combine the number of enemies, player blocked and danger type
        information into 7 different states (Cantor-style pairing scaled by
        the blocked flag)."""
        if self.n_enemies > 0:
            self.danger_type = 0
        pair = (int)(
            (0.5 * (self.n_enemies + self.danger_type) *
             (self.n_enemies + self.danger_type + 1) + self.danger_type + 1) *
            (1 - (self.blocked / 128)))
        if pair > 8:
            return 5  #game not started yet
        else:
            return pair

    def enemy_type_s(self):
        """Compress the raw enemy-type RAM byte into a small contiguous id."""
        if self.enemy_type > 127:
            return (self.enemy_type - 128 + 4)
        elif self.enemy_type == 64:
            return 8
        else:
            return self.enemy_type

    def getstate(self):
        """Encode the current observations into a single discrete state id
        (digit-positional encoding: level, pair, enemy type, positions,
        hold_hit)."""
        x = (int)((self.level - 1) * 1000000 + self.pair_function() * 100000 +
                  (self.enemy_type_s() * 10000) +
                  np.rint(self.my_pos / 32) * 1000 +
                  np.rint(self.enemy_pos / 32) * 100 +
                  np.rint(self.danger_pos / 32) * 10 +
                  np.rint(self.hold_hit / 16))
        return x

    def goal_reached(self):
        """Episode goal: reaching level 5."""
        return (self.level == 5)

    def update(self, a):
        """Execute action `a` in the emulator, refresh all RAM-derived
        observations and event flags, and accumulate shaped reward."""
        self.command = a
        # Update RAM
        self.ale.getRAM(self.ram)
        # Get info from RAM
        self.enemy_pos = self.ram[72]
        self.n_enemies = self.ram[91]
        self.danger_pos = self.ram[73]
        self.my_pos = self.ram[74]
        self.hold_hit = self.ram[77]
        self.enemy_type = self.ram[54]
        if self.level < self.ram[31]:
            # Level increased: remember the new spawn position.
            self.starting_pos = self.ram[74]
            self.level = self.ram[31]
            self.max_level = max(self.level, self.max_level)
        # Danger/Enemy position:
        # 49 = no danger
        # 50 = danger approaching from left
        # 208 = danger approaching from right
        # ram[96] = 6, danger comes from top
        # ram[96] = 29, danger comes from bottom
        # ram[96] = 188, none
        if self.ram[96] == 6:
            self.danger_type = 0
        elif self.ram[96] == 29:
            self.danger_type = 1
        else:
            self.danger_type = 2
        self.time_left1 = self.ram[27]
        self.time_left2 = self.ram[28]
        self.previous_my_energy = self.my_energy
        self.my_energy = self.ram[75]
        # Energy dropped while actually playing and not blocked -> got hit.
        if self.my_energy < self.previous_my_energy and not self.still_blocked and self.ram[
                34] == 0:
            self.got_hit = STATES['GotHit']
        else:
            self.got_hit = 0
        self.previous_lifes = self.lifes
        self.lifes = self.ram[29]
        self.prev_blocked = self.blocked
        self.blocked = self.ram[61]
        # Edge-detect the blocked flag to emit one-shot events.
        if self.blocked > self.prev_blocked and not self.still_blocked:
            self.got_blocked = STATES['GotBlocked']
            self.still_blocked = True
            self.got_unblocked = 0
        elif self.blocked < self.prev_blocked and self.still_blocked:
            self.got_unblocked = STATES['GotUnblocked']
            self.still_blocked = False
            self.got_blocked = 0
        else:
            self.got_blocked = 0
            self.got_unblocked = 0
        self.prev_state = self.getstate()  # remember previous state
        self.current_reward = 0  # accumulate reward over all events happened during this action until next different state
        self.numactions += 1  # total number of actions executed in this episode
        if (self.firstAction):
            self.starting_pos = self.ram[74]
            self.firstAction = False
            self.current_reward = self.ale.act(a)
        else:
            self.current_reward = self.ale.act(a)
            if self.ram[34] == 0:  #only when playing
                # Shape movement reward relative to the level start position.
                if (a == 3 and self.starting_pos < self.my_pos) or (
                        a == 4 and self.starting_pos > self.my_pos):
                    self.action_reward = STATES['MoveFW']
                elif (a == 3 and self.starting_pos > self.my_pos) or (
                        a == 4 and self.starting_pos < self.my_pos):
                    self.action_reward = STATES['MoveBW']
                else:
                    self.action_reward = STATES['NotMoving']
        self.score += self.current_reward
        self.current_reward += self.action_reward
        # check if episode terminated
        if self.goal_reached():
            self.current_reward += STATES['Alive']
            self.ngoalreached += 1
            self.finished = True
        if (self.ale.game_over()):
            self.current_reward += STATES['Dead']
            if self.level > 1:
                print('game over in level ' + str(self.level))
            if self.my_energy > 0 and self.lifes == 3:
                print('Game over alive????')
            self.ale.reset_game()
            self.finished = True
        # Past level 2: turn the GUI on to watch the agent play.
        if self.level > 2:
            if self.gui_visible == False:
                self.gui_visible = True
                self.initScreen()

    def input(self):
        """Poll pygame events for user commands (pause, speed, manual play).
        Returns False when the window is closed, True otherwise."""
        self.isPressed = False
        if self.gui_visible:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    return False
                if event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_SPACE:
                        self.pause = not self.pause
                        print("Game paused: ", self.pause)
                    elif event.key == pygame.K_a:
                        self.isAuto = not self.isAuto
                        self.sleeptime = int(self.isAuto) * 0.07
                    elif event.key == pygame.K_s:
                        self.sleeptime = 1.0
                        self.agent.debug = False
                    elif event.key == pygame.K_d:
                        self.sleeptime = 0.07
                        self.agent.debug = False
                    elif event.key == pygame.K_f:
                        self.sleeptime = 0.005
                        self.agent.debug = False
                    elif event.key == pygame.K_g:
                        self.sleeptime = 0.0
                        self.agent.debug = False
                    elif event.key == pygame.K_o:
                        self.optimalPolicyUser = not self.optimalPolicyUser
                        print("Best policy: ", self.optimalPolicyUser)
                    elif event.key == pygame.K_q:
                        self.userquit = True
                        print("User quit !!!")
                    else:
                        # Any other key: translate the pressed-key bitmask
                        # into a game action for manual control.
                        pressed = pygame.key.get_pressed()
                        self.keys = 0
                        self.keys |= pressed[pygame.K_UP]
                        self.keys |= pressed[pygame.K_DOWN] << 1
                        self.keys |= pressed[pygame.K_LEFT] << 2
                        self.keys |= pressed[pygame.K_RIGHT] << 3
                        self.keys |= pressed[pygame.K_z] << 4
                        self.command = key_action_tform_table[self.keys]
                        self.key_status[self.command] = True
                if event.type == pygame.KEYUP:
                    pressed = pygame.key.get_pressed()
                    self.keys = 0
                    self.keys |= pressed[pygame.K_UP]
                    self.keys |= pressed[pygame.K_DOWN] << 1
                    self.keys |= pressed[pygame.K_LEFT] << 2
                    self.keys |= pressed[pygame.K_RIGHT] << 3
                    self.keys |= pressed[pygame.K_z] << 4
                    self.command = key_action_tform_table[self.keys]
                    self.key_status[self.command] = False
                    if not (True in self.key_status):
                        self.command = 0
        return True

    def getUserAction(self):
        """Action selected through the keyboard (manual mode)."""
        return self.command

    def getreward(self):
        """Shaped reward for the last action: game reward plus one-shot
        hit/blocked events, minus a penalty while blocked."""
        r = np.rint(
            self.current_reward
        ) + self.got_hit + self.got_blocked + self.got_unblocked - np.rint(
            self.blocked / 128)
        self.cumreward += r
        return r

    def print_report(self, printall=False):
        """Print per-episode statistics, track hiscores, and every 100
        iterations print aggregates and append a line to the results file."""
        toprint = printall
        ch = ' '
        if (self.agent.optimal):
            ch = '*'
            toprint = True
        s = 'Iter %6d, sc: %3d, l: %d, na: %4d, r: %5d %c' % (
            self.iteration, self.score, self.level, self.numactions,
            self.cumreward, ch)
        if self.score > self.hiscore:
            self.hiscore = self.score
            s += ' HISCORE '
            toprint = True
        if self.cumreward > self.hireward:
            self.hireward = self.cumreward
            s += ' HIREWARD '
            toprint = True
        if (toprint):
            print(s)
        self.cumreward100 += self.cumreward
        self.cumscore100 += self.score
        numiter = 100
        if (self.iteration % numiter == 0):
            #self.doSave()
            pgoal = float(self.ngoalreached * 100) / numiter
            print(
                '----------------------------------------------------------------------------------------------------------------------'
            )
            print(
                "%s %6d avg last 100: reward %d | score %.2f | level %d | p goals %.1f %%"
                % (self.trainsessionname, self.iteration,
                   self.cumreward100 / 100, float(self.cumscore100) / 100,
                   self.max_level, pgoal))
            print(
                '----------------------------------------------------------------------------------------------------------------------'
            )
            self.cumreward100 = 0
            self.cumscore100 = 0
            self.ngoalreached = 0
        sys.stdout.flush()
        self.resfile.write(
            "%d,%d,%d,%d\n" % (self.score, self.cumreward,
                               self.goal_reached(), self.numactions))
        self.resfile.flush()

    def draw(self):
        """Render the game frame, the RAM hex dump, the current action and
        the cumulative reward to the pygame window (no-op without GUI)."""
        if self.gui_visible:
            self.screen.fill((0, 0, 0))
            self.ale.getScreenRGB(self.numpy_surface)
            pygame.surfarray.blit_array(
                self.game_surface, np.transpose(self.numpy_surface,
                                                (1, 0, 2)))
            self.screen.blit(
                pygame.transform.scale2x(
                    pygame.transform.scale(
                        self.game_surface,
                        (self.screen_height, self.screen_height))), (0, 0))
            #Display ram bytes
            font = pygame.font.SysFont("Ubuntu Mono", 32)
            text = font.render("RAM: ", 1, (255, 208, 208))
            self.screen.blit(text, (430, 10))
            font = pygame.font.SysFont("Ubuntu Mono", 25)
            height = font.get_height() * 1.2
            line_pos = 40
            ram_pos = 0
            # 16 bytes per line as two-digit hex.
            while (ram_pos < 128):
                ram_string = ''.join([
                    "%02X " % self.ram[x]
                    for x in range(ram_pos, min(ram_pos + 16, 128))
                ])
                text = font.render(ram_string, 1, (255, 255, 255))
                self.screen.blit(text, (440, line_pos))
                line_pos += height
                ram_pos += 16
            #display current action
            font = pygame.font.SysFont("Ubuntu Mono", 32)
            text = font.render("Current Action: " + str(self.command), 1,
                               (208, 208, 255))
            height = font.get_height() * 1.2
            self.screen.blit(text, (430, line_pos))
            line_pos += height
            #display reward
            font = pygame.font.SysFont("Ubuntu Mono", 30)
            text = font.render("Total Reward: " + str(self.cumreward), 1,
                               (208, 255, 255))
            self.screen.blit(text, (430, line_pos))
            pygame.display.flip()
        else:
            return 0

    def quit(self):
        """Close the results file and shut down pygame."""
        self.resfile.close()
        pygame.quit()