def __init__(self, env_id, roms_path, difficulty=3, frame_ratio=3, frames_per_step=3, render=True, throttle=False, debug=False):
    """Create the environment and launch its emulator.

    env_id is the unique identifier handed to the Emulator, roms_path the
    directory holding the ROMs, difficulty the story-mode difficulty.
    frame_ratio, render, throttle and debug are forwarded to the Emulator;
    frames_per_step is only stored here.
    """
    # Configuration is stored before the emulator is created, exactly as before.
    self.difficulty = difficulty
    self.frame_ratio = frame_ratio
    self.frames_per_step = frames_per_step
    self.throttle = throttle

    emulator_options = dict(frame_ratio=frame_ratio, render=render, throttle=throttle, debug=debug)
    self.emu = Emulator(env_id, roms_path, "sfiii3n", setup_memory_addresses(), **emulator_options)

    # Book keeping: nothing has started and nobody has health or wins yet.
    self.started = False
    self.expected_health = dict(P1=0, P2=0)
    self.expected_wins = dict(P1=0, P2=0)
    self.round_done = self.stage_done = self.game_done = False
    self.stage = 1
def run_emulator(env_id, game_id, roms_path, memory_addresses, output_queue):
    """Start an emulator, take one empty step and publish the result.

    The data returned by ``emulator.step([])`` is put on ``output_queue``.
    The emulator is always closed afterwards, but only if it was actually
    created: when the constructor raises, that exception propagates unchanged
    instead of being masked by an AttributeError on ``None.close()``.
    """
    emulator = None
    try:
        emulator = Emulator(env_id, roms_path, game_id, memory_addresses)
        output_queue.put(emulator.step([]))
    finally:
        # Construction may have failed, in which case there is nothing to close.
        if emulator is not None:
            emulator.close()
def test_step(self):
    """A single empty step returns a 224x384 RGB frame and the watched address."""
    memory_addresses = {"test": Address("02000008", "u8")}
    game_id = "sfiii3n"
    # Emulator takes (env_id, roms_path, game_id, memory_addresses); the ROM
    # directory was missing from the call, shifting every later argument.
    roms_path = "/home/michael/dev/MAMEToolkit/MAMEToolkit/emulator/mame/roms"
    emulator = None
    try:
        emulator = Emulator("testEnv1", roms_path, game_id, memory_addresses)
        data = emulator.step([])
        assert_that(data["frame"].shape, equal_to((224, 384, 3)))
        assert_that(data["test"], equal_to(0))
    finally:
        # Only close what was actually opened so a start-up failure is not
        # hidden behind an AttributeError on None.
        if emulator is not None:
            emulator.close()
def test_screen_dimensions(self):
    """The emulator reports a 384x224 screen for sfiii3n."""
    memory_addresses = {"test": Address("02000008", "u8")}
    game_id = "sfiii3n"
    roms_path = "/home/michael/dev/MAMEToolkit/MAMEToolkit/emulator/mame/roms"
    emulator = None
    try:
        emulator = Emulator("testEnv1", roms_path, game_id, memory_addresses)
        assert_that(emulator.screenDims["width"], equal_to(384))
        assert_that(emulator.screenDims["height"], equal_to(224))
    finally:
        # Only close what was actually opened so a start-up failure is not
        # hidden behind an AttributeError on None.
        if emulator is not None:
            emulator.close()
def test_step(self):
    """A single empty step returns a 224x384 RGB frame and the watched address."""
    memory_addresses = {"test": Address("02000008", "u8")}
    game_id = "sfiii3n"
    roms_path = "/home/michael/dev/MAMEToolkit/MAMEToolkit/emulator/mame/roms"
    emulator = None
    try:
        emulator = Emulator("testEnv1", roms_path, game_id, memory_addresses)
        data = emulator.step([])
        assert_that(data["frame"].shape, equal_to((224, 384, 3)))
        assert_that(data["test"], equal_to(0))
    finally:
        # Only close what was actually opened so a start-up failure is not
        # hidden behind an AttributeError on None.
        if emulator is not None:
            emulator.close()
class Environment(object):
    """Story-mode environment for the "sfiii3n" game, driven through an Emulator.

    env_id - the unique identifier of the emulator environment, used to create fifo pipes
    difficulty - the difficulty to be used in story mode gameplay
    frame_ratio, frames_per_step - see Emulator class
    render, throttle, debug - see Console class
    """

    def __init__(self, env_id, roms_path, difficulty=3, frame_ratio=3, frames_per_step=3, render=True, throttle=False, debug=False):
        # Configuration is stored before the emulator is created.
        self.difficulty = difficulty
        self.frame_ratio = frame_ratio
        self.frames_per_step = frames_per_step
        self.throttle = throttle

        emulator_options = dict(frame_ratio=frame_ratio, render=render, throttle=throttle, debug=debug)
        self.emu = Emulator(env_id, roms_path, "sfiii3n", setup_memory_addresses(), **emulator_options)

        # Book keeping for health, wins and the round/stage/game state.
        self.started = False
        self.expected_health = dict(P1=0, P2=0)
        self.expected_wins = dict(P1=0, P2=0)
        self.round_done = self.stage_done = self.game_done = False
        self.stage = 1

    def run_steps(self, steps):
        """Play a scripted sequence: for each entry, idle for entry["wait"] steps, then press entry["actions"].

        Used to move the emulator through non-learnable parts such as title
        screens and character selects.
        """
        for entry in steps:
            for _ in range(entry["wait"]):
                self.emu.step([])
            self.emu.step([act.value for act in entry["actions"]])

    def start(self):
        """Drive the game to the first fight; must be called before step().

        Returns the first frame(s) of gameplay.
        """
        if self.throttle:
            warmup_steps = int(250 / self.frame_ratio)
            for _ in range(warmup_steps):
                self.emu.step([])
        self.run_steps(set_difficulty(self.frame_ratio, self.difficulty))
        self.run_steps(start_game(self.frame_ratio))
        first_frames = self.wait_for_fight_start()
        self.started = True
        return first_frames

    def wait_for_fight_start(self):
        """Idle until the emulator reports fighting != 0, then return the next gathered frame(s)."""
        while True:
            data = self.emu.step([])
            if data["fighting"] != 0:
                break
        self.expected_health = {"P1": data["healthP1"], "P2": data["healthP2"]}
        return self.gather_frames([])["frame"]

    def reset(self):
        """Advance to the next game, stage or round, depending on which one just finished.

        Raises EnvironmentError when nothing has finished yet.
        """
        if self.game_done:
            return self.new_game()
        if self.stage_done:
            return self.next_stage()
        if self.round_done:
            return self.next_round()
        raise EnvironmentError("Reset called while gameplay still running")

    def next_round(self):
        """Clear the round flag and expected health, then wait for the next round's fight to start."""
        self.round_done = False
        self.expected_health = {"P1": 0, "P2": 0}
        return self.wait_for_fight_start()

    def next_stage(self):
        """Run the next-stage script, reset per-stage book keeping and wait for the fight."""
        self.wait_for_continue()
        # The module-level next_stage() supplies the scripted button presses.
        self.run_steps(next_stage(self.frame_ratio))
        self.expected_health = {"P1": 0, "P2": 0}
        self.expected_wins = {"P1": 0, "P2": 0}
        self.round_done = False
        self.stage_done = False
        return self.wait_for_fight_start()

    def new_game(self):
        """Run the new-game script, reset all book keeping and wait for the fight."""
        self.wait_for_continue()
        # The module-level new_game() supplies the scripted button presses.
        self.run_steps(new_game(self.frame_ratio))
        self.expected_health = {"P1": 0, "P2": 0}
        self.expected_wins = {"P1": 0, "P2": 0}
        self.round_done = False
        self.stage_done = False
        self.game_done = False
        self.stage = 1
        return self.wait_for_fight_start()

    def wait_for_continue(self):
        """Step the emulator until the inspected part of the frame sums to zero (screen gone black)."""
        whole_frame = self.frames_per_step == 1
        data = self.emu.step([])
        # With one frame per step the whole frame is summed, otherwise only its first element.
        while (data["frame"] if whole_frame else data["frame"][0]).sum() != 0:
            data = self.emu.step([])

    def run_till_victor(self, data):
        """Keep stepping, accumulating rewards, until either player's win count changes."""
        while True:
            wins_unchanged = (self.expected_wins["P1"] == data["winsP1"]
                              and self.expected_wins["P2"] == data["winsP2"])
            if not wins_unchanged:
                break
            data = add_rewards(data, self.sub_step([]))
        self.expected_wins = {"P1": data["winsP1"], "P2": data["winsP2"]}
        return data

    def check_done(self, data):
        """Update the round/stage/game flags once the fight has stopped and return the latest data."""
        if data["fighting"] != 0:
            return data
        data = self.run_till_victor(data)
        self.round_done = True
        if data["winsP1"] == 2:
            self.stage_done = True
            self.stage += 1
        if data["winsP2"] == 2:
            self.game_done = True
        return data

    def gather_frames(self, actions):
        """Collect frames_per_step frames while holding ``actions``.

        data["frame"] becomes the single frame when frames_per_step is 1,
        otherwise the list of collected frames.
        """
        data = self.sub_step(actions)
        collected = [data["frame"]]
        for _ in range(self.frames_per_step - 1):
            data = add_rewards(data, self.sub_step(actions))
            collected.append(data["frame"])
        if self.frames_per_step == 1:
            data["frame"] = collected[0]
        else:
            data["frame"] = collected
        return data

    def sub_step(self, actions):
        """Advance one emulator step with ``actions`` pressed and attach health-based rewards.

        Each player's reward is the health the opponent lost minus the health
        they lost themselves since the previous step.
        """
        data = self.emu.step([act.value for act in actions])
        lost_p1 = self.expected_health["P1"] - data["healthP1"]
        lost_p2 = self.expected_health["P2"] - data["healthP2"]
        self.expected_health = {"P1": data["healthP1"], "P2": data["healthP2"]}
        data["rewards"] = {"P1": (lost_p2 - lost_p1), "P2": (lost_p1 - lost_p2)}
        return data

    def step(self, move_action, attack_action):
        """Apply the agent's chosen actions for one agent step.

        Returns (frame(s), rewards, round_done, stage_done, game_done).
        Raises EnvironmentError if start() was not called or no fight is running.
        """
        if not self.started:
            raise EnvironmentError("Start must be called before stepping")
        if self.round_done or self.stage_done or self.game_done:
            raise EnvironmentError(
                "Attempted to step while characters are not fighting")

        pressed = []
        # A single random draw decides between the chosen move/attack and a super action.
        if random.random() > 0.3:
            pressed.extend(index_to_move_action(move_action))
            pressed.extend(index_to_attack_action(attack_action))
        else:
            pressed.extend(index_to_super_action(
                (move_action + attack_action) % 6))
        data = self.check_done(self.gather_frames(pressed))
        return data["frame"], data[
            "rewards"], self.round_done, self.stage_done, self.game_done

    def close(self):
        """Safely close the emulator."""
        self.emu.close()
def __init__(self, env_id, roms_path, self_play=True, vs_rounds_before_single_player=2, player='P1',
             character = 19, frame_ratio=2, frames_per_step=1, render=True, throttle=False, debug=True,
             new_training=True):
    """Create the UMK3 environment and launch its emulator.

    env_id identifies the emulator environment and names the milestones file.
    self_play selects 2-player 'Vs' self-play; otherwise ``player`` ('P1' or
    'P2') is the side the agent controls. ``character`` is the character
    selection used for both players. vs_rounds_before_single_player is the
    number of Vs games played before a single-player benchmark.
    frame_ratio, render, throttle and debug are forwarded to the Emulator.
    new_training tells whether milestone logging starts from a fresh file.
    """
    self.p1_character_selection = character  # 'Scorpion' (default) is 6; 19 and over for random character
    self.p2_character_selection = character

    # This will need Xvfb X11 virtual server installed on GNU/Linux system
    if not render:
        os.system("Xvfb :1 -screen 0 800x600x16 +extension RANDR &")
        os.environ["DISPLAY"] = ":1"

    self.env_id = env_id
    self.self_play = self_play
    self.frame_ratio = frame_ratio
    self.frames_per_step = frames_per_step
    self.throttle = throttle
    self.debug = debug
    self.emu = Emulator(env_id, roms_path, "umk3", setup_memory_addresses(), frame_ratio=frame_ratio,
                        render=render, throttle=throttle, debug=debug)
    self.started = False
    self.expected_time_remaining = 0
    self.time_remaining = 0
    self.done = False
    self.round_done = False
    self.stage_done = False
    self.game_over = False
    self.game_completed = False
    self.finished_single_player = False
    self.num_total_characters = umk3_num_total_characters

    # Create two Player objects with useful values for each player (a player can be an agent or the game's AI)
    self.P1 = Player('P1', self.p1_character_selection, self.num_total_characters)
    self.P2 = Player('P2', self.p2_character_selection, self.num_total_characters)

    # Which side the agent controls: 'P1', 'P2', or 'Vs' for 2-player self-play.
    # Truthiness is used (not ``is True``) to match how self_play is tested elsewhere.
    self.env_player = player
    if self.self_play:
        self.env_player = 'Vs'

    # Number of self-play Vs rounds to be played before the agent plays a single player benchmark in story mode
    self.vs_rounds_before_single_player = vs_rounds_before_single_player
    self.current_vs_rounds_before_single_player = vs_rounds_before_single_player

    # Stage is counting only if playing in single-player mode; in 'Vs' mode it stays at 0.
    # Compare the resolved env_player by value: identity comparison against a
    # string literal is unreliable, and the raw ``player`` argument ignores self_play.
    self.stage = 0 if self.env_player == 'Vs' else 1
    self.last_vs_game_winner = ''
    self.highest_stage = 0
    self.path = 'Novice'
    self.difficulty = 0
    self.expected_difficulty = 0  # Assumes UMK3 starts with Very Easy pre-selected
    self.paths = ['Novice', 'Warrior', 'Master', 'MasterII']
    self.difficulties = ['Very Easy', 'Easy', 'Medium', 'Hard', 'Very Hard']

    # If the game somehow reaches round 5 in a stage (3 draws), it resets after round 5 ends
    self.round_this_stage = 1
    self.reset_this_round = False
    self.total_episodes_played = 0
    self.memory_values = []

    # Track if any milestones have been logged to decide whether to create a new file or append
    self.new_training = new_training
    milestones_file = os.path.join('milestones', self.env_id + '_milestones.txt')
    self.logged_first_milestone = bool(not self.new_training and os.path.exists(milestones_file))
class Environment(object):
    """Ultimate Mortal Kombat 3 ("umk3") environment driven through an Emulator.

    The agent controls 'P1' or 'P2' in single-player story mode, or both
    players in 2-player 'Vs' self-play. ``env_player`` holds the current mode
    and is always compared by value ('P1', 'P2' or 'Vs').
    """

    def __init__(self, env_id, roms_path, self_play=True, vs_rounds_before_single_player=2, player='P1',
                 character = 19, frame_ratio=2, frames_per_step=1, render=True, throttle=False, debug=True,
                 new_training=True):
        """Create the environment and launch its emulator.

        env_id identifies the emulator environment and names the milestones
        file. self_play selects 'Vs' self-play; otherwise ``player`` is the
        side the agent controls. ``character`` is the character selection for
        both players. frame_ratio, render, throttle and debug are forwarded to
        the Emulator.
        """
        self.p1_character_selection = character  # 'Scorpion' (default) is 6; 19 and over for random character
        self.p2_character_selection = character

        # This will need Xvfb X11 virtual server installed on GNU/Linux system
        if not render:
            os.system("Xvfb :1 -screen 0 800x600x16 +extension RANDR &")
            os.environ["DISPLAY"] = ":1"

        self.env_id = env_id
        self.self_play = self_play
        self.frame_ratio = frame_ratio
        self.frames_per_step = frames_per_step
        self.throttle = throttle
        self.debug = debug
        self.emu = Emulator(env_id, roms_path, "umk3", setup_memory_addresses(), frame_ratio=frame_ratio,
                            render=render, throttle=throttle, debug=debug)
        self.started = False
        self.expected_time_remaining = 0
        self.time_remaining = 0
        self.done = False
        self.round_done = False
        self.stage_done = False
        self.game_over = False
        self.game_completed = False
        self.finished_single_player = False
        self.num_total_characters = umk3_num_total_characters

        # Create two Player objects with useful values for each player (a player can be an agent or the game's AI)
        self.P1 = Player('P1', self.p1_character_selection, self.num_total_characters)
        self.P2 = Player('P2', self.p2_character_selection, self.num_total_characters)

        # Which side the agent controls: 'P1', 'P2', or 'Vs' for 2-player self-play.
        # Truthiness is used (not ``is True``) to match how self_play is tested elsewhere.
        self.env_player = player
        if self.self_play:
            self.env_player = 'Vs'

        # Number of self-play Vs rounds to be played before the agent plays a single player benchmark in story mode
        self.vs_rounds_before_single_player = vs_rounds_before_single_player
        self.current_vs_rounds_before_single_player = vs_rounds_before_single_player

        # Stage is counting only if playing in single-player mode; in 'Vs' mode it stays at 0.
        # Compare the resolved env_player by value, as new_game() does.
        self.stage = 0 if self.env_player == 'Vs' else 1
        self.last_vs_game_winner = ''
        self.highest_stage = 0
        self.path = 'Novice'
        self.difficulty = 0
        self.expected_difficulty = 0  # Assumes UMK3 starts with Very Easy pre-selected
        self.paths = ['Novice', 'Warrior', 'Master', 'MasterII']
        self.difficulties = ['Very Easy', 'Easy', 'Medium', 'Hard', 'Very Hard']

        # If the game somehow reaches round 5 in a stage (3 draws), it resets after round 5 ends
        self.round_this_stage = 1
        self.reset_this_round = False
        self.total_episodes_played = 0
        self.memory_values = []

        # Track if any milestones have been logged to decide whether to create a new file or append
        self.new_training = new_training
        milestones_file = os.path.join('milestones', self.env_id + '_milestones.txt')
        self.logged_first_milestone = bool(not self.new_training and os.path.exists(milestones_file))

    def run_steps(self, steps):
        """Play a scripted sequence: for each entry, idle for step["wait"] steps, then press step["actions"].

        Used for transitioning the emulator through non-learnable gameplay,
        such as title screens and character selects.
        """
        for step in steps:
            for _ in range(step["wait"]):
                self.emu.step([])
            self.emu.step([action.value for action in step["actions"]])

    def new_game(self):
        """Set the difficulty, start a game in the current mode and reset all book keeping."""
        assert self.env_player in ('P1', 'P2', 'Vs')
        self.expected_difficulty, difficulty_steps = set_difficulty(self.frame_ratio, self.difficulty,
                                                                    self.expected_difficulty)
        self.run_steps(difficulty_steps)

        # If player is P1, execute steps to start story-mode with Player 1
        if self.env_player == 'P1':
            # Select single-player character, only if environment is not in self-play mode.
            # In self-play, the character is pre-selected as the one chosen by the player
            # who won the last Vs game.
            if not self.self_play:
                self.run_steps(p1_start_game(self.frame_ratio))
                p1_character_select_steps, p1_character_index = \
                    p1_select_character(self.frame_ratio, self.p1_character_selection)
                self.P1.set_character(p1_character_index)
                if self.debug:
                    print(">Debug: Starting a new single-player game... Difficulty: "
                          + self.difficulties[self.difficulty] + ", Path: " + self.path
                          + ", Player: " + self.env_player
                          + ", Character: " + self.P1.character_name + " \n")
                self.run_steps(p1_character_select_steps)
            else:
                if self.debug:
                    print(">Debug: Continuing to single-player game from Vs... Difficulty: "
                          + self.difficulties[self.difficulty] + ", Path: " + self.path
                          + ", Player: " + self.env_player
                          + ", Character: " + self.P1.character_name + " \n")
            # Select story mode path
            self.run_steps(p1_select_path(self.frame_ratio, self.path))

        # If player is P2, execute steps to start story-mode with Player 2
        elif self.env_player == 'P2':
            # Same character-selection rule as for P1 above.
            if not self.self_play:
                self.run_steps(p2_start_game(self.frame_ratio))
                p2_character_select_steps, p2_character_index = \
                    p2_select_character(self.frame_ratio, self.p2_character_selection)
                self.P2.set_character(p2_character_index)
                if self.debug:
                    print(">Debug: Starting a new single-player game... Difficulty: "
                          + self.difficulties[self.difficulty] + ", Path: " + self.path
                          + ", Player: " + self.env_player
                          + ", Character: " + self.P2.character_name + " \n")
                self.run_steps(p2_character_select_steps)
            else:
                if self.debug:
                    print(">Debug: Continuing to single-player game from Vs... Difficulty: "
                          + self.difficulties[self.difficulty] + ", Path: " + self.path
                          + ", Player: " + self.env_player
                          + ", Character: " + self.P2.character_name + " \n")
            # Select story mode path
            self.run_steps(p2_select_path(self.frame_ratio, self.path))

        # If player is set to Vs (in Self-Play mode), execute steps to start 2-Player Vs mode
        else:
            self.run_steps(p1_and_p2_vs_start_game(self.frame_ratio))
            p1_character_select_steps, p1_character_index = p1_select_character(
                self.frame_ratio, self.p1_character_selection)
            self.P1.set_character(p1_character_index)
            p2_character_select_steps, p2_character_index = p2_select_character(
                self.frame_ratio, self.p2_character_selection)
            self.P2.set_character(p2_character_index)
            if self.debug:
                print(">Debug: Starting a new Vs game... Mode: " + self.env_player
                      + ", P1 Character: " + self.P1.character_name
                      + ", P2 Character: " + self.P2.character_name + " \n")
            self.run_steps(p1_character_select_steps)
            self.run_steps(p2_character_select_steps)

        self.wait_for_fight_start()

        self.P1.expected_health, self.P2.expected_health = 0, 0
        self.P1.expected_wins, self.P2.expected_wins = 0, 0
        self.P1.expected_wins_check_done, self.P2.expected_wins_check_done = 0, 0
        self.done = False
        self.round_done = False
        self.stage_done = False
        self.game_over = False
        self.game_completed = False
        self.started = True
        self.expected_time_remaining = 0
        self.time_remaining = 0
        self.round_this_stage = 1
        self.P1.total_rewards_this_round, self.P2.total_rewards_this_round = 0, 0
        self.P1.total_rewards_this_game, self.P2.total_rewards_this_game = 0, 0
        self.finished_single_player = False
        self.reset_this_round = False
        # Stage is counting only if playing in single-player mode; in 'Vs' mode it stays at 0.
        self.stage = 0 if self.env_player == 'Vs' else 1

    def start(self):
        """Send actions to the game until learnable gameplay starts; must be called first."""
        if self.throttle:
            for _ in range(int(250 / self.frame_ratio)):
                self.emu.step([])
        self.new_game()

    def wait_for_fight_start(self):
        """Idle until the emulator reports fighting != 0."""
        data = self.emu.step([])
        while data["fighting"] == 0:
            data = self.emu.step([])

    def next_stage(self):
        """Reset per-stage book keeping and wait for the next stage's fight to begin."""
        self.P1.expected_health, self.P2.expected_health = 0, 0
        self.P1.expected_wins, self.P2.expected_wins = 0, 0
        self.P1.expected_wins_check_done, self.P2.expected_wins_check_done = 0, 0
        self.round_this_stage = 1
        self.done = False
        self.round_done = False
        self.stage_done = False
        return self.wait_for_next_stage_start()

    def wait_for_next_stage_start(self):
        """Idle while the current fight is still flagged, then until the next one is flagged."""
        if self.debug:
            print(">Debug: Waiting for next stage to start...\n")
        data = self.emu.step([])
        while data["fighting"] == 1:
            data = self.emu.step([])
        while data["fighting"] == 0:
            data = self.emu.step([])

    def next_round(self):
        """Advance the round counter, clear per-round state and wait for the next round."""
        self.round_this_stage += 1
        # Past round 4 the game is reset once this round ends (see on_round_done).
        self.reset_this_round = self.round_this_stage > 4
        if self.reset_this_round and self.debug:
            print(">Debug: Game has reached round 5 in a stage, will reset at end of this round!")
        self.done = False
        self.round_done = False
        self.P1.expected_health, self.P2.expected_health = 0, 0
        return self.wait_for_next_round_start()

    def wait_for_next_round_start(self):
        """Idle until either health reads 166 or the timer reads 99."""
        if self.debug:
            print(">Debug: Waiting for next round to start...\n")
        data = self.emu.step([])
        self.time_remaining = (int(data["time_remaining_tens_digit"]) * 10) + int(data["time_remaining_ones_digit"])
        # NOTE(review): the loop ends as soon as ANY of the three values matches; confirm that is intended.
        while int(data["healthP1"]) != 166 and int(data["healthP2"]) != 166 and self.time_remaining != 99:
            data = self.emu.step([])
            self.time_remaining = (int(data["time_remaining_tens_digit"]) * 10) \
                + int(data["time_remaining_ones_digit"])

    def new_game_after_loss(self):
        """Sit through the game-over screens, then start a new game."""
        if self.debug:
            print(">Debug: Waiting for game over screens...\n")
        self.run_steps(wait_for_game_over_screens(self.frame_ratio))
        self.new_game()

    def new_game_after_completion(self):
        """Sit through the game-completed screens, then start a new game."""
        if self.debug:
            print(">Debug: Waiting for game completed screens...\n")
        self.run_steps(wait_for_game_completed_screens(self.frame_ratio))
        self.new_game()

    def step(self, move_action, attack_action):
        """Apply the agent's actions for one agent step in single-player mode.

        Returns (frame(s), rewards, done, round_done, stage_done, game_over,
        game_completed). Raises EnvironmentError if start() was not called or
        no fight is running.
        """
        assert self.env_player in ('P1', 'P2')
        if not self.started:
            raise EnvironmentError("Start must be called before stepping")
        if self.round_done or self.stage_done or self.game_over or self.game_completed:
            raise EnvironmentError("Attempted to step while characters are not fighting")

        actions = []
        if self.env_player == 'P1':
            actions += p1_index_to_move_action(move_action)
            actions += p1_index_to_attack_action(attack_action)
        elif self.env_player == 'P2':
            actions += p2_index_to_move_action(move_action)
            actions += p2_index_to_attack_action(attack_action)
        data = self.gather_frames(actions)
        data = self.check_done(data)
        return data["frame"], data[
            "rewards"], self.done, self.round_done, self.stage_done, self.game_over, self.game_completed

    def vs_step(self, p1_move_action, p1_attack_action, p2_move_action, p2_attack_action):
        """Apply both players' actions for one agent step in 2-player self-play.

        Returns the same tuple as step() and raises EnvironmentError under the
        same conditions.
        """
        assert self.env_player == 'Vs'
        if not self.started:
            raise EnvironmentError("Start must be called before stepping")
        if self.round_done or self.stage_done or self.game_over or self.game_completed:
            raise EnvironmentError("Attempted to step while characters are not fighting")

        actions = []
        actions += p1_index_to_move_action(p1_move_action)
        actions += p1_index_to_attack_action(p1_attack_action)
        actions += p2_index_to_move_action(p2_move_action)
        actions += p2_index_to_attack_action(p2_attack_action)
        data = self.gather_frames(actions)
        data = self.check_done(data)
        return data["frame"], data[
            "rewards"], self.done, self.round_done, self.stage_done, self.game_over, self.game_completed

    def gather_frames(self, actions):
        """Collect frames_per_step frames while holding ``actions``.

        data["frame"] becomes the single frame when frames_per_step is 1,
        otherwise the list of collected frames.
        """
        data = self.sub_step(actions)
        frames = [data["frame"]]
        for _ in range(self.frames_per_step - 1):
            data = add_rewards(data, self.sub_step(actions))
            frames.append(data["frame"])
        data["frame"] = frames[0] if self.frames_per_step == 1 else frames
        return data

    def sub_step(self, actions):
        """Advance one emulator step with ``actions`` pressed and attach rewards.

        The per-player reward combines the health swing since the previous
        step, +/-200 when a round win is registered, and +/-1 on each timer
        tick for whoever has more health; the sum is divided by 360 and
        rounded to 5 decimal places.
        """
        data = self.emu.step([action.value for action in actions])
        self.memory_values = data

        p1_diff_reward = (self.P1.expected_health - data["healthP1"])
        p2_diff_reward = (self.P2.expected_health - data["healthP2"])
        self.P1.expected_health, self.P2.expected_health = data["healthP1"], data["healthP2"]

        if data["current_round_winsP1"] == self.P1.expected_wins + 1:
            p1_round_win_reward, p2_round_win_reward = 200, -200
        elif data["current_round_winsP2"] == self.P2.expected_wins + 1:
            p1_round_win_reward, p2_round_win_reward = -200, 200
        else:
            p1_round_win_reward, p2_round_win_reward = 0, 0
        self.P1.expected_wins, self.P2.expected_wins = data["current_round_winsP1"], data["current_round_winsP2"]

        self.time_remaining = (int(data["time_remaining_tens_digit"]) * 10) + int(data["time_remaining_ones_digit"])
        p1_time_remaining_reward, p2_time_remaining_reward = 0, 0
        # Only reward on the step where the timer has just ticked down by one.
        if self.time_remaining == self.expected_time_remaining - 1:
            if data["healthP1"] < data["healthP2"]:
                p1_time_remaining_reward, p2_time_remaining_reward = -1, 1
            elif data["healthP1"] > data["healthP2"]:
                p1_time_remaining_reward, p2_time_remaining_reward = 1, -1
        self.expected_time_remaining = self.time_remaining

        # Rewards range roughly from +360 to -360, thus normalizing by dividing by 360
        rewards = {
            "P1": round(((p2_diff_reward - p1_diff_reward) + p1_time_remaining_reward + p1_round_win_reward) / 360, 5),
            "P2": round(((p1_diff_reward - p2_diff_reward) + p2_time_remaining_reward + p2_round_win_reward) / 360, 5)
        }

        # Update total rewards for this round and this game
        self.P1.total_rewards_this_round += rewards['P1']
        self.P2.total_rewards_this_round += rewards['P2']
        self.P1.total_rewards_this_game += rewards['P1']
        self.P2.total_rewards_this_game += rewards['P2']

        data["rewards"] = rewards
        return data

    def on_round_done(self):
        """Mark the round as done, count the episode, and reset the game if round 5 was reached."""
        if self.debug:
            print(">Debug: Round " + str(self.round_this_stage) + " done!")
            print(">Debug: Total rewards for P1 ths round: " + str(self.P1.total_rewards_this_round))
            print(">Debug: Total rewards for P2 ths round: " + str(self.P2.total_rewards_this_round) + ' \n')

        self.done = True
        self.total_episodes_played += 1
        self.P1.total_rewards_this_round, self.P2.total_rewards_this_round = 0, 0

        # Keep up with game reset if it has reached round 5 in a stage
        if self.reset_this_round:
            if self.debug:
                print(">Debug: Game is being reset as it has reached round 5 in a stage! \n")
            self.reset_this_round = False
            self.run_steps(wait_for_game_reset(self.frame_ratio))
            self.new_game()

    def on_single_player_stage_win(self):
        """Handle a stage win: log milestones, then either finish the game or advance the stage."""
        # Log milestone if a new highest stage has been reached
        if self.stage > self.highest_stage:
            self.log_stage_milestone()
            self.highest_stage = self.stage

        # Final stage number of each story path
        final_stage = {'Novice': 8, 'Warrior': 9, 'Master': 10, 'MasterII': 11}
        if final_stage.get(self.path) == self.stage:
            self.game_completed = True
            self.finished_single_player = True
            self.log_milestone()

            if self.debug:
                print(">Debug: Game completed on " + str(self.path) + " path and on "
                      + str(self.difficulties[self.difficulty]) + " difficulty!")
                if self.env_player == 'P1':
                    print(">Debug: Total rewards for " + self.env_player + " this game: "
                          + str(self.P1.total_rewards_this_game) + "\n")
                else:
                    print(">Debug: Total rewards for " + self.env_player + " this game: "
                          + str(self.P2.total_rewards_this_game) + "\n")

            # If the game mode is set to self-play, set the env_player to Vs to start a Vs game next
            if self.self_play:
                self.env_player = 'Vs'

            # Play on a harder path if there is one; otherwise raise the difficulty
            # (until Very Hard) and go back to the 'Novice' path.
            if self.path != 'MasterII':
                self.path = self.paths[self.paths.index(self.path) + 1]
            elif self.difficulty < 4:
                self.difficulty += 1
                self.path = 'Novice'
        else:
            # Not the final stage: advance to the next one
            self.stage_done = True
            self.stage += 1
            if self.debug:
                print(">Debug: Stage won. Advancing to stage " + str(self.stage) + ". \n")

    def on_single_player_stage_loss(self):
        """Handle a stage loss: the single-player game is over."""
        self.game_over = True
        self.finished_single_player = True
        if self.debug:
            if self.env_player == 'P1':
                print(">Debug: Total rewards for " + self.env_player + " this game: "
                      + str(self.P1.total_rewards_this_game))
            else:
                print(">Debug: Total rewards for " + self.env_player + " this game: "
                      + str(self.P2.total_rewards_this_game))
            print(">Debug: Stage lost. Quiting game. \n")

        # If the game mode is set to self-play, set the env_player to Vs to start a Vs game next
        if self.self_play:
            self.env_player = 'Vs'

    def on_single_player_round_win(self):
        """Flag the round as finished after the agent won it."""
        self.round_done = True
        if self.debug:
            print(">Debug: Round won. Advancing to next round. \n")

    def on_single_player_round_loss(self):
        """Flag the round as finished after the agent lost it."""
        self.round_done = True
        if self.debug:
            print(">Debug: Round lost. Advancing to next round. \n")

    def vs_continue(self, last_game_winner):
        """Continue to the next Vs game after ``last_game_winner`` ('P1' or 'P2') won.

        The continue script for the other player is run. Raises
        EnvironmentError for any other winner value.
        """
        if self.debug:
            print(">Debug: Advancing to next Vs game. \n")

        if last_game_winner == 'P1':
            self.run_steps(p2_vs_continue(self.frame_ratio))
        elif last_game_winner == 'P2':
            self.run_steps(p1_vs_continue(self.frame_ratio))
        else:
            raise EnvironmentError("Attempted to continue to new Vs game without a valid last game's winner")

        p1_character_select_steps, p1_character_index = p1_select_character(
            self.frame_ratio, self.p1_character_selection)
        self.P1.set_character(p1_character_index)
        p2_character_select_steps, p2_character_index = p2_select_character(
            self.frame_ratio, self.p2_character_selection)
        self.P2.set_character(p2_character_index)
        self.run_steps(p1_character_select_steps)
        self.run_steps(p2_character_select_steps)

        self.wait_for_fight_start()

        self.P1.expected_health, self.P2.expected_health = 0, 0
        self.P1.expected_wins, self.P2.expected_wins = 0, 0
        self.P1.expected_wins_check_done, self.P2.expected_wins_check_done = 0, 0
        self.done = False
        self.game_over = False
        self.started = True
        self.reset_this_round = False
        self.round_this_stage = 1
        self.expected_time_remaining = 0
        self.time_remaining = 0
        self.P1.total_rewards_this_round, self.P2.total_rewards_this_round = 0, 0
        self.P1.total_rewards_this_game, self.P2.total_rewards_this_game = 0, 0
        self.stage = 0

    def single_player_after_vs(self):
        """Sit through the Vs game-over screens, then start a new game."""
        if self.debug:
            print(">Debug: Waiting for game over screen...\n")
        self.run_steps(vs_wait_for_game_over_screens(self.frame_ratio))
        self.new_game()

    def vs_after_single_player(self):
        """Restore the Vs-games counter and start a new game after the game-over screens."""
        self.current_vs_rounds_before_single_player = self.vs_rounds_before_single_player
        self.new_game_after_loss()

    def on_vs_stage_done(self, last_game_winner):
        """Handle the end of a Vs game won by ``last_game_winner``.

        Counts down the remaining Vs games; when none remain, the winner
        becomes env_player for the single-player benchmark.
        """
        self.game_over = True
        self.current_vs_rounds_before_single_player -= 1
        self.last_vs_game_winner = last_game_winner

        if self.debug:
            print(">Debug: Total rewards for P1 this game: " + str(self.P1.total_rewards_this_game))
            print(">Debug: Total rewards for P2 this game: " + str(self.P2.total_rewards_this_game))

        if self.current_vs_rounds_before_single_player > 0:
            # More Vs games remain to be played
            if self.debug:
                print(">Debug: Vs stage done. " + str(self.current_vs_rounds_before_single_player)
                      + " Vs stage(s) remaining. \n")
        else:
            # Continue to single player benchmarking with the winner of the last Vs game
            self.env_player = last_game_winner
            if self.debug:
                print(">Debug: Vs stages done. Continuing to story with last Vs stage's winner: "
                      + self.env_player)

    def on_vs_round_done(self):
        """Flag the round as finished in Vs mode."""
        self.round_done = True
        if self.debug:
            print(">Debug: Vs round done. Advancing to next round. \n")
\n") # Checks whether the round or game has finished def check_done(self, data): # Get the time currently remaining from the two memory locations holding each of the two digits (from 99 to 00) self.time_remaining = int(data["time_remaining_tens_digit"]) * 10 + int(data["time_remaining_ones_digit"]) # If a round has ended if data["current_round_winsP1"] == self.P1.expected_wins_check_done + 1 \ or data["current_round_winsP2"] == self.P2.expected_wins_check_done + 1 \ or (data["healthP1"] == 0 and data["healthP2"] == 0) \ or (data["healthP1"] == data["healthP2"] and self.time_remaining == 0): # (data["healthP1"] == 0 and data["healthP2"] == 0) or self.time_remaining == 0 # self.time_remaining == 0 check may trigger a bug self.on_round_done() if self.done: # If the round wins of P1 have incremented, P1 has won a round! if data["current_round_winsP1"] == self.P1.expected_wins_check_done + 1: if self.debug: print(">Debug: Game End Condition Met -> P1 wins have incremented in game's memory.") print("P1 Health:" + str(data["healthP1"]) + " P2 Health:" + str( data["healthP2"]) + " Current Time:" + str(self.time_remaining)) self.P1.expected_wins_check_done = data["current_round_winsP1"] # If the agent is playing as player 1 if self.env_player is 'P1': # If it has reached 2 round wins, P1 (agent) has won the stage if data["current_round_winsP1"] == 2: self.on_single_player_stage_win() # If P1 (agent) hasn't reached the round win limit, proceed to the next round else: self.on_single_player_round_win() # Else if the agent is playing as player 2 elif self.env_player is 'P2': # If it has reached 2 round wins, P1 (agent) has won the stage if data["current_round_winsP1"] == 2: self.on_single_player_stage_loss() # If P1 (agent) hasn't reached the round win limit, proceed to the next round else: self.on_single_player_round_loss() # Else if this Vs self-play else: # If it has reached 2 round wins, P1 has won the Vs stage if data["current_round_winsP1"] == 2: 
self.on_vs_stage_done('P1') # If P1 hasn't reached the round win limit, proceed to the next round else: self.on_vs_round_done() # If the round wins of P2 have incremented, P2 has won a round! elif data["current_round_winsP2"] == self.P2.expected_wins_check_done + 1: if self.debug: print(">Debug: Game End Condition Met -> P2 wins have incremented in game's memory.") print("P1 Health:" + str(data["healthP1"]) + " P2 Health:" + str( data["healthP2"]) + " Current Time:" + str(self.time_remaining)) self.P2.expected_wins_check_done = data["current_round_winsP2"] # If the agent is playing as player 2 if self.env_player is 'P2': # If it has reached 2 round wins, P2 (agent) has won the stage if data["current_round_winsP2"] == 2: self.on_single_player_stage_win() # If P2 (agent) hasn't reached the round win limit, proceed to the next round else: self.on_single_player_round_win() # Else if the agent is playing as player 1 elif self.env_player is 'P1': # If agent is P1 and P2 (CPU) wins 2 rounds, the game is over for the agent if data["current_round_winsP2"] == 2: self.on_single_player_stage_loss() # If P2 (CPU) hasn't reached the round win limit, proceed to the next round else: self.on_single_player_round_loss() # Else if this Vs self-play else: # If it has reached 2 round wins, P2 has won the Vs stage if data["current_round_winsP2"] == 2: self.on_vs_stage_done('P2') # If P2 hasn't reached the round win limit, proceed to the next round else: self.on_vs_round_done() # If no round wins have incremented but both players' health has reached 0 # or the time has reached 00 and both players have equal health, it's a draw! elif data["healthP1"] == 0 and data["healthP2"] == 0: if self.debug: print(">Debug: Game End Condition Met -> Both P1 and P2 health values have reached 0 (Draw).") print("P1 Health:" + str(data["healthP1"]) + " P2 Health:" + str( data["healthP2"]) + " Current Time:" + str(self.time_remaining)) print(">Debug: Draw! Advancing to next round. 
\n") self.round_done = True elif data["healthP1"] == data["healthP2"] and self.time_remaining == 0: if self.debug: print( ">Debug: Game End Condition Met -> Time has ran out with P1 and P2 health values equal (Draw).") print("P1 Health:" + str(data["healthP1"]) + " P2 Health:" + str( data["healthP2"]) + " Current Time:" + str(self.time_remaining)) print(">Debug: Draw! Advancing to next round. \n") self.round_done = True else: raise EnvironmentError('Done flag enabled but game done condition not detected in check_done') return data def reset(self): if self.game_over: if not self.self_play: self.new_game_after_loss() else: if self.env_player is 'Vs' and not self.finished_single_player: self.vs_continue(self.last_vs_game_winner) elif self.env_player is 'Vs' and self.finished_single_player: self.vs_after_single_player() else: self.single_player_after_vs() elif self.game_completed: self.new_game_after_completion() elif self.stage_done: self.next_stage() elif self.round_done: self.next_round() else: raise EnvironmentError("Reset called while gameplay still running") def log_stage_milestone(self): assert(self.env_player is 'P1' or self.env_player is 'P2') # Create milestones folder if it does not exist if not os.path.exists('milestones'): os.mkdir('milestones') # Create a text file called the [name of the environment] + "_milestones" if it does not exist # Open the text file with write permission if not self.logged_first_milestone: # If this is a new training, create or overwrite existing milestones file f = open(os.path.join('milestones', self.env_id + '_milestones.txt'), 'w+') else: # If this is a continuation from a previous session and a milestone file exists, # append to the milestones file f = open(os.path.join('milestones', self.env_id + '_milestones.txt'), 'a+') # Create a newline f.write('\n') # Write the milestone f.write(self.env_id + " has managed to defeat stage " + str(self.stage) + " after " + str(self.total_episodes_played) + " total episodes playing as " 
+ self.env_player + " with " + self.P1.character_name if self.env_player is 'P1' else self.P2.character_name + ".\r\n") # Close the milestones file f.close() def log_milestone(self): assert(self.env_player is 'P1' or self.env_player is 'P2') # Create milestones folder if it does not exist if not os.path.exists('milestones'): os.mkdir('milestones') # Create a text file called the [name of the environment] + "_milestones" if it does not exist # Open the text file with write permission if not self.logged_first_milestone: # If this is a new training, create or overwrite existing milestones file f = open(os.path.join('milestones', self.env_id + '_milestones.txt'), 'w+') else: # If this is a continuation from a previous session and a milestone file exists, # append to the milestones file f = open(os.path.join('milestones', self.env_id + '_milestones.txt'), 'a+') # Create a newline f.write('\n') # Write the milestone f.write(self.env_id + " has managed to complete the " + str(self.path) + " path on the " + str(self.difficulties[self.difficulty]) + " difficulty after " + str(self.total_episodes_played) + " total episodes as " + self.env_player + " with " + self.P1.character_name if self.env_player is 'P1' else self.P2.character_name + ".\r\n") # Close the milestones file f.close() # Safely closes emulator def close(self): self.emu.close()