Python Emulator.step Examples

Programming Language: Python

Namespace/Package Name: MAMEToolkit.emulator.Emulator

Class/Type: Emulator

Method/Function: step

Examples at hotexamples.com: 5

Python Emulator.step - 5 examples found. These are the top-rated real-world Python examples of MAMEToolkit.emulator.Emulator.Emulator.step, extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

Emulator(6)

close(6)

step(5)

Frequently Used Methods

Emulator (6)

close (6)

step (5)

Example #1

Show file

File: EmulatorTest.py Project: zennsocial/MAMEToolkit

def run_emulator(env_id, game_id, roms_path, memory_addresses, output_queue):
    """Create an emulator, step it once with no actions and queue the result.

    The data returned by ``emulator.step([])`` is put on ``output_queue``.
    The emulator is closed afterwards, whether or not the step succeeded.
    """
    emulator = None
    try:
        emulator = Emulator(env_id, roms_path, game_id, memory_addresses)
        output_queue.put(emulator.step([]))
    finally:
        # If the constructor raised, ``emulator`` is still None; calling
        # close() on it would replace the real error with an AttributeError.
        if emulator is not None:
            emulator.close()

Example #2

Show file

File: EmulatorTest.py Project: zennsocial/MAMEToolkit

 def test_step(self):
     """Step the emulator once with no actions and check the returned data."""
     memory_addresses = {"test": Address("02000008", "u8")}
     game_id = "sfiii3n"
     emulator = None
     try:
         emulator = Emulator("testEnv1", game_id, memory_addresses)
         data = emulator.step([])
         assert_that(data["frame"].shape, equal_to((224, 384, 3)))
         assert_that(data["test"], equal_to(0))
     finally:
         # Only close an emulator that was actually created; otherwise a
         # constructor failure would be masked by an AttributeError on None.
         if emulator is not None:
             emulator.close()

Example #3

Show file

File: EmulatorTest.py Project: zorrock/MAMEToolkit

 def test_step(self):
     """Step the emulator once with no actions and check the returned data."""
     memory_addresses = {"test": Address("02000008", "u8")}
     game_id = "sfiii3n"
     roms_path = "/home/michael/dev/MAMEToolkit/MAMEToolkit/emulator/mame/roms"
     emulator = None
     try:
         emulator = Emulator("testEnv1", roms_path, game_id, memory_addresses)
         data = emulator.step([])
         assert_that(data["frame"].shape, equal_to((224, 384, 3)))
         assert_that(data["test"], equal_to(0))
     finally:
         # Only close an emulator that was actually created; otherwise a
         # constructor failure would be masked by an AttributeError on None.
         if emulator is not None:
             emulator.close()

Example #4

Show file

class Environment(object):
    """Drives an ``Emulator`` running "sfiii3n" and tracks round/stage/game state.

    Bookkeeping lives in ``expected_health`` / ``expected_wins`` (dicts keyed
    by "P1" and "P2") and the ``round_done`` / ``stage_done`` / ``game_done``
    flags.
    """

    # env_id - the unique identifier of the emulator environment, used to create fifo pipes
    # difficulty - the difficulty to be used in story mode gameplay
    # frame_ratio, frames_per_step - see Emulator class
    # render, throttle, debug - see Console class
    def __init__(self,
                 env_id,
                 roms_path,
                 difficulty=3,
                 frame_ratio=3,
                 frames_per_step=3,
                 render=True,
                 throttle=False,
                 debug=False):
        self.difficulty = difficulty
        self.frame_ratio = frame_ratio
        self.frames_per_step = frames_per_step
        self.throttle = throttle
        emulator_options = dict(frame_ratio=frame_ratio,
                                render=render,
                                throttle=throttle,
                                debug=debug)
        self.emu = Emulator(env_id, roms_path, "sfiii3n",
                            setup_memory_addresses(), **emulator_options)
        self.started = False
        self.expected_health = {"P1": 0, "P2": 0}
        self.expected_wins = {"P1": 0, "P2": 0}
        self.round_done = False
        self.stage_done = False
        self.game_done = False
        self.stage = 1

    def run_steps(self, steps):
        """Play a scripted sequence of steps through the emulator.

        Each entry is a mapping with "wait" (number of empty steps to send
        first) and "actions" (objects whose ``.value`` is sent in one step).
        Used for non-learnable gameplay such as title screens.
        """
        for entry in steps:
            for _ in range(entry["wait"]):
                self.emu.step([])
            pressed = [action.value for action in entry["actions"]]
            self.emu.step(pressed)

    def start(self):
        """Bring the game to the first fight; must be called before ``step``.

        Returns the frames gathered once the fight has started.
        """
        if self.throttle:
            warmup_steps = int(250 / self.frame_ratio)
            for _ in range(warmup_steps):
                self.emu.step([])
        self.run_steps(set_difficulty(self.frame_ratio, self.difficulty))
        self.run_steps(start_game(self.frame_ratio))
        first_frames = self.wait_for_fight_start()
        self.started = True
        return first_frames

    def wait_for_fight_start(self):
        """Step with no actions until "fighting" is non-zero, then gather frames."""
        while True:
            data = self.emu.step([])
            if data["fighting"] != 0:
                break
        self.expected_health = {"P1": data["healthP1"], "P2": data["healthP2"]}
        gathered = self.gather_frames([])
        return gathered["frame"]

    def reset(self):
        """Move on to whichever phase the done flags call for.

        Raises EnvironmentError when none of the done flags is set.
        """
        if self.game_done:
            return self.new_game()
        if self.stage_done:
            return self.next_stage()
        if self.round_done:
            return self.next_round()
        raise EnvironmentError("Reset called while gameplay still running")

    def next_round(self):
        """Clear round bookkeeping and wait for the next fight to start."""
        self.round_done = False
        self.expected_health = {"P1": 0, "P2": 0}
        return self.wait_for_fight_start()

    def next_stage(self):
        """Run the next-stage script, clear bookkeeping and wait for the fight."""
        self.wait_for_continue()
        # The bare name is the module-level step script, not this method.
        self.run_steps(next_stage(self.frame_ratio))
        self.expected_health = {"P1": 0, "P2": 0}
        self.expected_wins = {"P1": 0, "P2": 0}
        self.round_done = False
        self.stage_done = False
        return self.wait_for_fight_start()

    def new_game(self):
        """Run the new-game script, reset all bookkeeping and wait for the fight."""
        self.wait_for_continue()
        # The bare name is the module-level step script, not this method.
        self.run_steps(new_game(self.frame_ratio))
        self.expected_health = {"P1": 0, "P2": 0}
        self.expected_wins = {"P1": 0, "P2": 0}
        self.round_done = False
        self.stage_done = False
        self.game_done = False
        self.stage = 1
        return self.wait_for_fight_start()

    def wait_for_continue(self):
        """Step with no actions until the checked frame data sums to zero.

        With ``frames_per_step == 1`` the whole "frame" value is summed;
        otherwise only its first element is.
        """
        whole_frame = self.frames_per_step == 1
        data = self.emu.step([])
        while (data["frame"] if whole_frame else data["frame"][0]).sum() != 0:
            data = self.emu.step([])

    def run_till_victor(self, data):
        """Keep stepping until either player's win count differs from the expected one."""
        while True:
            if self.expected_wins["P1"] != data["winsP1"] \
                    or self.expected_wins["P2"] != data["winsP2"]:
                break
            data = add_rewards(data, self.sub_step([]))
        self.expected_wins = {"P1": data["winsP1"], "P2": data["winsP2"]}
        return data

    def check_done(self, data):
        """Update the done flags once "fighting" has dropped to zero."""
        if data["fighting"] != 0:
            return data
        data = self.run_till_victor(data)
        self.round_done = True
        if data["winsP1"] == 2:
            self.stage_done = True
            self.stage += 1
        if data["winsP2"] == 2:
            self.game_done = True
        return data

    def gather_frames(self, actions):
        """Run ``frames_per_step`` sub-steps with the same actions.

        ``data["frame"]` is the single frame when ``frames_per_step == 1``,
        otherwise the list of collected frames.
        """
        data = self.sub_step(actions)
        collected = [data["frame"]]
        extra_steps = self.frames_per_step - 1
        for _ in range(extra_steps):
            data = add_rewards(data, self.sub_step(actions))
            collected.append(data["frame"])
        if self.frames_per_step == 1:
            data["frame"] = collected[0]
        else:
            data["frame"] = collected
        return data

    def sub_step(self, actions):
        """Advance one emulator step and attach per-player rewards.

        The reward for a player is the opponent's health loss minus the
        player's own health loss since the previous sub-step.
        """
        data = self.emu.step([action.value for action in actions])

        lost_by_p1 = (self.expected_health["P1"] - data["healthP1"])
        lost_by_p2 = (self.expected_health["P2"] - data["healthP2"])
        self.expected_health = {"P1": data["healthP1"], "P2": data["healthP2"]}

        data["rewards"] = {"P1": (lost_by_p2 - lost_by_p1),
                           "P2": (lost_by_p1 - lost_by_p2)}
        return data

    def step(self, move_action, attack_action):
        """Apply the agent's chosen actions for one agent step.

        Returns ``(frame, rewards, round_done, stage_done, game_done)``.
        Raises EnvironmentError if ``start`` has not been called or a
        round/stage/game has finished and ``reset`` is pending.
        """
        if not self.started:
            raise EnvironmentError("Start must be called before stepping")
        if self.round_done or self.stage_done or self.game_done:
            raise EnvironmentError(
                "Attempted to step while characters are not fighting")

        # Roughly 30% of the time a super action is substituted for the
        # requested move/attack pair.
        if random.random() > 0.3:
            actions = list(index_to_move_action(move_action))
            actions.extend(index_to_attack_action(attack_action))
        else:
            super_index = (move_action + attack_action) % 6
            actions = list(index_to_super_action(super_index))

        data = self.check_done(self.gather_frames(actions))
        return (data["frame"], data["rewards"], self.round_done,
                self.stage_done, self.game_done)

    def close(self):
        """Safely close the emulator."""
        self.emu.close()

Example #5

Show file

class Environment(object):

    def __init__(self, env_id, roms_path, self_play=True, vs_rounds_before_single_player=2, player='P1', character = 19, 
                 frame_ratio=2, frames_per_step=1, render=True, throttle=False, debug=True, new_training=True):
        """Create the "umk3" emulator and initialise all bookkeeping state.

        env_id, roms_path - forwarded to ``Emulator``; env_id also names the milestones file
        self_play - when True the controlled player is forced to 'Vs'
        vs_rounds_before_single_player - stored as both the configured and the current counter
        player - 'P1', 'P2' or 'Vs'; which side the agent controls
        character - character selection used for both players
        frame_ratio, render, throttle, debug - forwarded to ``Emulator``
        frames_per_step - number of sub-steps gathered per agent step
        new_training - when False and a milestones file already exists, the
            first milestone is treated as already logged
        """
        self.p1_character_selection = character  # 'Scorpion' (default) is 6 #19 and over for random character
        self.p2_character_selection = character

        # This will need Xvfb X11 virtual server installed on GNU/Linux system
        if not render:
            os.system("Xvfb :1 -screen 0 800x600x16 +extension RANDR &")
            os.environ["DISPLAY"] = ":1"

        self.env_id = env_id
        self.self_play = self_play
        self.frame_ratio = frame_ratio
        self.frames_per_step = frames_per_step
        self.throttle = throttle
        self.debug = debug
        self.emu = Emulator(env_id, roms_path, "umk3", setup_memory_addresses(), frame_ratio=frame_ratio, render=render,
                            throttle=throttle, debug=debug)
        self.started = False

        self.expected_time_remaining = 0
        self.time_remaining = 0
        self.done = False
        self.round_done = False
        self.stage_done = False
        self.game_over = False
        self.game_completed = False
        self.finished_single_player = False
        self.num_total_characters = umk3_num_total_characters

        # Create two Player objects with useful values for each player (a player can be an agent or the game's AI)
        self.P1 = Player('P1', self.p1_character_selection, self.num_total_characters)
        self.P2 = Player('P2', self.p2_character_selection, self.num_total_characters)
        self.env_player = player  # If environment's agent controls 'P1', 'P2' or both in 2 Player self-play 'Vs'

        # Player is initialized to 'Vs' if self_play is True
        if self.self_play is True:
            self.env_player = 'Vs'

        # Number of self-play Vs rounds to be played before the agent plays a single player benchmark in story mode
        self.vs_rounds_before_single_player = vs_rounds_before_single_player
        self.current_vs_rounds_before_single_player = vs_rounds_before_single_player

        # Stage is counting only if playing in single-player mode
        # If 'Vs' mode, Stage remains at 0
        # Compare by value (not identity) and use the effective player, which
        # self_play may have switched to 'Vs' above; new_game() does the same.
        self.stage = 0 if self.env_player == 'Vs' else 1

        self.last_vs_game_winner = ''
        self.highest_stage = 0
        self.path = 'Novice'
        self.difficulty = 0
        self.expected_difficulty = 0  # Assumes UMK3 starts with Very Easy pre-selected

        self.paths = ['Novice', 'Warrior', 'Master', 'MasterII']
        self.difficulties = ['Very Easy', 'Easy', 'Medium', 'Hard', 'Very Hard']

        # If the game somehow reaches round 5 in a stage (3 draws), it resets after round 5 ends
        self.round_this_stage = 1
        self.reset_this_round = False

        self.total_episodes_played = 0
        self.memory_values = []

        # Track if any milestones have been logged to decide whether to create a new file or append
        self.new_training = new_training

        milestones_file = os.path.join('milestones', self.env_id + '_milestones.txt')
        self.logged_first_milestone = (not self.new_training) and os.path.exists(milestones_file)

    # Runs a set of action steps over a series of time steps
    # Used for transitioning the emulator through non-learnable gameplay, aka. title screens, character selects
    def run_steps(self, steps):
        for step in steps:
            for i in range(step["wait"]):
                self.emu.step([])
            self.emu.step([action.value for action in step["actions"]])

    def new_game(self):
        """Navigate the menus into a new game and reset all per-game state.

        Depending on ``env_player`` this starts a single-player game for P1
        or P2, or a two-player Vs game. In self-play mode the single-player
        character selection is skipped. After the fight has started, all
        round/stage/game bookkeeping is reset and ``started`` is set.
        """
        # String comparisons use == / in: identity checks against literals
        # only succeed when the interpreter happens to intern both strings.
        assert self.env_player in ('P1', 'P2', 'Vs')

        self.expected_difficulty, difficulty_steps = set_difficulty(self.frame_ratio, self.difficulty,
                                                                    self.expected_difficulty)
        self.run_steps(difficulty_steps)

        # If player is P1, execute steps to start story-mode with Player 1
        if self.env_player == 'P1':
            # Select single-player character, only if environment is not in self-play mode
            # If environment is in self-play, character is pre-selected as the one chosen by the player
            # who won the last Vs game
            if not self.self_play:
                self.run_steps(p1_start_game(self.frame_ratio))
                p1_character_select_steps, p1_character_index = \
                    p1_select_character(self.frame_ratio, self.p1_character_selection)
                self.P1.set_character(p1_character_index)
                if self.debug:
                    print(">Debug: Starting a new single-player game... Difficulty: " +
                          self.difficulties[self.difficulty] + ", Path: " + self.path +
                          ", Player: " + self.env_player + ", Character: " + self.P1.character_name + " \n")
                self.run_steps(p1_character_select_steps)
            else:
                if self.debug:
                    print(">Debug: Continuing to single-player game from Vs... Difficulty: " +
                          self.difficulties[self.difficulty] + ", Path: " + self.path + ", Player: " +
                          self.env_player + ", Character: " + self.P1.character_name + " \n")
            # Select story mode path
            self.run_steps(p1_select_path(self.frame_ratio, self.path))
        # If player is P2, execute steps to start story-mode with Player 2
        elif self.env_player == 'P2':
            # Same character-selection rule as for P1 above
            if not self.self_play:
                self.run_steps(p2_start_game(self.frame_ratio))
                p2_character_select_steps, p2_character_index = \
                    p2_select_character(self.frame_ratio, self.p2_character_selection)
                self.P2.set_character(p2_character_index)
                if self.debug:
                    print(">Debug: Starting a new single-player game... Difficulty: " +
                          self.difficulties[self.difficulty] + ", Path: " + self.path +
                          ", Player: " + self.env_player + ", Character: " + self.P2.character_name + " \n")
                self.run_steps(p2_character_select_steps)
            else:
                if self.debug:
                    print(">Debug: Continuing to single-player game from Vs... Difficulty: " +
                          self.difficulties[self.difficulty] + ", Path: " + self.path +
                          ", Player: " + self.env_player + ", Character: " + self.P2.character_name + " \n")
            # Select story mode path
            self.run_steps(p2_select_path(self.frame_ratio, self.path))
        # If player is set to Vs (in Self-Play mode), execute steps to start 2-Player Vs mode
        else:
            self.run_steps(p1_and_p2_vs_start_game(self.frame_ratio))
            p1_character_select_steps, p1_character_index = p1_select_character(
                self.frame_ratio, self.p1_character_selection)
            self.P1.set_character(p1_character_index)
            p2_character_select_steps, p2_character_index = p2_select_character(
                self.frame_ratio, self.p2_character_selection)
            self.P2.set_character(p2_character_index)
            if self.debug:
                print(">Debug: Starting a new Vs game... Mode: " + self.env_player + ", P1 Character: " +
                      self.P1.character_name + ", P2 Character: " + self.P2.character_name + " \n")
            self.run_steps(p1_character_select_steps)
            self.run_steps(p2_character_select_steps)

        self.wait_for_fight_start()

        # Reset per-player bookkeeping
        self.P1.expected_health, self.P2.expected_health = 0, 0
        self.P1.expected_wins, self.P2.expected_wins = 0, 0
        self.P1.expected_wins_check_done, self.P2.expected_wins_check_done = 0, 0
        # Reset progress flags and counters
        self.done = False
        self.round_done = False
        self.stage_done = False
        self.game_over = False
        self.game_completed = False
        self.started = True
        self.expected_time_remaining = 0
        self.time_remaining = 0
        self.round_this_stage = 1
        self.P1.total_rewards_this_round, self.P2.total_rewards_this_round = 0, 0
        self.P1.total_rewards_this_game, self.P2.total_rewards_this_game = 0, 0
        self.finished_single_player = False
        self.reset_this_round = False
        # Stage is counting only if playing in single-player mode
        # If 'Vs' mode, Stage remains at 0
        self.stage = 0 if self.env_player == 'Vs' else 1

    # Must be called first after creating this class
    # Sends actions to the game until the learnable gameplay starts
    def start(self):
        if self.throttle:
            for i in range(int(250 / self.frame_ratio)):
                self.emu.step([])
        self.new_game()

    def wait_for_fight_start(self):
        data = self.emu.step([])
        while data["fighting"] == 0:
            data = self.emu.step([])

    def next_stage(self):
        self.P1.expected_health, self.P2.expected_health = 0, 0
        #self.expected_health = {"P1": 0, "P2": 0}
        self.P1.expected_wins, self.P2.expected_wins = 0, 0
        #self.expected_wins = {"P1": 0, "P2": 0}
        self.P1.expected_wins_check_done, self.P2.expected_wins_check_done = 0, 0
        #self.expected_wins_check_done = {"P1": 0, "P2": 0}
        self.round_this_stage = 1
        self.done = False
        self.round_done = False
        self.stage_done = False
        return self.wait_for_next_stage_start()

    def wait_for_next_stage_start(self):
        if self.debug:
            print(">Debug: Waiting for next stage to start...\n")
        data = self.emu.step([])
        while data["fighting"] == 1:
            data = self.emu.step([])
        while data["fighting"] == 0:
            data = self.emu.step([])

    def next_round(self):
        self.round_this_stage += 1
        self.reset_this_round = True if self.round_this_stage > 4 else False
        if self.reset_this_round and self.debug:
            print(">Debug: Game has reached round 5 in a stage, will reset at end of this round!")
        self.done = False
        self.round_done = False
        self.P1.expected_health, self.P2.expected_health = 0, 0
        #self.expected_health = {"P1": 0, "P2": 0}

        return self.wait_for_next_round_start()

    def wait_for_next_round_start(self):
        if self.debug:
            print(">Debug: Waiting for next round to start...\n")
        data = self.emu.step([])
        self.time_remaining = (int(data["time_remaining_tens_digit"]) * 10) + int(data["time_remaining_ones_digit"])
        while int(data["healthP1"]) != 166 and int(data["healthP2"]) != 166 and self.time_remaining != 99:
            data = self.emu.step([])
            self.time_remaining = (int(data["time_remaining_tens_digit"]) * 10) + int(data["time_remaining_ones_digit"])

    def new_game_after_loss(self):
        """Run the game-over screen script, then start a new game."""
        if self.debug:
            print(">Debug: Waiting for game over screens...\n")
        game_over_script = wait_for_game_over_screens(self.frame_ratio)
        self.run_steps(game_over_script)
        self.new_game()

    def new_game_after_completion(self):
        """Run the game-completed screen script, then start a new game."""
        if self.debug:
            print(">Debug: Waiting for game completed screens...\n")
        completion_script = wait_for_game_completed_screens(self.frame_ratio)
        self.run_steps(completion_script)
        self.new_game()

    # Steps the emulator along by the requested amount of frames required for the agent to provide actions
    # For single-player
    def step(self, move_action, attack_action):
        assert (self.env_player is 'P1' or self.env_player is 'P2')
        if self.started:
            if not self.round_done and not self.stage_done and not self.game_over and not self.game_completed:
                actions = []
                if self.env_player is 'P1':
                    actions += p1_index_to_move_action(move_action)
                    actions += p1_index_to_attack_action(attack_action)
                elif self.env_player is 'P2':
                    actions += p2_index_to_move_action(move_action)
                    actions += p2_index_to_attack_action(attack_action)
                data = self.gather_frames(actions)
                data = self.check_done(data)
                '''
                if self.debug:
                    print(">Debug: healthP1:" + str(data["healthP1"]) + " healthP2:" + str(data["healthP2"]) + " \n")
                    print(">Debug: turboP1: " + str(data["turboP1"]) + " turboP2: " + str(data["turboP2"]) + " \n")
                    print(">Debug: time_remaining_tens_digit: " + str(
                        data["time_remaining_tens_digit"]) + " time_remaining_ones_digit: " + str(
                        data["time_remaining_ones_digit"]) + " \n")
                    print(">Debug: winsP1: " + str(data["current_round_winsP1"]) + " winsP2: " + str(
                        data["current_round_winsP2"]) + " \n")
                    print(">Debug: fighting:" + str(data["fighting"]) + " \n")
                    '''
                return data["frame"], data[
                    "rewards"], self.done, self.round_done, self.stage_done, self.game_over, self.game_completed
            else:
                raise EnvironmentError("Attempted to step while characters are not fighting")
        else:
            raise EnvironmentError("Start must be called before stepping")

    # Steps the emulator along by the requested amount of frames required for the agent to provide actions
    # For two-player self-play
    def vs_step(self, p1_move_action, p1_attack_action, p2_move_action, p2_attack_action):
        assert (self.env_player is 'Vs')
        if self.started:
            if not self.round_done and not self.stage_done and not self.game_over and not self.game_completed:
                actions = []
                actions += p1_index_to_move_action(p1_move_action)
                actions += p1_index_to_attack_action(p1_attack_action)
                actions += p2_index_to_move_action(p2_move_action)
                actions += p2_index_to_attack_action(p2_attack_action)
                data = self.gather_frames(actions)
                data = self.check_done(data)
                '''
                if self.debug:
                    print(">Debug: healthP1:" + str(data["healthP1"]) + " healthP2:" + str(data["healthP2"]) + " \n")
                    print(">Debug: turboP1: " + str(data["turboP1"]) + " turboP2: " + str(data["turboP2"]) + " \n")
                    print(">Debug: time_remaining_tens_digit: " + str(
                        data["time_remaining_tens_digit"]) + " time_remaining_ones_digit: " + str(
                        data["time_remaining_ones_digit"]) + " \n")
                    print(">Debug: winsP1: " + str(data["current_round_winsP1"]) + " winsP2: " + str(
                        data["current_round_winsP2"]) + " \n")
                    print(">Debug: fighting:" + str(data["fighting"]) + " \n")
                '''
                return data["frame"], data[
                    "rewards"], self.done, self.round_done, self.stage_done, self.game_over, self.game_completed
            else:
                raise EnvironmentError("Attempted to step while characters are not fighting")
        else:
            raise EnvironmentError("Start must be called before stepping")

    # Collects the specified amount of frames the agent requires before choosing an action
    def gather_frames(self, actions):
        data = self.sub_step(actions)
        frames = [data["frame"]]
        for i in range(self.frames_per_step - 1):
            data = add_rewards(data, self.sub_step(actions))
            frames.append(data["frame"])
        data["frame"] = frames[0] if self.frames_per_step == 1 else frames
        return data

    # Steps the emulator along by one time step and feeds in any actions that require pressing
    # Takes the data returned from the step and updates book keeping variables while returning rewards
    def sub_step(self, actions):
        data = self.emu.step([action.value for action in actions])
        self.memory_values = data

        p1_diff_reward = (self.P1.expected_health - data["healthP1"])
        p2_diff_reward = (self.P2.expected_health - data["healthP2"])
        #p1_diff_reward = (self.expected_health["P1"] - data["healthP1"])
        #p2_diff_reward = (self.expected_health["P2"] - data["healthP2"])

        self.P1.expected_health, self.P2.expected_health = data["healthP1"], data["healthP2"]
        #self.expected_health = {"P1": data["healthP1"], "P2": data["healthP2"]}

        if data["current_round_winsP1"] == self.P1.expected_wins + 1:
        #if data["current_round_winsP1"] == self.expected_wins["P1"] + 1:
            p1_round_win_reward = 200
            p2_round_win_reward = -200
        elif data["current_round_winsP2"] == self.P2.expected_wins + 1:
        #elif data["current_round_winsP2"] == self.expected_wins["P2"] + 1:
            p2_round_win_reward = 200
            p1_round_win_reward = -200
        else:
            p1_round_win_reward = 0
            p2_round_win_reward = 0

        self.P1.expected_wins, self.P2.expected_wins = data["current_round_winsP1"], data["current_round_winsP2"]
        #self.expected_wins["P1"] = data["current_round_winsP1"]
        #self.expected_wins["P2"] = data["current_round_winsP2"]

        self.time_remaining = (int(data["time_remaining_tens_digit"]) * 10) + int(data["time_remaining_ones_digit"])

        if self.time_remaining == self.expected_time_remaining - 1:

            if data["healthP1"] < data["healthP2"]:
                p1_time_remaining_reward = -1
                p2_time_remaining_reward = 1

            elif data["healthP1"] > data["healthP2"]:
                p1_time_remaining_reward = 1
                p2_time_remaining_reward = -1
            else:
                p1_time_remaining_reward = 0
                p2_time_remaining_reward = 0
        else:
            p1_time_remaining_reward = 0
            p2_time_remaining_reward = 0

        self.expected_time_remaining = self.time_remaining

        # Return the total rewards of each player for this timestep in a rewards dictionary
        # Rewards range roughly from +360 to -360, thus normalizing by dividing by 360
        # Also round rewards to 5 decimal places
        rewards = {
            "P1": round(((p2_diff_reward - p1_diff_reward) + p1_time_remaining_reward + p1_round_win_reward) / 360, 5),
            "P2": round(((p1_diff_reward - p2_diff_reward) + p2_time_remaining_reward + p2_round_win_reward) / 360, 5)
        }

        # Update total rewards for this round and this game
        self.P1.total_rewards_this_round += rewards['P1']
        self.P2.total_rewards_this_round += rewards['P2']
        self.P1.total_rewards_this_game += rewards['P1']
        self.P2.total_rewards_this_game += rewards['P2']
        #self.total_rewards_this_round['P1'] += rewards['P1']
        #self.total_rewards_this_round['P2'] += rewards['P2']
        '''
        if self.debug:
            print(">Debug: Rewards for P1 this timestep: " + str(rewards["P1"]) + "\n")
            print(">Debug: Rewards for P2 this timestep: " + str(rewards["P2"]) + "\n")
        '''
        data["rewards"] = rewards
        return data

    def on_round_done(self):
        if self.debug:
            print(">Debug: Round " + str(self.round_this_stage) + " done!")
            print(">Debug: Total rewards for P1 ths round: " + str(self.P1.total_rewards_this_round))
            print(">Debug: Total rewards for P2 ths round: " + str(self.P2.total_rewards_this_round) + ' \n')
            #print(">Debug: Total rewards for P1 ths round: " + str(self.total_rewards_this_round['P1']) + ' \n')
            #print(">Debug: Total rewards for P2 ths round: " + str(self.total_rewards_this_round['P2']) + ' \n')
        # Set that the round is done
        self.done = True
        # Increment the episodes played
        self.total_episodes_played += 1
        # Reset the round's rewards
        self.P1.total_rewards_this_round, self.P2.total_rewards_this_round = 0, 0
        #self.total_rewards_this_round = {"P1": 0, "P2": 0}
        # Keep up with game reset if it has reached round 5 in a stage
        if self.reset_this_round:
            if self.debug:
                print(">Debug: Game is being reset as it has reached round 5 in a stage! \n")
            self.reset_this_round = False
            self.run_steps(wait_for_game_reset(self.frame_ratio))
            self.new_game()

    def on_single_player_stage_win(self):
        """Handle the agent winning a stage in single-player mode.

        Logs a stage milestone when the stage just won is a new highest stage.
        If that stage is the last one of the current path, the game is flagged
        as completed, a completion milestone is logged and the next path (or,
        once 'MasterII' is done, the next difficulty starting again from
        'Novice') is selected. Otherwise the stage counter is advanced and the
        stage is flagged as done.
        """
        # Log milestone if a new highest stage has been reached
        if self.stage > self.highest_stage:
            self.log_stage_milestone()
            self.highest_stage = self.stage

        # Number of the last stage on each path. Strings are compared by value
        # (== / dict lookup); `is` only worked by accident of string interning.
        final_stage_by_path = {'Novice': 8, 'Warrior': 9, 'Master': 10, 'MasterII': 11}

        if final_stage_by_path.get(self.path) != self.stage:
            # Not the final stage of the path: advance to the next stage
            self.stage_done = True
            self.stage += 1
            if self.debug:
                print(">Debug: Stage won. Advancing to stage " + str(self.stage) + ". \n")
            return

        # The final stage of the path has been won: the game is completed
        self.game_completed = True
        self.finished_single_player = True
        # Log a game completion milestone
        self.log_milestone()
        if self.debug:
            print(">Debug: Game completed on  " + str(self.path) + " path and on "
                  + str(self.difficulties[self.difficulty]) + " difficulty!")
            player = self.P1 if self.env_player == 'P1' else self.P2
            print(">Debug: Total rewards for " + self.env_player + " this game: " +
                  str(player.total_rewards_this_game) + "\n")
        # If the game mode is set to self-play, set the env_player to Vs to start a Vs game next
        if self.self_play:
            self.env_player = 'Vs'
        # Play on a harder path if there is one
        if self.path != 'MasterII':
            self.path = self.paths[self.paths.index(self.path) + 1]
        # Else, if the Very Hard difficulty hasn't been reached,
        # increase the difficulty and reset the path back to 'Novice'
        elif self.difficulty < 4:
            self.difficulty += 1
            self.path = 'Novice'

    def on_single_player_stage_loss(self):
        """Handle the agent losing a stage in single-player mode.

        Flags the game as over and single-player as finished. In self-play
        mode the environment player is switched to 'Vs' so that a Vs game is
        started next.
        """
        self.game_over = True
        self.finished_single_player = True
        if self.debug:
            # Compare by value: `is` on strings depends on interning and could
            # report the wrong player's rewards.
            player = self.P1 if self.env_player == 'P1' else self.P2
            print(">Debug: Total rewards for " + self.env_player + " this game: " +
                  str(player.total_rewards_this_game))
            print(">Debug: Stage lost. Quiting game. \n")
        # If the game mode is set to self-play, set the env_player to Vs to start a Vs game next
        if self.self_play:
            self.env_player = 'Vs'

    def on_single_player_round_win(self):
        """Flag the current round as done after the agent has won it."""
        verbose = self.debug
        self.round_done = True
        if not verbose:
            return
        print(">Debug: Round won. Advancing to next round. \n")

    def on_single_player_round_loss(self):
        """Flag the current round as done after the agent has lost it."""
        verbose = self.debug
        self.round_done = True
        if not verbose:
            return
        print(">Debug: Round lost. Advancing to next round. \n")

    def vs_continue(self, last_game_winner):
        """Continue from a finished Vs game into a new Vs game.

        Runs the continue input sequence for the player who did not win the
        last game, re-selects both characters, waits for the fight to start
        and resets the per-game bookkeeping.

        Args:
            last_game_winner: 'P1' or 'P2', the winner of the previous Vs game.

        Raises:
            EnvironmentError: if last_game_winner is neither 'P1' nor 'P2'.
        """
        if self.debug:
            print(">Debug: Advancing to next Vs game. \n")

        # Compare by value: an equal but non-interned string must not fall
        # through to the error branch, which `is` would allow.
        if last_game_winner == 'P1':
            self.run_steps(p2_vs_continue(self.frame_ratio))
        elif last_game_winner == 'P2':
            self.run_steps(p1_vs_continue(self.frame_ratio))
        else:
            raise EnvironmentError("Attempted to continue to new Vs game without a valid last game's winner")

        # Prepare both character selections first, then play the inputs back
        p1_character_select_steps, p1_character_index = p1_select_character(
            self.frame_ratio, self.p1_character_selection)
        self.P1.set_character(p1_character_index)
        p2_character_select_steps, p2_character_index = p2_select_character(
            self.frame_ratio, self.p2_character_selection)
        self.P2.set_character(p2_character_index)
        self.run_steps(p1_character_select_steps)
        self.run_steps(p2_character_select_steps)

        self.wait_for_fight_start()

        # Reset the per-game state for the new Vs game
        self.P1.expected_health, self.P2.expected_health = 0, 0
        self.P1.expected_wins, self.P2.expected_wins = 0, 0
        self.P1.expected_wins_check_done, self.P2.expected_wins_check_done = 0, 0
        self.done = False
        self.game_over = False
        self.started = True
        self.reset_this_round = False
        self.round_this_stage = 1
        self.expected_time_remaining = 0
        self.time_remaining = 0
        self.P1.total_rewards_this_round, self.P2.total_rewards_this_round = 0, 0
        self.P1.total_rewards_this_game, self.P2.total_rewards_this_game = 0, 0
        self.stage = 0

    def single_player_after_vs(self):
        """Step through the Vs game-over screens and then start a new game."""
        if self.debug:
            print(">Debug: Waiting for game over screen...\n")
        game_over_steps = vs_wait_for_game_over_screens(self.frame_ratio)
        self.run_steps(game_over_steps)
        self.new_game()

    def vs_after_single_player(self):
        """Refill the Vs game counter and start a new game after a loss."""
        vs_games_budget = self.vs_rounds_before_single_player
        self.current_vs_rounds_before_single_player = vs_games_budget
        self.new_game_after_loss()

    def on_vs_stage_done(self, last_game_winner):
        """Record the end of a Vs stage and decide what is played next.

        Flags the game as over, remembers the winner and decrements the
        number of Vs games left before single player. When no Vs games
        remain, the environment player becomes the winner of this stage.
        """
        verbose = self.debug
        self.game_over = True
        self.last_vs_game_winner = last_game_winner
        self.current_vs_rounds_before_single_player -= 1

        # Report both players' totals for the stage that just ended
        if verbose:
            print(">Debug: Total rewards for P1 this game: " +
                  str(self.P1.total_rewards_this_game))
            print(">Debug: Total rewards for P2 this game: " +
                  str(self.P2.total_rewards_this_game))

        vs_games_left = self.current_vs_rounds_before_single_player
        if vs_games_left > 0:
            # More Vs games remain to be played
            if verbose:
                print(">Debug: Vs stage done. " + str(vs_games_left) +
                      " Vs stage(s) remaining. \n")
            return

        # No Vs games remain: continue in single player as the last winner
        self.env_player = last_game_winner
        if verbose:
            print(">Debug: Vs stages done. Continuing to story with last Vs stage's winner: " + self.env_player)

    def on_vs_round_done(self):
        """Flag the current round as done after a Vs round has finished."""
        verbose = self.debug
        self.round_done = True
        if not verbose:
            return
        print(">Debug: Vs round done. Advancing to next round. \n")

    # Checks whether the round or game has finished
    def check_done(self, data):
        """Detect the end of a round and dispatch to the matching handler.

        Updates ``self.time_remaining`` from the two timer digits in ``data``.
        A round counts as ended when either player's round-win counter is one
        above the last value recorded here, when both health values are 0, or
        when the timer is 0 with equal health values; ``self.on_round_done()``
        is called in that case. If ``self.done`` is set afterwards, the result
        is routed to the single-player or Vs win/loss handlers depending on
        ``self.env_player``; a draw only flags the round as done.

        Args:
            data: mapping with the keys "time_remaining_tens_digit",
                "time_remaining_ones_digit", "current_round_winsP1",
                "current_round_winsP2", "healthP1" and "healthP2".

        Returns:
            The ``data`` object that was passed in.

        Raises:
            EnvironmentError: if ``self.done`` is set but none of the
                round-end conditions is detected.
        """
        # Get the time currently remaining from the two memory locations holding each of the two digits (from 99 to 00)
        self.time_remaining = int(data["time_remaining_tens_digit"]) * 10 + int(data["time_remaining_ones_digit"])

        # If a round has ended
        if data["current_round_winsP1"] == self.P1.expected_wins_check_done + 1 \
                or data["current_round_winsP2"] == self.P2.expected_wins_check_done + 1 \
                or (data["healthP1"] == 0 and data["healthP2"] == 0) \
                or (data["healthP1"] == data["healthP2"] and self.time_remaining == 0):
            # NOTE: the self.time_remaining == 0 check may trigger a bug
            self.on_round_done()

        if self.done:
            # If the round wins of P1 have incremented, P1 has won a round!
            if data["current_round_winsP1"] == self.P1.expected_wins_check_done + 1:
                if self.debug:
                    print(">Debug: Game End Condition Met -> P1 wins have incremented in game's memory.")
                    print("P1 Health:" + str(data["healthP1"]) + " P2 Health:" + str(
                        data["healthP2"]) + " Current Time:" + str(self.time_remaining))
                self.P1.expected_wins_check_done = data["current_round_winsP1"]

                # Player tags are compared by value (==): `is` on strings
                # depends on interning and would route an agent game into the
                # Vs branch below.
                # If the agent is playing as player 1
                if self.env_player == 'P1':
                    # If it has reached 2 round wins, P1 (agent) has won the stage
                    if data["current_round_winsP1"] == 2:
                        self.on_single_player_stage_win()
                    # If P1 (agent) hasn't reached the round win limit, proceed to the next round
                    else:
                        self.on_single_player_round_win()
                # Else if the agent is playing as player 2
                elif self.env_player == 'P2':
                    # If P1 (CPU) has reached 2 round wins, the game is over for the agent
                    if data["current_round_winsP1"] == 2:
                        self.on_single_player_stage_loss()
                    # If P1 (CPU) hasn't reached the round win limit, proceed to the next round
                    else:
                        self.on_single_player_round_loss()
                # Else if this Vs self-play
                else:
                    # If it has reached 2 round wins, P1 has won the Vs stage
                    if data["current_round_winsP1"] == 2:
                        self.on_vs_stage_done('P1')
                    # If P1 hasn't reached the round win limit, proceed to the next round
                    else:
                        self.on_vs_round_done()

            # If the round wins of P2 have incremented, P2 has won a round!
            elif data["current_round_winsP2"] == self.P2.expected_wins_check_done + 1:
                if self.debug:
                    print(">Debug: Game End Condition Met -> P2 wins have incremented in game's memory.")
                    print("P1 Health:" + str(data["healthP1"]) + " P2 Health:" + str(
                        data["healthP2"]) + " Current Time:" + str(self.time_remaining))
                self.P2.expected_wins_check_done = data["current_round_winsP2"]

                # If the agent is playing as player 2
                if self.env_player == 'P2':
                    # If it has reached 2 round wins, P2 (agent) has won the stage
                    if data["current_round_winsP2"] == 2:
                        self.on_single_player_stage_win()
                    # If P2 (agent) hasn't reached the round win limit, proceed to the next round
                    else:
                        self.on_single_player_round_win()
                # Else if the agent is playing as player 1
                elif self.env_player == 'P1':
                    # If agent is P1 and P2 (CPU) wins 2 rounds, the game is over for the agent
                    if data["current_round_winsP2"] == 2:
                        self.on_single_player_stage_loss()
                    # If P2 (CPU) hasn't reached the round win limit, proceed to the next round
                    else:
                        self.on_single_player_round_loss()
                # Else if this Vs self-play
                else:
                    # If it has reached 2 round wins, P2 has won the Vs stage
                    if data["current_round_winsP2"] == 2:
                        self.on_vs_stage_done('P2')
                    # If P2 hasn't reached the round win limit, proceed to the next round
                    else:
                        self.on_vs_round_done()

            # If no round wins have incremented but both players' health has reached 0
            # or the time has reached 00 and both players have equal health, it's a draw!
            elif data["healthP1"] == 0 and data["healthP2"] == 0:
                if self.debug:
                    print(">Debug: Game End Condition Met -> Both P1 and P2 health values have reached 0 (Draw).")
                    print("P1 Health:" + str(data["healthP1"]) + " P2 Health:" + str(
                        data["healthP2"]) + " Current Time:" + str(self.time_remaining))
                    print(">Debug: Draw! Advancing to next round. \n")
                self.round_done = True
            elif data["healthP1"] == data["healthP2"] and self.time_remaining == 0:
                if self.debug:
                    print(
                        ">Debug: Game End Condition Met -> Time has ran out with P1 and P2 health values equal (Draw).")
                    print("P1 Health:" + str(data["healthP1"]) + " P2 Health:" + str(
                        data["healthP2"]) + " Current Time:" + str(self.time_remaining))
                    print(">Debug: Draw! Advancing to next round. \n")
                self.round_done = True
            else:
                raise EnvironmentError('Done flag enabled but game done condition not detected in check_done')

        return data

    def reset(self):
        """Advance the environment to the next playable state.

        Dispatches on the end-of-play flags in priority order: game over,
        game completed, stage done, round done.

        Raises:
            EnvironmentError: if none of the flags is set, i.e. reset was
                called while gameplay is still running.
        """
        if self.game_over:
            if not self.self_play:
                self.new_game_after_loss()
            # Compare by value: `is` on strings depends on interning
            elif self.env_player != 'Vs':
                # Vs games are finished; continue in single player
                self.single_player_after_vs()
            elif self.finished_single_player:
                # Single player has just finished; go back to Vs games
                self.vs_after_single_player()
            else:
                self.vs_continue(self.last_vs_game_winner)
        elif self.game_completed:
            self.new_game_after_completion()
        elif self.stage_done:
            self.next_stage()
        elif self.round_done:
            self.next_round()
        else:
            raise EnvironmentError("Reset called while gameplay still running")

    def log_stage_milestone(self):
        """Write a "stage defeated" entry to this environment's milestones file.

        The file is ``milestones/<env_id>_milestones.txt`` relative to the
        current working directory. It is created or overwritten while
        ``self.logged_first_milestone`` is false and appended to (after a
        separating newline) otherwise.
        """
        # Compare by value: `is` on strings depends on interning
        assert self.env_player == 'P1' or self.env_player == 'P2'
        # Create milestones folder if it does not exist
        os.makedirs('milestones', exist_ok=True)
        milestones_path = os.path.join('milestones', self.env_id + '_milestones.txt')

        # Pick the character name up front. Inlining the conditional expression
        # into the concatenation made it swallow the whole message, because a
        # conditional binds more loosely than `+`.
        if self.env_player == 'P1':
            character_name = self.P1.character_name
        else:
            character_name = self.P2.character_name

        # New training: create or overwrite the file; continuation: append
        mode = 'a+' if self.logged_first_milestone else 'w+'
        # The context manager closes the file even if a write fails
        with open(milestones_path, mode) as f:
            if self.logged_first_milestone:
                # Separate this entry from the previous one
                f.write('\n')
            f.write(self.env_id + " has managed to defeat stage " + str(self.stage) + " after " +
                    str(self.total_episodes_played) + " total episodes playing as " + self.env_player +
                    " with " + character_name + ".\r\n")

    def log_milestone(self):
        """Write a "path completed" entry to this environment's milestones file.

        The file is ``milestones/<env_id>_milestones.txt`` relative to the
        current working directory. It is created or overwritten while
        ``self.logged_first_milestone`` is false and appended to (after a
        separating newline) otherwise.
        """
        # Compare by value: `is` on strings depends on interning
        assert self.env_player == 'P1' or self.env_player == 'P2'
        # Create milestones folder if it does not exist
        os.makedirs('milestones', exist_ok=True)
        milestones_path = os.path.join('milestones', self.env_id + '_milestones.txt')

        # Pick the character name up front. Inlining the conditional expression
        # into the concatenation made it swallow the whole message, because a
        # conditional binds more loosely than `+`.
        if self.env_player == 'P1':
            character_name = self.P1.character_name
        else:
            character_name = self.P2.character_name

        # New training: create or overwrite the file; continuation: append
        mode = 'a+' if self.logged_first_milestone else 'w+'
        # The context manager closes the file even if a write fails
        with open(milestones_path, mode) as f:
            if self.logged_first_milestone:
                # Separate this entry from the previous one
                f.write('\n')
            f.write(self.env_id + " has managed to complete the " + str(self.path) + " path on the "
                    + str(self.difficulties[self.difficulty]) + " difficulty after "
                    + str(self.total_episodes_played) + " total episodes as " + self.env_player
                    + " with " + character_name + ".\r\n")

    # Safely closes emulator
    def close(self):
        """Close the underlying emulator by delegating to ``self.emu.close()``."""
        self.emu.close()