def handle_play(self, game_frame):
        self.game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3],
                                                         frame_type="PIPELINE")
        frame_buffer = self.game_frame_buffer.frames

        for model_name in self.game.api_class.SpriteLocator.sprite_models:
            print("model is", model_name)

            print(
                self.game.api_class.SpriteLocator.sprite_recognized(
                    game_frame=game_frame,
                    screen_region=self.game.screen_regions[model_name],
                    model_name=model_name,
                    classes=self.game.api_class.SpriteLocator.sprite_models[model_name]["classes"]
                )
            )

        print(game_frame.frame.shape)
        for i, game_frame in enumerate(frame_buffer):
            self.visual_debugger.store_image_data(game_frame.frame,
                                                  game_frame.frame.shape,
                                                  str(i))

        #self.game_frame_buffer.append(game_frame.frame)
        #print("game_frame_buffer:", frame_buffer)
        #if len(frame_buffer) >= self.memory_timeframe:
        # there are enough frames stored to train the network

        move_per_timestep = self.model.decide(frame_buffer)
        score = self.model.evaluate_move(move_per_timestep)
        self.model.update_weights(frame_buffer, score)
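# A minimal sketch (not part of the snippet above) of how a GameFrameBuffer
# returned by FrameGrabber.get_frames is commonly flattened into a single
# numpy state for a model. The frames_to_state helper is hypothetical; it
# only assumes each GameFrame exposes a .frame ndarray, as used above.
import numpy as np

def frames_to_state(game_frame_buffer):
    # Stack the grayscale pipeline frames along the channel axis,
    # yielding an array of shape (height, width, n_frames).
    return np.stack([gf.frame for gf in game_frame_buffer.frames], axis=2)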
Example #2
    def setup_play(self):

        self.last_frame_had_position = False
        self.fill_count = 0
        self.working_trade = False
        self.episode_count = 0

        self.current_action = None
        self.last_action = None
        self.repeat_count = 0

        self.sell_point = (743, 406)
        self.buy_point = (743, 429)
        self.pull_point = (5, 117)
        self.held = False
        self.scraper = T4Scraper(self.game, self.visual_debugger)
        self.frame_buffer = None

        self.scraper.current_frame = FrameGrabber.get_frames([0]).frames[0]
        self.pl = self.scraper.get_pl()
        self.fill_count = self.scraper.get_position_and_fill_count()[1]
        game_inputs = {"Buy": 1, "Sell": 2, "Hold": 3}

        self.ppo_agent = SerpentPPO(frame_shape=(164, 264, 4),
                                    game_inputs=game_inputs)

        try:
            self.ppo_agent.agent.restore(
                directory=os.path.join(os.getcwd(), "datasets", "t4dowmodel"))
#             self.ppo_agent.agent.restore(directory=os.path.join(os.getcwd(), "datasets", "t4simmodel"))
        except Exception:
            pass
Example #3
    def setup_play(self):
        self.game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3],
                                                         frame_type="PIPELINE")
        print("game_frame_buffer:", self.game_frame_buffer)
        #self.ppo_agent.generate_action(game_frame_buffer)

        self.window_dim = (self.game.window_geometry['height'],
                           self.game.window_geometry['width'], 3)
        self.model = KerasDeepKingdom(
            time_dim=(self.memory_timeframe, ),
            game_frame_dim=self.window_dim)  # (600, 960, 3))#(360, 627, 3))
        print("Screen_regions:", self.game.screen_regions)
        for region in self.game.screen_regions:
            print("Region is", region)
            # TODO: Fix this absolute path
            path = r"C:\SerpentAI\datasets\collect_frames"
            classes = [
                name for name in os.listdir(path)
                if os.path.isdir(os.path.join(path, name))
                and (region + "_") in name
            ]
            print("directory contains: ", classes)

            if os.path.isfile(region + "_trained_model.h5"):
                print("Loading model", region, "from file")
                self.game.api_class.SpriteLocator.load_model(model_name=region,
                                                             classes=classes)
            else:
                print("Building and training", region, "network from scratch")
                self.game.api_class.SpriteLocator.construct_sprite_locator_network(
                    model_name=region,
                    screen_region=self.game.screen_regions[region],
                    classes=classes)
                self.game.api_class.SpriteLocator.train_model(
                    classes=classes, model_name=region)
Example #4
    def setup_play(self):

        self.fill_count = 0
        self.working_trade = False
        self.episode_count = 0

        self.buy_point = (326, 334)
        self.sell_point = (647, 634)

        self.scraper = T4Scraper(self.game, self.visual_debugger)
        self.frame_buffer = None

        self.scraper.current_frame = FrameGrabber.get_frames([0]).frames[0]
        self.pl = self.scraper.get_pl()
        self.fill_count = self.scraper.get_position_and_fill_count()[1]
        game_inputs = {
            "Buy": 1,
            "Sell": 2
        }

        self.ppo_agent = SerpentPPO(
            frame_shape=(248, 510, 2),
            game_inputs=game_inputs
        )

        try:
            self.ppo_agent.agent.restore(directory=os.path.join(os.getcwd(), "datasets", "t4androidmodel"))
        except Exception:
            pass
Example #5
    def handle_play(self, game_frame, game_frame_pipeline):
        valid_game_state = self.environment.update_game_state(game_frame)

        if not valid_game_state:
            return None

        reward = self.reward_aisaac(self.environment.game_state, game_frame)

        terminal = (
            not self.environment.game_state["isaac_alive"] or
            self.environment.game_state["boss_dead"] or
            self.environment.episode_over
        )

        self.agent.observe(reward=reward, terminal=terminal)

        if not terminal:
            frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")
            agent_actions = self.agent.generate_actions(frame_buffer)

            self.environment.perform_input(agent_actions)
        else:
            self.environment.clear_input()

            self.agent.reset()

            if self.environment.game_state["boss_dead"]:
                self.analytics_client.track(event_key="BOSS_KILL", data={"foo": "bar"})

            self.environment.end_episode()
            self.environment.new_episode(maximum_steps=960, reset=self.agent.mode.name != "TRAIN")
Example #6
    def handle_play(self, game_frame, game_frame_pipeline):
        self.paused_at = None

        valid_game_state = self.environment.update_game_state(game_frame)

        if not valid_game_state:
            return None

        reward = self.reward(self.environment.game_state)

        terminal = (
            self.environment.game_state["is_too_slow"] or
            self.environment.game_state["is_out_of_fuel"] or
            self.environment.game_state["is_race_over"] or
            self.environment.episode_over
        )

        self.agent.observe(reward=reward, terminal=terminal)

        if not terminal:
            game_frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")
            agent_actions = self.agent.generate_actions(game_frame_buffer)

            self.environment.perform_input(agent_actions)
        else:
            self.environment.clear_input()
            self.agent.reset()

            if self.environment.game_state["is_race_over"]:
                time.sleep(5)
                self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
                time.sleep(11)

                if (self.environment.episode + 1) % self.environment.episodes_per_race_track == 0:
                    self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
                    time.sleep(8)

                    self.game.api.select_random_region_track(self.input_controller)
            else:
                self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
                time.sleep(1)

                if (self.environment.episode + 1) % self.environment.episodes_per_race_track == 0:
                    for _ in range(3):
                        self.input_controller.tap_key(KeyboardKey.KEY_S)
                        time.sleep(0.1)

                    self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
                    time.sleep(1)
                    self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
                    time.sleep(8)
                    self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
                    time.sleep(1)
            
                    self.game.api.select_random_region_track(self.input_controller)

            self.environment.end_episode()
            self.environment.new_episode(maximum_steps=2400)
Example #7
    def handle_play(self, game_frame):
        self.move_time_start = time.time()
        self.game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3],
                                                         frame_type="PIPELINE")
        frame_buffer = self.game_frame_buffer.frames

        context_frame = FrameGrabber.get_frames([0],
                                                frame_type="FULL").frames[0]
        context = self.machine_learning_models["context_classifier"].predict(
            frame_buffer[0].frame)  # context_frame.frame)
        self.game_state.current_context = context
        self.game_state.not_playing_context_counter += 1
        self.move_time = time.time() - self.move_time_start

        #print(context)
        if (context is None or context in ["ofdp_playing", "ofdp_game"]) and self.game_state.health > 0:
            # If we are currently playing an episode
            if self.game_state.read_kill_count() > 10000:
                # If kill_count jumps this high mid-episode, the memory
                # address for it has likely changed, so ignore the episode
                self.agent.printer.print_error()
                return

            self.make_a_move(frame_buffer, context_frame)
            self.game_state.not_playing_context_counter = 0
            return
        else:
            # This is a hack to avoid runs being ended early due to
            # context classifier getting the wrong context while playing
            if self.game_state.not_playing_context_counter < 5:
                return

        # Navigate context menu if identified
        self.do_splash_screen_action(context)
        self.do_main_menu_actions(context)
        self.do_mode_menu_action(context)
        self.do_survival_menu_action(context)
        self.do_survival_pre_game_action(context)
        self.do_game_paused_action(context)
        self.do_game_end_highscore_action(context)
        self.do_game_end_score_action(context)
Example #8
    def handle_play(self, game_frame):

        #         for i, game_frame in enumerate(self.game_frame_buffer.frames):
        #             self.visual_debugger.store_image_data(
        #                 game_frame.frame,
        #                 game_frame.frame.shape,
        #                 str(i)
        #             )
        #

        self.scraper.current_frame = game_frame
        if not self.held and self.has_open_positions():
            self.last_frame_had_position = True
            return

        if self.last_frame_had_position:
            self.last_frame_had_position = False
            return

        self.episode_count += 1
        reward = self.reward_agent()

        self.ppo_agent.observe(reward, terminal=False)

        self.frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3],
                                                    frame_type="PIPELINE")
        self.frame_buffer = self.extract_game_area(self.frame_buffer)

        self.visual_debugger.store_image_data(self.frame_buffer[0],
                                              self.frame_buffer[0].shape, 2)
        self.visual_debugger.store_image_data(self.frame_buffer[3],
                                              self.frame_buffer[3].shape, 3)
        action, label, game_input = self.ppo_agent.generate_action(
            self.frame_buffer)
        print(label)
        self.current_action = label
        if game_input == 1:
            # perform buy
            self.working_trade = True
            self.input_controller.move(x=self.buy_point[0],
                                       y=self.buy_point[1])
            self.input_controller.click()

        elif game_input == 2:
            # perform sell
            self.working_trade = True
            self.input_controller.move(x=self.sell_point[0],
                                       y=self.sell_point[1])
            self.input_controller.click()

        elif game_input == 3:
            self.held = True
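# A minimal sketch (hypothetical helper, not part of the snippet) of the
# integer-action dispatch above, rewritten as a lookup table. The buy/sell
# points and the input controller calls mirror the snippet.
def perform_trade(input_controller, game_input, buy_point, sell_point):
    points = {1: buy_point, 2: sell_point}
    if game_input in points:
        # Buy (1) or sell (2): click the corresponding screen point.
        x, y = points[game_input]
        input_controller.move(x=x, y=y)
        input_controller.click()
        return True  # a trade is now working
    return False  # game_input 3 means hold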
Example #9
    def reset(self):
        self.frame_buffer = FrameGrabber.get_frames([0], frame_type="PIPELINE")
        self.frame_buffer = self.extract_game_area(self.frame_buffer)

        # states = np.stack(
        #     self.frame_buffer,
        #     axis=2
        # )

        print('GETTING STATES')
        print(self.frame_buffer[0])
        print(self.frame_buffer[0].shape)
        return self.frame_buffer[0], 0, False, {}
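# The commented-out np.stack above hints at the intended state layout. A
# minimal Gym-style sketch of reset(), assuming extract_game_area returns a
# list of 2-D grayscale frames as in the snippet:
import numpy as np

def reset_stacked(frames):
    # Stack frames channel-last and return (observation, reward, done, info).
    state = np.stack(frames, axis=2)
    return state, 0, False, {}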
Example #10
    def setup_play(self):
        self.run_count = 0
        self.run_reward = 0

        self.observation_count = 0

        self.delay_fuzzing_durations = [0.05, 0.1, 0.2]
        self.delay_fuzzing_observation_cap = 100000

        self.performed_inputs = collections.deque(list(), maxlen=8)

        self.reward_10 = collections.deque(list(), maxlen=10)
        self.reward_100 = collections.deque(list(), maxlen=100)
        self.reward_1000 = collections.deque(list(), maxlen=1000)

        self.average_reward_10 = 0
        self.average_reward_100 = 0
        self.average_reward_1000 = 0

        self.score_10 = collections.deque(list(), maxlen=10)
        self.score_100 = collections.deque(list(), maxlen=100)
        self.score_1000 = collections.deque(list(), maxlen=1000)

        self.average_score_10 = 0
        self.average_score_100 = 0
        self.average_score_1000 = 0

        self.top_score = 0
        self.top_score_run = 0

        self.previous_score = 0

        # Measured on the Serpent.AI Lab Stream (Feb 12 2018)
        self.random_average_score = 67.51
        self.random_top_score = 5351
        self.random_runs = 2700

        self.death_check = False
        self.just_relaunched = False

        self.frame_buffer = None

        self.ppo_agent = SerpentPPO(frame_shape=(100, 100, 4),
                                    game_inputs=self.game_inputs)

        # Warm Agent?
        game_frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6],
                                                    frame_type="PIPELINE")
        self.ppo_agent.generate_action(game_frame_buffer)

        self.started_at = datetime.utcnow().isoformat()
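# A minimal sketch of the rolling-average bookkeeping above: fixed-length
# deques give O(1) "last N runs" windows without manual trimming. Only the
# standard library and numpy are assumed.
import collections
import numpy as np

reward_10 = collections.deque(maxlen=10)
reward_100 = collections.deque(maxlen=100)

def record_reward(reward):
    # Push the newest reward and return the windowed averages.
    reward_10.appendleft(reward)
    reward_100.appendleft(reward)
    return float(np.mean(reward_10)), float(np.mean(reward_100))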
Example #11
    def handle_play(self, game_frame, game_frame_pipeline):
        self.paused_at = None
        with mss() as sct:
            monitor_var = sct.monitors[1]
            monitor = sct.grab(monitor_var)
            valid_game_state = self.environment.update_startregions_state(monitor)
        if not valid_game_state:
            return None

        reward, over_boolean = self.reward(self.environment.startregions_state, 1.0)
        terminal = over_boolean

        self.agent.observe(reward=reward, terminal=terminal)

        if not terminal:
            game_frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")
            agent_actions = self.agent.generate_actions(game_frame_buffer)
            print("Current Action: ")
            print(agent_actions)
            str_agent_actions = str(agent_actions)
            if "MOVE MOUSE X" in str_agent_actions:
                set_pos(200, 0)
            if "MOVE MOUSE Y" in str_agent_actions:
                set_pos(0, 200)
            if "MOVE MOUSE XY" in str_agent_actions:
                set_pos(100, 100)
            if "MOVE MOUSE X2" in str_agent_actions:
                set_pos(-200, 0)
            if "MOVE MOUSE Y2" in str_agent_actions:
                set_pos(0, -200)
            if "MOVE MOUSE XY2" in str_agent_actions:
                set_pos(-100, -100)
            if "MOVE MOUSE XY3" in str_agent_actions:
                set_pos(-100, 100)
            if "MOVE MOUSE XY4" in str_agent_actions:
                set_pos(100, -100)
            if "LETHAL" in str_agent_actions:
                self.input_non_lethal = True
            self.human()
            self.environment.perform_input(agent_actions)
        else:
            self.environment.clear_input()
            self.agent.reset()

            time.sleep(30)
            # TODO
            # Choose Loadout (Medium Range)
            self.environment.end_episode()
            self.environment.new_episode(maximum_steps=350)
            print("New Episode")
Example #12
    def step(self, action):
        if action == 1:
            # perform buy

            self.working_trade = True
            self.input_controller.move(x=self.buy_point[0],
                                       y=self.buy_point[1])
            self.input_controller.click()

        elif action == 2:
            # perform sell

            self.working_trade = True
            self.input_controller.move(x=self.sell_point[0],
                                       y=self.sell_point[1])
            self.input_controller.click()

        while self.has_open_positions():
            sleep(0.1)

            frame = FrameGrabber.get_frames([0]).frames[0]
            self.scraper.current_frame = frame
            pass

        reward = self.reward_agent()

        self.frame_buffer = FrameGrabber.get_frames([0], frame_type="PIPELINE")
        self.frame_buffer = self.extract_game_area(self.frame_buffer)

        # states = np.stack(
        #     self.frame_buffer,
        #     axis=2
        # )

        print('GETTING STATES')
        # print(states.shape)
        return self.frame_buffer[0], reward, False, {}
Example #13
    def setup(self, ppo_agent, model_name, metadata_key):
    
        self.keys = RedisKeys(metadata_key)
        self.redis = redis.Redis(port=6001)
    
        self.ppo_agent = ppo_agent
        self.history = list()
        self.m_k = metadata_key
        self.buys = 0
        self.sells = 0
        self.last_frame_had_position = False
        self.fill_count = 0
        self.working_trade = False
        self.episode_count = 0

        self.number_of_trades = 0
        self.number_of_wins = 0
        self.current_action = None
        self.last_action = None
        self.repeat_count = 0

        self.sell_point = (671, 447)
        self.buy_point = (669, 476)
        self.pull_point = (562, 173)
        self.held = False
        self.scraper = T4Scraper(self.s.game, self.s.visual_debugger)
        self.frame_buffer = None

        self.scraper.current_frame = FrameGrabber.get_frames([0]).frames[0]
        self.pl = self.scraper.get_pl()
        self.fill_count = self.scraper.get_position_and_fill_count()[1]
        
        self.model_name = model_name

        
        print('AFTER INIT AGENT')
        try:
            self.ppo_agent.agent.restore(directory=os.path.join(
                os.getcwd(), "datasets", self.model_name))
#             self.ppo_agent.agent.restore(directory=os.path.join(os.getcwd(), "datasets", "t4simmodel"))
        except Exception:
            pass
            
        print('AFTER RESTORE')
        
        self.get_metadata()
Example #14
    def handle_data(self, game_frame, game_frame_pipeline):
        hp_int = self._measure_actor_hp()
        try:
            if hp_int < 10:
                terminal = {1: False}
            else:
                terminal = {0: True}
        except Exception:
            terminal = {1: False}
        self.reward_observe = self.reward_ai()
        self.agent.observe(reward=self.reward_observe, terminal=terminal)

        frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6],
                                               frame_type="PIPELINE")
        self.game_input = self.agent.generate_actions(frame_buffer)
        self.game_input = str(self.game_input)
        print(self.game_input)
        return self.game_input
Example #15
    def handle_play(self, game_frame):
        # append memory data into game state
        (self.game_state["com_x"], self.game_state["com_y"], self.ai_x,
         self.ai_y, self.ball_x, self.ball_y, self.com_sc, self.ai_sc,
         self.col_size, self.game_state["col_x"],
         self.game_state["col_y"]) = readInfo()
        self.game_state["ai_x"].appendleft(self.ai_x)
        self.game_state["ai_y"].appendleft(self.ai_y)
        self.game_state["ball_x"].appendleft(self.ball_x)
        self.game_state["ball_y"].appendleft(self.ball_y)
        self.game_state["ai_score"].appendleft(self.ai_sc)
        self.game_state["com_score"].appendleft(self.com_sc)
        self.game_state["col_size"].appendleft(self.col_size)

        # Judge whether we are in-game by reading a pixel value (tricky)
        self.game_frame_img = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0].frame
        if self.game_frame_img[91, 49] != 0.3607843137254902:
            self.handle_notInGame()
        else:
            self.game_state["playing"] = True
            self.handle_fight(game_frame)
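# The exact float compare above is fragile: 0.3607843137254902 is 92/255
# after grayscale normalization. A minimal sketch of a tolerance-based probe
# (coordinates and reference value taken from the snippet; the helper itself
# is hypothetical):
import numpy as np

def is_in_game(frame, probe=(91, 49), reference=92 / 255, tolerance=1e-3):
    # Probe a single pixel and compare within a tolerance instead of
    # relying on exact float equality.
    return bool(np.isclose(frame[probe], reference, atol=tolerance))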
Example #16
    def handle_play(self, game_frame):
        # locate sprite position and existence
        '''
        logo_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_LOGO'], game_frame=game_frame)
        menu_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_MENU'], game_frame=game_frame)
        game_set_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_GAME_SET'], game_frame=game_frame)
        '''

        # append memory data into game state
        (self.game_state["com_x"], self.game_state["com_y"], self.ai_x,
         self.ai_y, self.ball_x, self.ball_y, self.com_sc, self.ai_sc,
         self.col_size, self.game_state["col_x"],
         self.game_state["col_y"]) = readInfo()
        self.game_state["ai_x"].appendleft(self.ai_x)
        self.game_state["ai_y"].appendleft(self.ai_y)
        self.game_state["ball_x"].appendleft(self.ball_x)
        self.game_state["ball_y"].appendleft(self.ball_y)
        self.game_state["ai_score"].appendleft(self.ai_sc)
        self.game_state["com_score"].appendleft(self.com_sc)
        self.game_state["col_size"].appendleft(self.col_size)

        self.handle_frame_process(game_frame)
        '''
        if(logo_locator):
            print('Entering Logo...')
            self.game_state["playing"] = False
            self.handle_menu()
        elif (menu_locator):
            print('Entering Menu...')
            self.game_state["playing"] = False
            self.handle_menu()
        elif (game_set_locator):
            print('Game Set!')
            self.handle_fight_end(game_frame)
        '''

        # Judge whether we are in-game by reading a pixel value (tricky)
        self.game_frame_img = FrameGrabber.get_frames(
            [0], frame_type="PIPELINE").frames[0].frame
        if self.game_frame_img[100, 81] != 0.7137254901960784:
            self.handle_notInGame()
        else:
            self.game_state["playing"] = True
            self.handle_fight(game_frame)
Example #17
    def select_random_track(self, input_controller):
        input_controller.handle_keys([])

        start_world_region = None

        while start_world_region is None:
            game_frame_buffer = FrameGrabber.get_frames([0])
            game_frame = game_frame_buffer.frames[0]

            start_world_region = self.identify_world_region(game_frame)

        end_world_region = random.choice(range(0, 9))

        self.go_to_world_region(start_world_region, end_world_region,
                                input_controller)

        input_controller.tap_key(KeyboardKey.KEY_ENTER)
        time.sleep(1)

        possible_keys = [
            KeyboardKey.KEY_W, KeyboardKey.KEY_A, KeyboardKey.KEY_S,
            KeyboardKey.KEY_D
        ]

        for _ in range(30):
            input_controller.tap_key(random.choice(possible_keys))
            time.sleep(0.05)

        input_controller.tap_key(KeyboardKey.KEY_ENTER)
        time.sleep(1)

        possible_keys = [KeyboardKey.KEY_A, KeyboardKey.KEY_D]

        for _ in range(30):
            input_controller.tap_key(random.choice(possible_keys))
            time.sleep(0.05)

        input_controller.tap_key(KeyboardKey.KEY_ENTER)
        time.sleep(1)
Example #18
    def handle_play(self, game_frame, game_frame_pipeline):
        valid_game_state = self.environment.update_game_state(game_frame)

        if not valid_game_state:
            return None

        move_reward, attack_reward = self.reward_aisaac(
            self.environment.game_state, game_frame)

        terminal = (not self.environment.game_state["isaac_alive"]
                    or self.environment.game_state["boss_dead"]
                    or self.environment.episode_over)

        self.agent.observe(move_reward=move_reward,
                           attack_reward=attack_reward,
                           terminal=terminal,
                           boss_hp=self.environment.game_state["boss_hp"],
                           isaac_hp=self.environment.game_state["isaac_hp"])

        if not terminal:
            # Frame offsets [0, 2, 4, 6]: at 30 fps (1/30 s per frame), offset
            # 4 is 4/30 ≈ 0.13 s ago and offset 6 is 6/30 = 0.2 s ago
            frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6],
                                                   frame_type="PIPELINE")
            agent_actions = self.agent.generate_actions(frame_buffer)
            #print(agent_actions)
            self.environment.perform_input(agent_actions)
        else:
            self.environment.clear_input()

            self.agent.reset()

            if self.environment.game_state["boss_dead"]:
                self.analytics_client.track(event_key="BOSS_KILL",
                                            data={"foo": "bar"})

            self.environment.end_episode()
            self.environment.new_episode(maximum_steps=3840, reset=False)
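# A worked example of the frame-offset timing in the comment above, assuming
# the pipeline captures at 30 fps:
FPS = 30
offsets = [0, 2, 4, 6]
seconds_ago = [offset / FPS for offset in offsets]
# -> [0.0, 0.067, 0.133, 0.2] (rounded): the buffer spans the last 0.2 s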
Example #19
    def handle_fight(self, game_frame):
        gc.disable()
        if self.dqn_action.first_run:
            self.dqn_action.first_run = False
            return

        if self.dqn_action.frame_stack is None:
            game_frame_buffer = FrameGrabber.get_frames(
                [0], frame_type="PIPELINE").frames[0]
            self.dqn_action.build_frame_stack(game_frame_buffer.frame)
        else:
            # saving frame pic to analyze
            #self.cid = self.cid + 1
            #game_frame_img = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0]
            #skimage.io.imsave(f"frame{self.cid}.png", game_frame_img.frame)
            game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3],
                                                        frame_type="PIPELINE")

            if self.dqn_action.mode == "TRAIN":
                reward = self._calculate_reward()

                self.game_state["reward"] += reward

                self.dqn_action.append_to_replay_memory(
                    game_frame_buffer,
                    reward,
                    terminal=self.game_state["ai_score"][0] == 15)

                # Every 1000 steps, save latest weights to disk
                if self.dqn_action.current_step % 1000 == 0:
                    self.dqn_action.save_model_weights(
                        file_path_prefix="model/fighting_movement")

                # Every 10000 steps, save weights checkpoint to disk
                if self.dqn_action.current_step % 10000 == 0:
                    self.dqn_action.save_model_weights(
                        file_path_prefix="model/fighting_movement",
                        is_checkpoint=True)
            elif self.dqn_action.mode == "RUN":
                self.dqn_action.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at
            serpent.utilities.clear_terminal()
            print('')
            print(Fore.YELLOW)
            print(Style.BRIGHT)
            print(f"STARTED AT:{self.started_at_str}")
            print(
                f"RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} s"
            )

            print(Style.RESET_ALL)
            #print("")
            print(Fore.GREEN)
            print(Style.BRIGHT)
            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_action.output_step_data()
            print(Style.RESET_ALL)
            print(Style.BRIGHT)
            print(f"CURRENT RUN: {self.game_state['current_run'] }")
            print("")
            print(
                f"CURRENT RUN   REWARD: {round(self.game_state['reward'], 4)}")
            print(f"CURRENT AI    SCORE: {self.game_state['ai_score'][0]}")
            print(f"CURRENT ENEMY SCORE: {self.game_state['com_score'][0]}")
            print("")
            print(
                f"PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
            )
            print(Style.RESET_ALL)

            self.dqn_action.pick_action()
            self.dqn_action.generate_action()

            movement_keys = self.dqn_action.get_input_values()

            print("")
            print(Fore.GREEN)
            print(Style.BRIGHT)
            #print(movement_keys)
            print("" + " + ".join(
                list(map(lambda k: self.key_mapping.get(k), movement_keys))))
            print(Style.RESET_ALL)
            print("")
            print(
                f"AI:        ({self.game_state['ai_x'][0]}, {self.game_state['ai_y'][0]})"
            )
            print(
                f"COM:       ({self.game_state['com_x']}, {self.game_state['com_y']})"
            )
            print(
                f"BALL:      ({self.game_state['ball_x'][0]}, {self.game_state['ball_y'][0]})"
            )
            print(
                f"Collision: ({self.game_state['col_x']}, {self.game_state['col_y']}, {self.game_state['col_size'][0]})"
            )
            print(f"Distance:   {self.game_state['distance'][0]}")

            self.input_controller.handle_keys(movement_keys)

            if self.dqn_action.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_action.erode_epsilon(factor=2)
            self.dqn_action.next_step()

            self.game_state["current_run"] += 1

            if self.game_state['ai_score'][0] == 15 or self.game_state['com_score'][0] == 15:
                # Game over
                self.game_state["ai_score"].appendleft(0)
                self.game_state["com_score"].appendleft(0)
                self.handle_fight_end(game_frame)
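# A minimal sketch of the step-based save cadence above, assuming only an
# object exposing current_step and save_model_weights() as in the snippet:
def maybe_save_weights(dqn, prefix="model/fighting_movement"):
    # Rolling save every 1000 steps, durable checkpoint every 10000.
    if dqn.current_step % 1000 == 0:
        dqn.save_model_weights(file_path_prefix=prefix)
    if dqn.current_step % 10000 == 0:
        dqn.save_model_weights(file_path_prefix=prefix, is_checkpoint=True)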
Example #20
    def train_ddqn(self, game_frame):
        if self.dqn_movement.first_run:
            self.dqn_movement.first_run = False
            self.dqn_projectile.first_run = False
            return None

        heart = frame_to_hearts(game_frame.frame, self.game)
        score = self._process_ocr(game_frame)
        self.get_reward_state(heart, score)

        if self.dqn_movement.frame_stack is None:
            pipeline_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE",
                dtype="float64"
            ).frames[0]
            print(np.shape(pipeline_game_frame.frame))
            # self.dqn_movement.build_frame_stack(pipeline_game_frame.frame)

            self.dqn_movement.frame_stack = self._build_frame_stack(pipeline_game_frame.frame)
            self.dqn_projectile.frame_stack = self.dqn_movement.frame_stack

        else:
            game_frame_buffer = FrameGrabber.get_frames(
                # [0, 4, 8, 12],
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE",
                dtype="float64"
            )

            if self.dqn_movement.mode == "TRAIN":
                self.game_state["run_reward_movement"] += self.reward
                self.game_state["run_reward_projectile"] += self.reward

                self._movement_append_to_replay_memory(
                    game_frame_buffer,
                    self.reward,
                    terminal=self.game_over
                )

                self._projectile_append_to_replay_memory(
                    game_frame_buffer,
                    self.reward,
                    terminal=self.game_over
                )

                # Every 2000 steps, save latest weights to disk
                if self.dqn_movement.current_step % 2000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix="datasets/binding_of_isaac_movement"
                    )
                    self.dqn_projectile.save_model_weights(
                        file_path_prefix="datasets/binding_of_isaac_projectile"
                    )

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_movement.current_step % 20000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix="datasets/c_binding_of_isaac_movement",
                        is_checkpoint=True
                    )
                    self.dqn_projectile.save_model_weights(
                        file_path_prefix="datasets/c_binding_of_isaac_projectile",
                        is_checkpoint=True
                    )

            elif self.dqn_movement.mode == "RUN":
                game_frames = [game_frame.frame for game_frame in game_frame_buffer.frames]
                self.dqn_movement.frame_stack = np.array(game_frames)
                self.dqn_projectile.frame_stack = np.array(game_frames)

            run_time = datetime.now() - self.started_at
            serpent.utilities.clear_terminal()

            print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours,"
                  f" {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")

            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_movement.output_step_data()
            print(f"reward:{self.reward}")

            print("PROJECTILE NEURAL NETWORK:\n")
            self.dqn_projectile.output_step_data()

            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"CURRENT RUN REWARD: "
                  f"{round(self.reward + self.reward, 2)}")

            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(f"CURRENT HEALTH: {heart}")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")

            print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds "
                  f"(Run {self.game_state['record_time_alive'].get('run')}, "
                  f"{'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'}")

            print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")


            if self.game_over == True:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    # Check for records
                    if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_movement.mode == "RUN"
                        }

                else:
                    self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

                self.game_state["current_run_state"] = 0
                self.input_controller.handle_keys([])

                if self.dqn_movement.mode == "TRAIN":
                    for i in range(16):
                        serpent.utilities.clear_terminal()
                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                        print(f"NEXT RUN: {self.game_state['current_run'] + 1} "
                              f"{'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                        self.dqn_movement.train_on_mini_batch()
                        self.dqn_projectile.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_movement"] = 0
                self.game_state["run_reward_projectile"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.s_p1 = 16
                self.game_over = False
                self.reward = 0

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                        self.dqn_movement.update_target_model()
                        self.dqn_projectile.update_target_model()

                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                        self.dqn_movement.enter_run_mode()
                        self.dqn_projectile.enter_run_mode()
                    else:
                        self.dqn_movement.enter_train_mode()
                        self.dqn_projectile.enter_train_mode()

                return None

        self.dqn_movement.pick_action()
        self.dqn_movement.generate_action()

        self.dqn_projectile.pick_action(action_type=self.dqn_movement.current_action_type)
        self.dqn_projectile.generate_action()

        try:
            _thread.start_new_thread(self._execute_action, ("Thread", ))
        except Exception as e:
            print(e)

        if self.dqn_movement.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_movement.erode_epsilon(factor=2)
        self.dqn_projectile.erode_epsilon(factor=2)

        self.dqn_movement.next_step()
        self.dqn_projectile.next_step()

        self.game_state["current_run_steps"] += 1
Example #21
    def grab_latest_frame(self):
        game_frame_buffer = FrameGrabber.get_frames(
            [0], (self.window_geometry.get("height"),
                  self.window_geometry.get("width"), 3))

        return game_frame_buffer.frames[0]
Example #22
    def setup_play(self):
        self.game_inputs = {
            "MOVE UP": [
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_W)
            ],
            "MOVE LEFT": [
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_A)
            ],
            "MOVE DOWN": [
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_S)
            ],
            "MOVE RIGHT": [
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_D)
            ],
            "MOVE TOP-LEFT": [
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_W),
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_A)
            ],
            "MOVE TOP-RIGHT": [
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_W),
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_D)
            ],
            "MOVE DOWN-LEFT": [
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_S),
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_A)
            ],
            "MOVE DOWN-RIGHT": [
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_S),
                KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_D)
            ],
            "SHOOT UP": [
                MouseEvent(MouseEvents.CLICK, MouseButton.LEFT)
            ],
            "SHOOT LEFT": [
                MouseEvent(MouseEvents.CLICK, MouseButton.RIGHT)
            ],
            "DON'T MOVE": []
        }

        self.ppo_agent = SerpentPPO(
            frame_shape=(125, 112, 4),
            game_inputs=self.game_inputs
        )

        self.first_run = True
        self.game_over = False
        self.run_count = 0
        self.run_reward = 0

        self.observation_count = 0
        self.episode_observation_count = 0

        self.performed_inputs = collections.deque(list(), maxlen=8)

        self.reward_10 = collections.deque(list(), maxlen=10)
        self.reward_100 = collections.deque(list(), maxlen=100)
        self.reward_1000 = collections.deque(list(), maxlen=1000)

        self.rewards = list()

        self.average_reward_10 = 0
        self.average_reward_100 = 0
        self.average_reward_1000 = 0

        self.top_reward = 0
        self.top_reward_run = 0

        self.previous_score = 0

        self.score_10 = collections.deque(list(), maxlen=10)
        self.score_100 = collections.deque(list(), maxlen=100)
        self.score_1000 = collections.deque(list(), maxlen=1000)

        self.average_score_10 = 0
        self.average_score_100 = 0
        self.average_score_1000 = 0

        self.best_score = 0
        self.best_score_run = 0

        self.just_relaunched = False

        self.frame_buffer = None

        try:
            self.ppo_agent.agent.restore_model(
                directory=os.path.join(
                    os.getcwd(), "datasets", "pacai"))
            self.restore_metadata()
        except Exception:
            pass

        self.analytics_client.track(
            event_key="INITIALIZE", data=dict(
                episode_rewards=[]))

        for reward in self.rewards:
            self.analytics_client.track(
                event_key="EPISODE_REWARD", data=dict(
                    reward=reward))
            time.sleep(0.01)

        # Warm Agent?
        game_frame_buffer = FrameGrabber.get_frames(
            [0, 1, 2, 3], frame_type="PIPELINE")
        game_frame_buffer = self.extract_game_area(game_frame_buffer)
        self.ppo_agent.generate_action(game_frame_buffer)

        self.score = collections.deque(np.full((16,), 0), maxlen=16)
        self.lives = collections.deque(np.full((16,), 3), maxlen=16)
        self.continuity_bonus = 0

        self.started_at = datetime.utcnow().isoformat()
        self.episode_started_at = None

        self.paused_at = None

        print("Enter - Auto Save")
        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        time.sleep(2)
        print("Enter - Menu")
        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        time.sleep(1)
        print("Enter - Start game")
        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        time.sleep(1)

        # Make sure to initialize Game() after passing the Start game menu,
        # otherwise the pointers may not be fully loaded.
        self.game_data = Game()
        return
Example #23
    def handle_play(self, game_frame):
        if self.first_run:
            self.run_count += 1
            self.first_run = False
            self.episode_started_at = time.time()

            return None

        self.printer.add("")
        self.printer.add("Log234 - Pac-AI")
        self.printer.add("Reinforcement Learning: Training a PPO Agent")
        self.printer.add("")
        self.printer.add(f"Stage Started At: {self.started_at}")
        self.printer.add(f"Current Run: #{self.run_count}")
        self.printer.add("")

        if self.game_data.IsPaused():
            if self.paused_at is None:
                self.paused_at = time.time()

            # Give ourselves 30 seconds to work with
            if time.time() - self.paused_at >= 30:
                self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
                time.sleep(1)
                return

            self.printer.add("The game is paused.")
            self.printer.flush()
            return
        else:
            self.paused_at = None

        self.score.appendleft(self.game_data.GetScore())
        self.printer.add(f"Score: {self.score[0]}")
        self.lives.appendleft(self.game_data.GetLives())
        self.printer.add(f"Lives: {self.lives[0]}")

        reward = self.reward_agent()

        self.printer.add(f"Current Reward: {round(reward, 2)}")
        self.printer.add(f"Run Reward: {round(self.run_reward, 2)}")
        self.printer.add("")

        if self.frame_buffer is not None:
            self.run_reward += reward

            self.observation_count += 1
            self.episode_observation_count += 1

            self.analytics_client.track(
                event_key="RUN_REWARD", data=dict(
                    reward=reward))

            if self.ppo_agent.agent.batch_count == self.ppo_agent.agent.batch_size - 1:
                self.printer.flush()
                self.printer.add("")
                self.printer.add("Updating Pac-AI Model With New Data... ")
                self.printer.flush()

                self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
                self.ppo_agent.observe(
                    reward, terminal=(
                        self.game_data.IsOver()))
                self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)

                self.frame_buffer = None

                if not self.game_data.IsOver():
                    time.sleep(1)
                    return None
            else:
                self.ppo_agent.observe(
                    reward, terminal=(
                        self.game_data.IsOver()))

        self.printer.add(f"Observation Count: {self.observation_count}")
        self.printer.add(
            f"Episode Observation Count: {self.episode_observation_count}")
        self.printer.add(
            f"Current Batch Size: {self.ppo_agent.agent.batch_count}")
        self.printer.add("")

        if not self.game_data.IsOver():
            self.death_check = False

            self.printer.add(
                f"Continuity Bonus: {round(self.continuity_bonus, 2)}")
            self.printer.add("")
            self.printer.add(
                f"Average Rewards (Last 10 Runs): {round(self.average_reward_10, 2)}")
            self.printer.add(
                f"Average Rewards (Last 100 Runs): {round(self.average_reward_100, 2)}")
            self.printer.add(
                f"Average Rewards (Last 1000 Runs): {round(self.average_reward_1000, 2)}")
            self.printer.add("")
            self.printer.add(
                f"Top Run Reward: {round(self.top_reward, 2)} (Run #{self.top_reward_run})")
            self.printer.add("")
            self.printer.add(
                f"Previous Run Score: {round(self.previous_score, 2)}")
            self.printer.add("")
            self.printer.add(
                f"Average Score (Last 10 Runs): {round(self.average_score_10, 2)}")
            self.printer.add(
                f"Average Score (Last 100 Runs): {round(self.average_score_100, 2)}")
            self.printer.add(
                f"Average Score (Last 1000 Runs): {round(self.average_score_1000, 2)}")
            self.printer.add("")
            self.printer.add(
                f"Best Score: {round(self.best_score, 2)} (Run #{self.best_score_run})")
            self.printer.add("")
            self.printer.add("Latest Inputs:")
            self.printer.add("")

            for i in self.performed_inputs:
                self.printer.add(i)

            self.printer.flush()

            self.frame_buffer = FrameGrabber.get_frames(
                [0, 1, 2, 3], frame_type="PIPELINE")
            self.frame_buffer = self.extract_game_area(self.frame_buffer)

            action, label, game_input = self.ppo_agent.generate_action(
                self.frame_buffer)

            self.performed_inputs.appendleft(label)
            self.input_controller.handle_keys(game_input)
        else:
            self.input_controller.handle_keys([])
            self.analytics_client.track(
                event_key="RUN_END", data=dict(
                    run=self.run_count))

            self.printer.add("Game Over.")
            self.printer.flush()
            self.run_count += 1

            self.reward_10.appendleft(self.run_reward)
            self.reward_100.appendleft(self.run_reward)
            self.reward_1000.appendleft(self.run_reward)

            self.rewards.append(self.run_reward)

            self.average_reward_10 = float(np.mean(self.reward_10))
            self.average_reward_100 = float(np.mean(self.reward_100))
            self.average_reward_1000 = float(np.mean(self.reward_1000))

            if self.run_reward > self.top_reward:
                self.top_reward = self.run_reward
                self.top_reward_run = self.run_count - 1

                self.analytics_client.track(
                    event_key="NEW_RECORD",
                    data=dict(
                        type="REWARD",
                        value=self.run_reward,
                        run=self.run_count - 1))

            self.analytics_client.track(
                event_key="EPISODE_REWARD", data=dict(
                    reward=self.run_reward))

            self.previous_score = max(list(self.score)[:4])

            self.run_reward = 0

            self.score_10.appendleft(self.previous_score)
            self.score_100.appendleft(self.previous_score)
            self.score_1000.appendleft(self.previous_score)

            self.average_score_10 = float(np.mean(self.score_10))
            self.average_score_100 = float(np.mean(self.score_100))
            self.average_score_1000 = float(np.mean(self.score_1000))

            if self.previous_score > self.best_score:
                self.best_score = self.previous_score
                self.best_score_run = self.run_count - 1

                self.analytics_client.track(
                    event_key="NEW_RECORD",
                    data=dict(
                        type="score",
                        value=self.previous_score,
                        run=self.run_count - 1))

            if self.run_count % 10 == 0:
                self.ppo_agent.agent.save_model(
                    directory=os.path.join(
                        os.getcwd(),
                        "datasets",
                        "pacai",
                        "ppo_model"),
                    append_timestep=False)
                self.dump_metadata()

            self.lives = collections.deque(np.full((16,), 3), maxlen=16)
            self.score = collections.deque(np.full((16,), 0), maxlen=16)

            self.multiplier_damage = 0

            self.performed_inputs.clear()

            self.frame_buffer = None

            self.input_controller.tap_key(KeyboardKey.KEY_ENTER, duration=1.5)

            self.episode_started_at = time.time()
            self.episode_observation_count = 0
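# A minimal sketch of the "save every 10 runs" cadence above, with the
# directory layout copied from the snippet; the ppo_agent handle is assumed
# to expose save_model() as used there.
import os

def maybe_save_model(ppo_agent, run_count, every=10):
    # Persist the model once every `every` completed runs.
    if run_count % every == 0:
        ppo_agent.agent.save_model(
            directory=os.path.join(os.getcwd(), "datasets", "pacai", "ppo_model"),
            append_timestep=False)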
Example #24
    def handle_record(self, game_frame, game_frame_pipeline, **kwargs):
        game_frame_buffer = FrameGrabber.get_frames(self.frame_offsets,
                                                    frame_type="PIPELINE")

        self.game_frame_buffers.append(game_frame_buffer)
Example #25
    def update_game_frame(self, frame_type="FULL"):
        game_frame_buffer = FrameGrabber.get_frames([0], frame_type=frame_type)
        return game_frame_buffer.frames[0]
Example #26
    def handle_play(self, game_frame):
        self.isDescending = self.ascendDescend(game_frame)
        self.currentHP = self.computeActualHP(game_frame)
        self.falling(game_frame)
        

        for i, game_frame in enumerate(self.game_frame_buffer.frames):
            self.visual_debugger.store_image_data(
                game_frame.frame,
                game_frame.frame.shape,
                str(i)
            )
        
        if self.dqn_main_player.frame_stack is None:
            pipeline_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(100, 100),
                frame_type="PIPELINE",
                dtype="float64"
            ).frames[0]

            self.dqn_main_player.build_frame_stack(pipeline_game_frame.frame)
            self.dqn_buddy_player.frame_stack = self.dqn_main_player.frame_stack

            # game_frame_buffer is only built below, so bail out on this
            # bootstrap pass instead of hitting a NameError further down
            return None
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(100, 100),
                frame_type="PIPELINE",
                dtype="float64"
            )
        
        reward = self.calculate_reward()
        if self.dqn_main_player.mode == "TRAIN":
            
            self.game_state["run_reward_main"] += reward
            self.game_state["run_reward_buddy"] += reward
            
            self.dqn_main_player.append_to_replay_memory(
                game_frame_buffer,
                reward,
                terminal=self.currentHP == 0
            )

            self.dqn_buddy_player.append_to_replay_memory(
                game_frame_buffer,
                reward,
                terminal=self.currentHP == 0
            )
                
            # Every 2000 steps, save latest weights to disk
            if self.dqn_main_player.current_step % 2000 == 0:
                self.dqn_main_player.save_model_weights(
                    file_path_prefix="datasets/dqn/dqn_main/"
                )
                self.dqn_buddy_player.save_model_weights(
                    file_path_prefix="datasets/dqn/dqn_buddy/"
                )

            # Every 20000 steps, save weights checkpoint to disk
            if self.dqn_main_player.current_step % 20000 == 0:
                self.dqn_main_player.save_model_weights(
                    file_path_prefix="datasets/dqn/dqn_main/",
                    is_checkpoint=True
                )
                self.dqn_buddy_player.save_model_weights(
                    file_path_prefix="datasets/dqn/dqn_buddy/",
                    is_checkpoint=True
                )

        elif self.dqn_main_player.mode == "RUN":
            self.dqn_main_player.update_frame_stack(game_frame_buffer)
            self.dqn_buddy_player.update_frame_stack(game_frame_buffer)

        run_time = datetime.now() - self.started_at

        serpent.utilities.clear_terminal()

        print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
        print("")

        print("MAIN NEURAL NETWORK:\n")
        self.dqn_main_player.output_step_data()

        print("")
        
        print("BUDDY NEURAL NETWORK:\n")
        self.dqn_buddy_player.output_step_data()

        print("")        
        print(f"CURRENT RUN: {self.game_state['current_run']}")
        print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward_main'] + self.game_state['run_reward_buddy'] , 2)}")
        print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
        print(f"CURRENT HEALTH: {self.currentHP}")

        print("")
        print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")

        print("")
        print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'} ")
        print("")

        print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")


        
        if self.currentHP <= 0:
            serpent.utilities.clear_terminal()
            timestamp = datetime.utcnow()

            gc.enable()
            gc.collect()
            gc.disable()

            timestamp_delta = timestamp - self.game_state["run_timestamp"]
            self.game_state["last_run_duration"] = timestamp_delta.seconds

            if self.dqn_main_player.mode in ["TRAIN", "RUN"]:
                # Check for records
                if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                    self.game_state["record_time_alive"] = {
                        "value": self.game_state["last_run_duration"],
                        "run": self.game_state["current_run"],
                        "predicted": self.dqn_main_player.mode == "RUN"
                    }

            self.game_state["current_run_steps"] = 0

            self.input_controller.handle_keys([])

            if self.dqn_main_player.mode == "TRAIN":
                for i in range(16):
                    serpent.utilities.clear_terminal()
                    print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                    print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                    self.dqn_main_player.train_on_mini_batch()
                    self.dqn_buddy_player.train_on_mini_batch()

            self.game_state["run_timestamp"] = datetime.utcnow()
            self.game_state["current_run"] += 1
            self.game_state["run_reward_main"] = 0
            self.game_state["run_reward_buddy"] = 0
            self.game_state["run_predicted_actions"] = 0
            
            self.restartLevel()
            
            if self.dqn_main_player.mode in ["TRAIN", "RUN"]:
                if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                    self.dqn_main_player.update_target_model()
                    self.dqn_buddy_player.update_target_model()
                    

                if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                    self.dqn_main_player.enter_run_mode()
                    self.dqn_buddy_player.enter_run_mode()
                else:
                    self.dqn_main_player.enter_train_mode()
                    self.dqn_buddy_player.enter_train_mode()

            return None

        # Alternate control: the main agent acts on even steps, the buddy on odd steps
        if self.actualStep % 2 == 0:
            self.dqn_main_player.pick_action()
            self.dqn_main_player.generate_action()
            movement_keys = self.dqn_main_player.get_input_values()
            print("")
            print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), movement_keys))))
            self.input_controller.handle_keys(movement_keys)
            self.dqn_main_player.erode_epsilon(factor=2)
            self.dqn_main_player.next_step()
            #time.sleep(1)	        
        else:
            self.dqn_buddy_player.pick_action()
            self.dqn_buddy_player.generate_action()
            movement_keys_buddy = self.dqn_buddy_player.get_input_values()         
            print("")
            print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name),movement_keys_buddy))))
            self.input_controller.handle_keys(movement_keys_buddy)
            self.dqn_buddy_player.erode_epsilon(factor=2)
            self.dqn_buddy_player.next_step()
            #time.sleep(1)
        #movement_keys = self.dqn_main_player.get_input_values()
        #movement_keys_buddy = self.dqn_buddy_player.get_input_values()        

        #print("")
        #print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), movement_keys + movement_keys_buddy))))
        #self.input_controller.handle_keys(movement_keys + movement_keys_buddy)

        if self.dqn_main_player.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.game_state["current_run_steps"] += 1
        self.actualStep += 1

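A minimal sketch of the experience-replay pattern used above, where each frame buffer is stored with its reward and a terminal flag (HP hitting zero ends the episode). The ReplayMemory class below is a hypothetical illustration built on a bounded deque, not SerpentAI's actual implementation:

import collections
import random

class ReplayMemory:
    # Illustrative bounded replay buffer (assumed capacity, not SerpentAI's)
    def __init__(self, capacity=100000):
        # deque(maxlen=...) silently evicts the oldest transition when full
        self.memory = collections.deque(maxlen=capacity)

    def append_to_replay_memory(self, frame_buffer, reward, terminal):
        # terminal=True marks the end of an episode (e.g. currentHP == 0)
        self.memory.append((frame_buffer, reward, terminal))

    def sample_mini_batch(self, batch_size=32):
        # Uniform random mini-batch for one training step
        return random.sample(self.memory, min(batch_size, len(self.memory)))
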
    def handle_play(self, game_frame):
        # Game crash detection
        game_frame_buffer = FrameGrabber.get_frames([0, 60])
        frame_1, frame_2 = game_frame_buffer.frames

        if np.array_equal(frame_1, frame_2):
            self.printer.add("")
            self.printer.add("Game appears to have crashed... Relaunching!")
            self.printer.flush()

            self.relaunch()
            self.just_relaunched = True

            self.frame_buffer = None

            return None

        # Check for recent game relaunch
        if self.just_relaunched:
            self.just_relaunched = False
            self.frame_buffer = None

            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)

            self.printer.flush()
            return None

        self.printer.add("")
        self.printer.add("Serpent.AI Lab - Superflight")
        self.printer.add(
            "Stage 2: Reinforcement Learning: Training a PPO Agent")
        self.printer.add("")
        self.printer.add(f"Stage Started At: {self.started_at}")
        self.printer.add(f"Current Run: #{self.run_count}")
        self.printer.add("")

        reward = self.reward_superflight_simple([None, None, game_frame, None])

        if self.frame_buffer is not None:
            if reward == 0 and self.death_check is False:
                pass
            else:
                self.ppo_agent.observe(reward, terminal=(reward == 0))
                self.observation_count += 1

        if reward > 0:
            self.death_check = False
            self.run_reward += reward

            self.frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6],
                                                        frame_type="PIPELINE")

            action, label, game_input = self.ppo_agent.generate_action(
                self.frame_buffer)

            self.performed_inputs.appendleft(label)
            self.input_controller.handle_keys(game_input)

            self.printer.add(f"Current Reward: {reward}")
            self.printer.add(f"Run Reward: {self.run_reward}")
            self.printer.add("")

            if self.observation_count < self.delay_fuzzing_observation_cap:
                self.printer.add(
                    f"Observation Count: {self.observation_count}")
                self.printer.add("")

            self.printer.add(
                f"Average Rewards (Last 10 Runs): {self.average_reward_10}")
            self.printer.add(
                f"Average Rewards (Last 100 Runs): {self.average_reward_100}")
            self.printer.add(
                f"Average Rewards (Last 1000 Runs): {self.average_reward_1000}"
            )
            self.printer.add("")
            self.printer.add(f"Previous Run Score: {self.previous_score}")
            self.printer.add("")
            self.printer.add(
                f"Average Score (Last 10 Runs): {self.average_score_10}")
            self.printer.add(
                f"Average Score (Last 100 Runs): {self.average_score_100}")
            self.printer.add(
                f"Average Score (Last 1000 Runs): {self.average_score_1000}")
            self.printer.add("")
            self.printer.add(
                f"Top Score: {self.top_score} (Run #{self.top_score_run})")
            self.printer.add("")
            self.printer.add(
                f"Random Agent Average Score: {self.random_average_score} (over {self.random_runs} runs)"
            )
            self.printer.add(
                f"Random Agent Top Score: {self.random_top_score}")
            self.printer.add("")
            self.printer.add("Latest Inputs:")
            self.printer.add("")

            for i in self.performed_inputs:
                self.printer.add(i)

            self.printer.flush()

            # Delay fuzzing: occasionally sleep for a random duration early in training
            if (self.observation_count < self.delay_fuzzing_observation_cap
                    and np.random.uniform(0, 1) > 0.5):
                time.sleep(random.choice(self.delay_fuzzing_durations))
        else:
            if not self.death_check:
                self.death_check = True

                self.printer.flush()
                return None
            else:
                self.input_controller.handle_keys([])

                self.run_count += 1
                self.performed_inputs.clear()

                self.reward_10.appendleft(self.run_reward)
                self.reward_100.appendleft(self.run_reward)
                self.reward_1000.appendleft(self.run_reward)

                self.run_reward = 0

                self.average_reward_10 = float(np.mean(self.reward_10))
                self.average_reward_100 = float(np.mean(self.reward_100))
                self.average_reward_1000 = float(np.mean(self.reward_1000))

                score = self.game.api.parse_score(game_frame)
                self.previous_score = score

                self.printer.add(
                    f"The game agent just died with score: {score}")

                self.score_10.appendleft(score)
                self.score_100.appendleft(score)
                self.score_1000.appendleft(score)

                self.average_score_10 = float(np.mean(self.score_10))
                self.average_score_100 = float(np.mean(self.score_100))
                self.average_score_1000 = float(np.mean(self.score_1000))

                if score > self.top_score:
                    self.printer.add(f"NEW RECORD!")

                    self.top_score = score
                    self.top_score_run = self.run_count - 1

                self.printer.add("")

                self.frame_buffer = None

                # Memory Leak Relaunch Check
                if (time.time() - self.game.launched_at) > 3600:
                    self.relaunch()
                    self.just_relaunched = True
                else:
                    for i in range(3):
                        self.input_controller.tap_key(KeyboardKey.KEY_UP)

                    if not self.run_count % 5:
                        self.printer.add("Changing Map...")
                        self.printer.flush()

                        self.game.api.change_map(
                            input_controller=self.input_controller)
                    else:
                        self.printer.add("Restarting...")
                        self.printer.flush()

                        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)

                time.sleep(0.5)
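
The Superflight agent's rolling statistics (average rewards over the last 10, 100, and 1000 runs) follow from bounded deques: appendleft plus a fixed maxlen makes np.mean a moving average. A minimal sketch, assuming the containers were created as collections.deque(maxlen=N):

import collections
import numpy as np

# Once maxlen is reached, appendleft() drops the oldest entry, so the
# deque always holds exactly the most recent N run rewards.
reward_10 = collections.deque(maxlen=10)

for run_reward in [12.5, 8.0, 19.25]:  # illustrative values
    reward_10.appendleft(run_reward)

average_reward_10 = float(np.mean(reward_10))  # 13.25
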
    def handle_random(self, game_frame):
        # Game Crash Detection
        game_frame_buffer = FrameGrabber.get_frames([0, 60])
        frame_1, frame_2 = game_frame_buffer.frames

        if np.array_equal(frame_1, frame_2):
            self.printer.add("")
            self.printer.add("Game appears to have crashed... Relaunching!")
            self.printer.flush()

            self.relaunch()
            self.just_relaunched = True

            return None

        self.printer.add("")
        self.printer.add("Serpent.AI Lab - Superflight")
        self.printer.add("Stage 1: Collecting Random Agent Data...")
        self.printer.add("")
        self.printer.add(f"Current Run: #{self.run_count}")
        self.printer.add("")

        if self.just_relaunched:
            self.just_relaunched = False
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)

            self.printer.flush()
            return None

        reward = self.reward_superflight_simple([None, None, game_frame, None])

        if reward > 0:
            self.death_check = False

            game_input_key = random.choice(list(self.game_inputs.keys()))
            self.performed_inputs.appendleft(game_input_key)

            self.input_controller.handle_keys(self.game_inputs[game_input_key])

            self.printer.add(f"Average Score: {self.average_score}")
            self.printer.add(f"Top Score: {self.top_score}")
            self.printer.add("")
            self.printer.add(f"Previous Run Score: {self.previous_score}")
            self.printer.add("")
            self.printer.add("")
            self.printer.add(f"Reward: {reward}")
            self.printer.add("")
            self.printer.add("")
            self.printer.add("Latest Inputs:")
            self.printer.add("")

            for i in self.performed_inputs:
                self.printer.add(i)

            self.printer.flush()
        else:
            if not self.death_check:
                self.death_check = True

                self.printer.flush()
                return None
            else:
                self.input_controller.handle_keys([])

                self.run_count += 1
                self.performed_inputs.clear()

                score = self.game.api.parse_score(game_frame)

                self.previous_score = score
                self.average_score += ((score - self.average_score) /
                                       self.run_count)

                self.printer.add(
                    f"The game agent just died with score: {score}")

                if score > self.top_score:
                    self.printer.add(f"NEW RECORD!")
                    self.top_score = score

                self.printer.add("")

                # Memory Leak Relaunch Check
                if (time.time() - self.game.launched_at) > 3600:
                    self.relaunch()
                    self.just_relaunched = True
                else:
                    for i in range(3):
                        self.input_controller.tap_key(KeyboardKey.KEY_UP)

                    if not self.run_count % 5:
                        self.printer.add("Changing Map...")
                        self.printer.flush()

                        self.game.api.change_map(
                            input_controller=self.input_controller)
                    else:
                        self.printer.add("Restarting...")
                        self.printer.flush()

                        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)

                time.sleep(0.5)
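
Unlike the PPO stage, the random agent keeps no score history: it updates its mean in place with average_score += (score - average_score) / run_count. A standalone check that this online update reproduces the plain arithmetic mean:

# Online mean update: avg_n = avg_{n-1} + (score_n - avg_{n-1}) / n
scores = [100, 250, 400]  # illustrative run scores
average = 0.0
for n, score in enumerate(scores, start=1):
    average += (score - average) / n

assert average == sum(scores) / len(scores)  # both equal 250.0
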
Example #29
    def handle_play_ddqn(self, game_frame):
        gc.disable()

        if self.dqn_movement.first_run:
            self.input_controller.tap_key(KeyboardKey.KEY_W)

            self.dqn_movement.first_run = False

            time.sleep(5)

            return None

        dragon_alive = self._measure_dragon_alive(game_frame)
        # dragon_coins = self._measure_dragon_coins(game_frame)

        self.game_state["alive"].appendleft(dragon_alive)
        # self.game_state["coins"].appendleft(dragon_coins)

        if self.dqn_movement.frame_stack is None:
            # pipeline_game_frame = FrameGrabber.get_frames(
            #     [0],
            #     frame_shape=game_frame.frame.shape,
            #     frame_type="MINI"
            # ).frames[0]

            self.dqn_movement.build_frame_stack(game_frame.ssim_frame)
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=game_frame.frame.shape,
                frame_type="MINI"
                )

            if self.dqn_movement.mode == "TRAIN":
                reward = self._calculate_reward()

                self.game_state["run_reward"] += reward

                self.dqn_movement.append_to_replay_memory(
                    game_frame_buffer,
                    reward,
                    terminal=self.game_state["alive"][0] == 0
                )
                # Every 2000 steps, save latest weights to disk
                if self.dqn_movement.current_step % 2000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix=f"datasets/cloney_movement"
                    )

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_movement.current_step % 20000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix=f"datasets/cloney_movement",
                        is_checkpoint=True
                    )

            elif self.dqn_movement.mode == "RUN":
                self.dqn_movement.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            print("\033c" + f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print("")

            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_movement.output_step_data()

            print("")
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward'], 2)}")
            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(f"CURRENT DRAGON ALIVE: {self.game_state['alive'][0]}")
            # print(f"CURRENT DRAGON COINS: {self.game_state['coins'][0]})

            print("")
            # print(f"AVERAGE ACTIONS PER SECOND: {round(self.game_state['average_aps'], 2)}")
            print("")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
            # print(f"LAST RUN COINS: {self.game_state['last_run_coins'][0]})

            print("")
            print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
            # print(f"RECORD COINS COLLECTED: {self.game_state['record_coins_collected'].get('value')} coins (Run {self.game_state['record_coins_collected'].get('run')}, {'Predicted' if self.game_state['record_coins_collected'].get('predicted') else 'Training'})")
            print("")
            print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

            if self.game_state["alive"][1] <= 0:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                gc.enable()
                gc.collect()
                gc.disable()

                # Set display stuff TODO
                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_movement.mode == "RUN"
                        }

                    # if self.game_state["coins"][0] < self.game_state["record_coins_collected"].get("value", 1000):
                    #     self.game_state["record_coins_collected"] = {
                    #         "value": self.game_state["coins"][0],
                    #         "run": self.game_state["current_run"],
                    #         "predicted": self.dqn_movement.mode == "RUN"
                    #     }
                else:
                    self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0

                self.input_controller.release_key(KeyboardKey.KEY_SPACE)

                if self.dqn_movement.mode == "TRAIN":
                    for i in range(8):
                        serpent.utilities.clear_terminal()
                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/8")
                        print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                        self.dqn_movement.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_movement"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["alive"] = collections.deque(np.full((8,), 4), maxlen=8)
                # self.game_state["coins"] = collections.deque(np.full((8,), 0), maxlen=8)

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                        if self.dqn_movement.type == "DDQN":
                            self.dqn_movement.update_target_model()

                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                        self.dqn_movement.enter_run_mode()
                    else:
                        self.dqn_movement.enter_train_mode()

                self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
                time.sleep(5)

                return None

        self.dqn_movement.pick_action()
        self.dqn_movement.generate_action()

        keys = self.dqn_movement.get_input_values()
        print("")
        print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), keys))))

        self.input_controller.handle_keys(keys)

        if self.dqn_movement.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_movement.erode_epsilon(factor=2)

        self.dqn_movement.next_step()

        self.game_state["current_run_steps"] += 1
Example #30
    def handle_play(self, game_frame):

        gc.disable()

        for i, game_frame in enumerate(self.game_frame_buffer.frames):
            self.visual_debugger.store_image_data(game_frame.frame,
                                                  game_frame.frame.shape,
                                                  str(i))

        if self.dqn_direction.first_run:
            # self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
            # time.sleep(5)

            self.input_controller.tap_key(KeyboardKey.KEY_SPACE)

            self.dqn_direction.first_run = False

            return None

        actor_hp = self._measure_actor_hp(game_frame)
        run_score = self._measure_run_score(game_frame)

        self.game_state["health"].appendleft(actor_hp)
        self.game_state["score"].appendleft(run_score)

        if self.dqn_direction.frame_stack is None:
            full_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE").frames[0]

            self.dqn_direction.build_frame_stack(full_game_frame.frame)
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE")

            if self.dqn_direction.mode == "TRAIN":
                reward_direction, reward_action = self._calculate_reward()

                self.game_state["run_reward_direction"] += reward_direction
                self.game_state["run_reward_action"] += reward_action

                self.dqn_direction.append_to_replay_memory(
                    game_frame_buffer,
                    reward_direction,
                    terminal=self.game_state["health"][0] == 0)

                # Every 2000 steps, save latest weights to disk
                if self.dqn_direction.current_step % 2000 == 0:
                    self.dqn_direction.save_model_weights(
                        file_path_prefix=f"datasets/Fortnite_direction")

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_direction.current_step % 20000 == 0:
                    self.dqn_direction.save_model_weights(
                        file_path_prefix=f"datasets/Fortnite_direction",
                        is_checkpoint=True)

            elif self.dqn_direction.mode == "RUN":
                self.dqn_direction.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()

            print(
                f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes,, {run_time.seconds % 60} seconds"
            )
            print(
                "GAME: Fortnite   PLATFORM: EXE   AGENT: DDQN + Prioritized Experience Replay"
            )
            print("")

            self.dqn_direction.output_step_data()

            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(
                f"CURRENT RUN REWARD: {round(self.game_state['run_reward_direction'] + self.game_state['run_reward_action'], 2)}"
            )
            print(
                f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
            )
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT SCORE: {self.game_state['score'][0]}")
            print("")
            print(
                f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds"
            )

            print("")
            print(
                f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})"
            )
            print("")

            print(
                f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds"
            )

            if self.game_state["health"][1] <= 0:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state[
                            "record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_direction.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(
                        self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(
                        self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0
                self.input_controller.handle_keys([])

                if self.dqn_direction.mode == "TRAIN":
                    for i in range(8):
                        run_time = datetime.now() - self.started_at
                        serpent.utilities.clear_terminal()
                        print(
                            f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds"
                        )
                        print(
                            "GAME: Fortnite                 PLATFORM: EXE                AGENT: DDQN + Prioritized Experience Replay"
                        )
                        print("")

                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/2")
                        print(
                            f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}"
                        )

                        self.dqn_direction.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_direction"] = 0
                self.game_state["run_reward_action"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8, ),
                                                                      3),
                                                              maxlen=8)
                self.game_state["score"] = collections.deque(np.full((8, ), 0),
                                                             maxlen=8)

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 100 == 0:
                        if self.dqn_direction.type == "DDQN":
                            self.dqn_direction.update_target_model()
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 20 == 0:
                        self.dqn_direction.enter_run_mode()
                    else:
                        self.dqn_direction.enter_train_mode()

                # self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
                # time.sleep(3)

                self.input_controller.tap_key(KeyboardKey.KEY_SPACE)

                return None

        self.dqn_direction.pick_action()
        self.dqn_direction.generate_action()

        keys = self.dqn_direction.get_input_values()

        print("")
        print(keys)
        # Grab a full-screen frame and convert it to a numpy array for the detector
        img = pyautogui.screenshot(region=(0, 0, 1920, 1080))
        im = np.array(img)
        custom = self.detector.CustomObjects(person=True)
        detections = self.detector.detectCustomObjectsFromImage(
            custom_objects=custom, input_type="array", input_image=im)

        for detection in detections:
            print(detection["box_points"])
            x1, y1, x2, y2 = detection["box_points"]
            center_x = int((x1 + x2) / 2)
            center_y = int((y1 + y2) / 2)

            # Aim at the center of the detected player and fire one left click
            ctypes.windll.user32.SetCursorPos(center_x, center_y)
            ctypes.windll.user32.mouse_event(2, 0, 0, 0, 0)  # MOUSEEVENTF_LEFTDOWN
            time.sleep(0.05)
            ctypes.windll.user32.mouse_event(4, 0, 0, 0, 0)  # MOUSEEVENTF_LEFTUP
            self.shot_reward = 100000

        self.input_controller.handle_keys(keys)
        if self.dqn_direction.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_direction.erode_epsilon(factor=2)

        self.dqn_direction.next_step()

        self.game_state["current_run_steps"] += 1