class SerpenthornetGameAgent(GameAgent):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play

        self.analytics_client = None
        self.game_state = None

        self.s_p1 = 16
        self.game_over = False
        self.reward = 0

        self._reset_game_state()

    def setup_play(self):
        input_mapping = {
            "UP": [KeyboardKey.KEY_W],
            "LEFT": [KeyboardKey.KEY_A],
            "DOWN": [KeyboardKey.KEY_S],
            "RIGHT": [KeyboardKey.KEY_D],
            "LEFT_UP": [KeyboardKey.KEY_W, KeyboardKey.KEY_A],
            "RIGHT_UP": [KeyboardKey.KEY_W, KeyboardKey.KEY_D],
            "LEFT_DOWN": [KeyboardKey.KEY_S, KeyboardKey.KEY_A],
            "RIGHT_DOWN": [KeyboardKey.KEY_S, KeyboardKey.KEY_D],
            "UP_SHOOT": [KeyboardKey.KEY_UP],
            "LEFT_SHOOT": [KeyboardKey.KEY_LEFT],
            "DOWN_SHOOT": [KeyboardKey.KEY_DOWN],
            "RIGHT_SHOOT": [KeyboardKey.KEY_RIGHT],
            "LEFT_UP_SHOOT": [KeyboardKey.KEY_LEFT, KeyboardKey.KEY_UP],
            "RIGHT_UP_SHOOT": [KeyboardKey.KEY_RIGHT, KeyboardKey.KEY_UP],
            "LEFT_DOWN_SHOOT": [KeyboardKey.KEY_LEFT, KeyboardKey.KEY_DOWN],
            "RIGHT_DOWN_SHOOT": [KeyboardKey.KEY_RIGHT, KeyboardKey.KEY_DOWN],
            "BOOM": [KeyboardKey.KEY_SPACE]
        }

        self.key_mapping = {
            KeyboardKey.KEY_W.name: "MOVE UP",
            KeyboardKey.KEY_A.name: "MOVE LEFT",
            KeyboardKey.KEY_S.name: "MOVE DOWN",
            KeyboardKey.KEY_D.name: "MOVE RIGHT",
            KeyboardKey.KEY_UP.name: "SHOOT UP",
            KeyboardKey.KEY_LEFT.name: "SHOOT LEFT",
            KeyboardKey.KEY_DOWN.name: "SHOOT DOWN",
            KeyboardKey.KEY_RIGHT.name: "SHOOT RIGHT",
        }

        movement_action_space = KeyboardMouseActionSpace(
            directional_keys=[None, "UP", "LEFT", "DOWN", "RIGHT",
                              "LEFT_UP", "RIGHT_UP", "LEFT_DOWN", "RIGHT_DOWN"]
        )

        projectile_action_space = KeyboardMouseActionSpace(
            projectile_keys=[None, "UP_SHOOT", "LEFT_SHOOT", "DOWN_SHOOT", "RIGHT_SHOOT",
                             "LEFT_UP_SHOOT", "RIGHT_UP_SHOOT", "LEFT_DOWN_SHOOT",
                             "RIGHT_DOWN_SHOOT", "BOOM"]
        )

        movement_model_file_path = "datasets/hornet_movement_dqn.h5".replace("/", os.sep)

        self.dqn_movement = DDQN(
            model_file_path=movement_model_file_path if os.path.isfile(movement_model_file_path) else None,
            input_shape=(100, 100, 3),
            input_mapping=input_mapping,
            action_space=movement_action_space,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=10,
            batch_size=16,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=False
        )

        projectile_model_file_path = "datasets/hornet_projectile_dqn.h5".replace("/", os.sep)

        self.dqn_projectile = DDQN(
            model_file_path=projectile_model_file_path if os.path.isfile(projectile_model_file_path) else None,
            input_shape=(100, 100, 3),
            input_mapping=input_mapping,
            action_space=projectile_action_space,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=10,
            batch_size=16,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=False
        )

        # try:
        #     self.dqn_projectile.load_model_weights(
        #         file_path='model/hornet/binding_of_isaac_projectile_dqn_150000_0.5687282200080599_.h5',
        #         override_epsilon=True
        #     )
        #     self.dqn_movement.load_model_weights(
        #         file_path='model/hornet/binding_of_isacc_movement_dqn_150000_0.5687282200080599_.h5',
        #         override_epsilon=True
        #     )
        # except Exception as e:
        #     raise e

    def get_reward_state(self, heart, score):
        try:
            score = int(score)

            if score > self.game_state['game_score']:
                score_reward = 0.5
            elif self.game_state['game_score'] - score > 100:
                # a large backwards jump is most likely an OCR misread, so it
                # is rewarded the same as an increase
                score_reward = 0.5
            else:
                score_reward = 0

            self.game_state['game_score'] = score
        except Exception:
            score_reward = 0

        if heart == -1:
            # print(heart, self.game_over, "restart game waiting")
            self.game_over = True
            self.reward = 0
            self.s_p1 = 16
        elif self.game_over is False and heart == 0:
            self.game_over = True
            self.reward = ((16 - (self.s_p1 - heart) * 2) / 16) + score_reward
            self.s_p1 = 16
            # print(heart, self.game_over, self.reward)
            self.reward = 0  # NOTE: this zeroes out the death reward computed above
        elif self.game_over is True and heart == 0:
            pass
            # print(heart, self.game_over, "game over, restart waiting")
        elif heart != 0 and heart != -1:
            self.game_over = False
            self.reward = ((16 - (self.s_p1 - heart) * 16) / 16) + score_reward
            self.s_p1 = heart
            # print(heart, self.game_over, self.reward)

    def handle_play(self, game_frame):
        gc.disable()

        heart1 = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions['HUD_HEART_15'])
        heart_p = heart1[0, 0, 0]
        print(heart_p)

        if heart_p == 0:
            gc.enable()
            gc.collect()
            gc.disable()

            for i, game_frame in enumerate(self.game_frame_buffer.frames):
                self.visual_debugger.store_image_data(
                    game_frame.frame,
                    game_frame.frame.shape,
                    str(i)
                )

            self.input_controller.tap_key(KeyboardKey.KEY_D)
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        else:
            self.train_ddqn(game_frame)

    def train_ddqn(self, game_frame):
        if self.dqn_movement.first_run:
            self.dqn_movement.first_run = False
            self.dqn_projectile.first_run = False
            return None

        heart = frame_to_hearts(game_frame.frame, self.game)
        score = self._process_ocr(game_frame)
        self.get_reward_state(heart, score)

        if self.dqn_movement.frame_stack is None:
            pipeline_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE",
                dtype="float64"
            ).frames[0]

            print(np.shape(pipeline_game_frame.frame))

            # self.dqn_movement.build_frame_stack(pipeline_game_frame.frame)
            self.dqn_movement.frame_stack = self._build_frame_stack(pipeline_game_frame.frame)
            self.dqn_projectile.frame_stack = self.dqn_movement.frame_stack
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                # [0, 4, 8, 12],
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE",
                dtype="float64"
            )

            if self.dqn_movement.mode == "TRAIN":
                self.game_state["run_reward_movement"] += self.reward
                self.game_state["run_reward_projectile"] += self.reward

                self._movement_append_to_replay_memory(
                    game_frame_buffer,
                    self.reward,
                    terminal=self.game_over
                )

                self._projectile_append_to_replay_memory(
                    game_frame_buffer,
                    self.reward,
                    terminal=self.game_over
                )

                # Every 2000 steps, save latest weights to disk
                if self.dqn_movement.current_step % 2000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix=f"datasets/binding_of_isacc_movement"
                    )
                    self.dqn_projectile.save_model_weights(
                        file_path_prefix=f"datasets/binding_of_isaac_projectile"
                    )

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_movement.current_step % 20000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix=f"datasets/c_binding_of_isaac_movement",
                        is_checkpoint=True
                    )
                    self.dqn_projectile.save_model_weights(
                        file_path_prefix=f"datasets/c_binding_of_isaac_projectile",
                        is_checkpoint=True
                    )
            elif self.dqn_movement.mode == "RUN":
                game_frames = [game_frame.frame for game_frame in game_frame_buffer.frames]
                self.dqn_movement.frame_stack = np.array(game_frames)
                self.dqn_projectile.frame_stack = np.array(game_frames)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()
            print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours,"
                  f" {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_movement.output_step_data()
            print(f"reward: {self.reward}")
            print("PROJECTILE NEURAL NETWORK:\n")
            self.dqn_projectile.output_step_data()
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"CURRENT RUN REWARD: "
                  f"{round(self.game_state['run_reward_movement'] + self.game_state['run_reward_projectile'], 2)}")
            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(f"CURRENT HEALTH: {heart}")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
            print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds "
                  f"(Run {self.game_state['record_time_alive'].get('run')}, "
                  f"{'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
            print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

            if self.game_over:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_movement.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0
                self.input_controller.handle_keys([])

                if self.dqn_movement.mode == "TRAIN":
                    for i in range(16):
                        serpent.utilities.clear_terminal()
                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                        print(f"NEXT RUN: {self.game_state['current_run'] + 1} "
                              f"{'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                        self.dqn_movement.train_on_mini_batch()
                        self.dqn_projectile.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_movement"] = 0
                self.game_state["run_reward_projectile"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.s_p1 = 16
                self.game_over = False
                self.reward = 0

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                        self.dqn_movement.update_target_model()
                        self.dqn_projectile.update_target_model()

                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                        self.dqn_movement.enter_run_mode()
                        self.dqn_projectile.enter_run_mode()
                    else:
                        self.dqn_movement.enter_train_mode()
                        self.dqn_projectile.enter_train_mode()

                return None

            self.dqn_movement.pick_action()
            self.dqn_movement.generate_action()

            self.dqn_projectile.pick_action(action_type=self.dqn_movement.current_action_type)
            self.dqn_projectile.generate_action()

            try:
                _thread.start_new_thread(self._execute_action, ("Thread",))
            except Exception as e:
                print(e)

            if self.dqn_movement.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_movement.erode_epsilon(factor=2)
            self.dqn_projectile.erode_epsilon(factor=2)

            self.dqn_movement.next_step()
            self.dqn_projectile.next_step()

            self.game_state["current_run_steps"] += 1

    def _execute_action(self, threadname):
        movement_keys = self.dqn_movement.get_input_values()
        projectile_keys = self.dqn_projectile.get_input_values()
        self.input_controller.handle_keys(movement_keys + projectile_keys)

    def _reset_game_state(self):
        self.game_state = {
            "current_run": 19,
            "current_run_steps": 0,
            "run_reward_movement": 0,
            "run_reward_projectile": 0,
            "run_future_rewards": 0,
            "run_predicted_actions": 0,
            "run_timestamp": datetime.utcnow(),
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "game_score": 0
        }

    def _process_ocr(self, game_frame):
        score_image = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["SCORE"])
        score_image = cv2.cvtColor(score_image, cv2.COLOR_BGR2GRAY)
        score_image[score_image < 255] = 0
        score_image = Image.fromarray(np.uint8(score_image))

        # score_image.show()
        # print(np.shape(score))
        # print(pytesseract.image_to_string(score_image, lang='chi_sim+eng'))

        return pytesseract.image_to_string(
            score_image,
            lang='chi_sim',
            boxes=False,
            config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789'
        )

    def _build_frame_stack(self, game_frame):
        # np.stack over the frame's own rows is effectively a copy; the
        # reshape then adds the leading batch dimension the DDQN expects
        frame_stack = np.stack((game_frame), axis=0)
        return frame_stack.reshape((1,) + frame_stack.shape)

    def _movement_append_to_replay_memory(self, game_frame_buffer, reward, terminal=False):
        game_frames = [game_frame.frame for game_frame in game_frame_buffer.frames]

        previous_frame_stack = self.dqn_movement.frame_stack
        self.dqn_movement.frame_stack = np.array(game_frames)

        observation = [
            previous_frame_stack,
            self.dqn_movement.current_action_index,
            reward,
            self.dqn_movement.frame_stack,
            terminal
        ]

        self.dqn_movement.replay_memory.add(self.dqn_movement.calculate_target_error(observation), observation)

    def _projectile_append_to_replay_memory(self, game_frame_buffer, reward, terminal=False):
        game_frames = [game_frame.frame for game_frame in game_frame_buffer.frames]

        previous_frame_stack = self.dqn_projectile.frame_stack
        self.dqn_projectile.frame_stack = np.array(game_frames)

        observation = [
            previous_frame_stack,
            self.dqn_projectile.current_action_index,
            reward,
            self.dqn_projectile.frame_stack,
            terminal
        ]

        self.dqn_projectile.replay_memory.add(self.dqn_projectile.calculate_target_error(observation), observation)
class SerpentPikaBallGameAgent(GameAgent):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play

        self.previous_game_frame = None

        # RGB bounds for the yellow Pikachus and the red ball
        self.lowerY = np.array([255, 255, 0], np.uint8)
        self.upperY = np.array([255, 255, 10], np.uint8)
        self.lowerR = np.array([255, 0, 0], np.uint8)
        self.upperR = np.array([255, 0, 10], np.uint8)

        self.game_state = None
        self._reset_game_state()

    def setup_key(self):
        self.input_mapping = {
            "JUMP": [KeyboardKey.KEY_UP],
            "RIGHT": [KeyboardKey.KEY_RIGHT],
            "LEFT": [KeyboardKey.KEY_LEFT],
            "UP_HIT": [KeyboardKey.KEY_UP, KeyboardKey.KEY_RETURN],
            "L_HIT": [KeyboardKey.KEY_LEFT, KeyboardKey.KEY_RETURN],
            "DOWN_HIT": [KeyboardKey.KEY_DOWN, KeyboardKey.KEY_RETURN],
            "NONE": []
        }

        self.key_mapping = {
            KeyboardKey.KEY_UP: "UP",
            KeyboardKey.KEY_RIGHT: "RIGHT",
            KeyboardKey.KEY_DOWN: "DOWN",
            KeyboardKey.KEY_LEFT: "LEFT",
            KeyboardKey.KEY_RETURN: "HIT"
        }

        self.action_space = KeyboardMouseActionSpace(
            action=['JUMP', 'RIGHT', 'LEFT', 'UP_HIT', 'L_HIT', 'DOWN_HIT', 'NONE']
        )

        move_inputs = {
            "JUMP": [KeyboardKey.KEY_UP],
            "RIGHT": [KeyboardKey.KEY_RIGHT],
            "LEFT": [KeyboardKey.KEY_LEFT],
            "NO_MOVE": []
        }

        attack_inputs = {
            "Power Hit": [KeyboardKey.KEY_RETURN],
            "NO_HIT": []
        }

        self.game_inputs = dict()

        for move_label, attack_label in itertools.product(move_inputs, attack_inputs):
            label = f"{move_label.ljust(10)}{attack_label}"
            self.game_inputs[label] = move_inputs[move_label] + attack_inputs[attack_label]

        print(self.game_inputs)

    def setup_play(self):
        # self.cid = 0
        self.trainID = 0
        self.setup_key()
        self.frame_process = False
        self.rewards = list()

        self.started_at = datetime.now()
        self.started_at_str = self.started_at.isoformat()

        # Pick the saved model whose file name carries the lowest (most eroded) epsilon
        latest_epsilon = 1
        model_file_path = 'fighting_movement_dqn_0_1_.h5'
        model_list = os.listdir('model')

        for item in model_list:
            for epsilon in re.findall(r"\d+\.\d+", item):
                if latest_epsilon > float(epsilon):
                    latest_epsilon = float(epsilon)
                    model_file_path = item

        model_file_path = f'model/{model_file_path}'.replace('/', os.sep)
        print(">> LOAD MODEL: ", model_file_path)
        time.sleep(1)

        self.dqn_action = DDQN(
            model_file_path=model_file_path if os.path.isfile(model_file_path) else None,
            input_shape=(114, 162, 4),
            input_mapping=self.input_mapping,
            action_space=self.action_space,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=100 if os.path.isfile(model_file_path) else 1000,
            batch_size=32,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=True
        )

        print('Starting Game')
        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)

    def getDifference(self, game_frame, previous_game_frame):
        return game_frame.grayscale_frame - previous_game_frame.grayscale_frame

    # old image-processing approach, kept for reference
    def handle_frame_process(self, game_frame):
        '''
        if not self.frame_process:
            return

        if self.game_frame_buffer.previous_game_frame is not None:
            try:
                threshold = skimage.filters.threshold_otsu(game_frame.grayscale_frame)
            except ValueError:
                threshold = -1

            gray_frame = game_frame.grayscale_frame > threshold
            gray_frame = skimage.filters.gaussian(gray_frame)
            gray_cv_frame = cv2.cvtColor(np.array(gray_frame * 255, dtype='uint8'), cv2.COLOR_GRAY2RGB)

            cv_frame = cv2.cvtColor(np.asarray(game_frame.frame, dtype='uint8'), cv2.COLOR_BGR2RGB)
            cv_frame = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2RGB)

            frame_threshed = cv2.inRange(cv_frame, self.lowerY, self.upperY)

            # find connected components (pikachu)
            _, cnts, hierarchy = cv2.findContours(frame_threshed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

            if len(cnts) > 0:
                cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

                for i in range(0, 2):
                    # Draw a rectangular frame around the detected object
                    x, y, w, h = cv2.boundingRect(cnts[i])
                    cv2.rectangle(gray_cv_frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

            frame_threshed = cv2.inRange(cv_frame, self.lowerR, self.upperR)

            # find connected components (ball)
            _, cnts, hierarchy = cv2.findContours(frame_threshed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

            if len(cnts) > 0:
                cnt = sorted(cnts, key=cv2.contourArea, reverse=True)[0]

                if len(cnt) > 70:
                    (x, y), radius = cv2.minEnclosingCircle(cnt)
                    cv2.circle(gray_cv_frame, (int(x), int(y)), int(radius), (255, 0, 0), 2)

            self.visual_debugger.store_image_data(
                gray_cv_frame,
                gray_cv_frame.shape,
                "grayscale"
            )

            # send difference to debugger (optional)
            frame_difference = self.getDifference(game_frame, self.game_frame_buffer.previous_game_frame)
            bw_frame_difference = frame_difference > 100
            bw_frame_difference = skimage.filters.sobel(bw_frame_difference)

            self.visual_debugger.store_image_data(
                np.array(bw_frame_difference * 255, dtype='uint8'),
                bw_frame_difference.shape,
                "frame_diff"
            )
        '''

    def handle_play(self, game_frame):
        # locate sprite position and existence
        '''
        logo_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_LOGO'], game_frame=game_frame)
        menu_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_MENU'], game_frame=game_frame)
        game_set_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_GAME_SET'], game_frame=game_frame)
        '''

        # append memory data into game state
        (self.game_state["com_x"], self.game_state["com_y"],
         self.ai_x, self.ai_y, self.ball_x, self.ball_y,
         self.com_sc, self.ai_sc, self.col_size,
         self.game_state["col_x"], self.game_state["col_y"]) = readInfo()

        self.game_state["ai_x"].appendleft(self.ai_x)
        self.game_state["ai_y"].appendleft(self.ai_y)
        self.game_state["ball_x"].appendleft(self.ball_x)
        self.game_state["ball_y"].appendleft(self.ball_y)
        self.game_state["ai_score"].appendleft(self.ai_sc)
        self.game_state["com_score"].appendleft(self.com_sc)
        self.game_state["col_size"].appendleft(self.col_size)

        self.handle_frame_process(game_frame)

        '''
        if logo_locator:
            print('Entering Logo...')
            self.game_state["playing"] = False
            self.handle_menu()
        elif menu_locator:
            print('Entering Menu...')
            self.game_state["playing"] = False
            self.handle_menu()
        elif game_set_locator:
            print('Game Set!')
            self.handle_fight_end(game_frame)
        '''

        # judge the in-game state by reading a single pixel value (tricky)
        self.game_frame_img = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0].frame

        if self.game_frame_img[100, 81] != 0.7137254901960784:
            self.handle_notInGame()
        else:
            self.game_state["playing"] = True
            self.handle_fight(game_frame)

    def handle_notInGame(self):
        serpent.utilities.clear_terminal()
        print('Currently not in game... please wait...')
        playAnimation(self.game_state["animeIndex"])
        self.game_state["animeIndex"] = self.game_state["animeIndex"] + 1 if self.game_state["animeIndex"] < 3 else 0
        # print(self.game_frame_img[95:105, 80:83])
        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(0.3)

    def handle_menu(self):
        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(4)

    def handle_fight(self, game_frame):
        gc.disable()

        if self.dqn_action.first_run:
            self.dqn_action.first_run = False
            return

        if self.dqn_action.frame_stack is None:
            game_frame_buffer = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0]
            self.dqn_action.build_frame_stack(game_frame_buffer.frame)
        else:
            # saving frame pic to analyze
            # self.cid = self.cid + 1
            # game_frame_img = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0]
            # skimage.io.imsave(f"frame{self.cid}.png", game_frame_img.frame)

            game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")

            if self.dqn_action.mode == "TRAIN":
                reward = self._calculate_reward()
                self.game_state["reward"] += reward

                self.dqn_action.append_to_replay_memory(
                    game_frame_buffer,
                    reward,
                    terminal=self.game_state["ai_score"][0] == 15
                )

                # Every 1000 steps, save latest weights to disk
                if self.dqn_action.current_step % 1000 == 0:
                    self.dqn_action.save_model_weights(file_path_prefix=f"model/fighting_movement")

                # Every 10000 steps, save weights checkpoint to disk
                if self.dqn_action.current_step % 10000 == 0:
                    self.dqn_action.save_model_weights(file_path_prefix=f"model/fighting_movement", is_checkpoint=True)
            elif self.dqn_action.mode == "RUN":
                self.dqn_action.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()
            print('')
            print(Fore.YELLOW)
            print(Style.BRIGHT)
            print(f"STARTED AT: {self.started_at_str}")
            print(f"RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} s")
            print(Style.RESET_ALL)
            # print("")
            print(Fore.GREEN)
            print(Style.BRIGHT)
            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_action.output_step_data()
            print(Style.RESET_ALL)
            print(Style.BRIGHT)
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print("")
            print(f"CURRENT RUN REWARD: {round(self.game_state['reward'], 4)}")
            print(f"CURRENT AI SCORE: {self.game_state['ai_score'][0]}")
            print(f"CURRENT ENEMY SCORE: {self.game_state['com_score'][0]}")
            print("")
            print(f"PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(Style.RESET_ALL)

            self.dqn_action.pick_action()
            self.dqn_action.generate_action()

            movement_keys = self.dqn_action.get_input_values()

            print("")
            print(Fore.GREEN)
            print(Style.BRIGHT)
            # print(movement_keys)
            print(" + ".join(list(map(lambda k: self.key_mapping.get(k), movement_keys))))
            print(Style.RESET_ALL)
            print("")
            print(f"AI: ({self.game_state['ai_x'][0]}, {self.game_state['ai_y'][0]})")
            print(f"COM: ({self.game_state['com_x']}, {self.game_state['com_y']})")
            print(f"BALL: ({self.game_state['ball_x'][0]}, {self.game_state['ball_y'][0]})")
            print(f"Collision: ({self.game_state['col_x']}, {self.game_state['col_y']}, {self.game_state['col_size'][0]})")
            print(f"Distance: {self.game_state['distance'][0]}")

            self.input_controller.handle_keys(movement_keys)

            if self.dqn_action.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_action.erode_epsilon(factor=2)
            self.dqn_action.next_step()
            self.game_state["current_run"] += 1

            if self.game_state['ai_score'][0] == 15 or self.game_state['com_score'][0] == 15:
                # Game over
                self.game_state["ai_score"].appendleft(0)
                self.game_state["com_score"].appendleft(0)
                self.handle_fight_end(game_frame)

    def handle_fight_end(self, game_frame):
        self.game_state["playing"] = False
        self.input_controller.handle_keys([])
        self.game_state["current_run"] += 1
        self.handle_fight_training(game_frame)

    def handle_fight_training(self, game_frame):
        # self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
        serpent.utilities.clear_terminal()
        gc.enable()
        gc.collect()
        gc.disable()
        print("TRAIN MODE")
        self.input_controller.handle_keys([])

        if self.dqn_action.mode == "TRAIN":
            for i in range(16):
                serpent.utilities.clear_terminal()
                print("")
                print(Fore.GREEN)
                print(Style.BRIGHT)
                print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 25 == 0 else ''}")
                print(Style.RESET_ALL)

                self.dqn_action.train_on_mini_batch()

        self.game_state["run_predicted_actions"] = 0

        if self.dqn_action.mode in ["TRAIN", "RUN"]:
            if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                self.dqn_action.update_target_model()

            if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                self.dqn_action.enter_run_mode()
            else:
                self.dqn_action.enter_train_mode()

        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(2)

    def _reset_game_state(self):
        self.game_state = {
            "reward": 0,
            "animeIndex": 0,
            "current_run": 1,
            "playing": False,
            "run_predicted_actions": 0,
            "ai_x": collections.deque(np.full((4,), 0), maxlen=4),
            "ai_y": collections.deque(np.full((4,), 0), maxlen=4),
            "ai_score": collections.deque(np.full((4,), 0), maxlen=4),
            "ball_x": collections.deque(np.full((4,), 0), maxlen=4),
            "ball_y": collections.deque(np.full((4,), 0), maxlen=4),
            "com_score": collections.deque(np.full((4,), 0), maxlen=4),
            "col_size": collections.deque(np.full((4,), 6), maxlen=4),
            "com_x": 36,
            "com_y": 244,
            "col_x": 0,
            "col_y": 0,
            "distance": collections.deque(np.full((20,), 100), maxlen=20),
        }

    def _calculate_reward(self):
        reward = 0

        distance = math.sqrt(
            (self.game_state["ai_x"][0] - self.game_state["ball_x"][0]) ** 2 +
            (self.game_state["ai_y"][0] - self.game_state["ball_y"][0]) ** 2
        )
        self.game_state["distance"].appendleft(int(distance))

        # collision with ball: the three newest distances all sit inside the
        # hit radius and the newest one is increasing again (ball bounced off)
        collision = (self.game_state["distance"][0] < 80
                     and self.game_state["distance"][1] < 80
                     and self.game_state["distance"][2] < 80
                     and self.game_state["distance"][0] > self.game_state["distance"][1]
                     and self.game_state["distance"][1] < self.game_state["distance"][2])

        if collision:
            reward += 0.25

        # power hit
        if self.game_state["col_size"][0] > 0 and self.game_state["distance"][0] < 90 and self.game_state["col_y"] != 272:
            reward += 0.5

        # AI gains a point
        if self.game_state["ai_score"][0] > self.game_state["ai_score"][1]:
            reward += 1

        # COM gains a point
        if self.game_state["com_score"][0] > self.game_state["com_score"][1]:
            reward += -1

        if reward > 1:
            reward = 1

        self.game_state["reward"] = reward
        return reward
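# Illustration only: the "bounce" test behind the collision bonus in
# _calculate_reward above. The three newest AI-to-ball distances (newest
# first, as in game_state["distance"]) must all sit inside the 80 px hit
# radius, with the ball having closed in (d1 < d2) and started receding again
# (d0 > d1). The helper name is hypothetical.
def _sketch_ball_collision(d0, d1, d2):
    return d0 < 80 and d1 < 80 and d2 < 80 and d0 > d1 and d1 < d2

assert _sketch_ball_collision(40, 20, 50) is True   # ball approached, then bounced off
assert _sketch_ball_collision(40, 60, 90) is False  # ball still incoming, no contact yet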
class SerpentFortniteGameAgent(GameAgent):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play

        self.game_state = None
        # assumed starting value so _calculate_reward works before the first detection
        self.shot_reward = 0
        self._reset_game_state()

    def setup_play(self):
        self.detector = ObjectDetection()
        self.detector.setModelTypeAsTinyYOLOv3()
        self.detector.setModelPath("yolo.h5")
        self.detector.loadModel(detection_speed="flash")

        input_mapping = {
            "KEY_W": [KeyboardKey.KEY_W],
            "KEY_A": [KeyboardKey.KEY_A],
            "KEY_S": [KeyboardKey.KEY_S],
            "KEY_D": [KeyboardKey.KEY_D],
            "KEY_SPACE": [KeyboardKey.KEY_SPACE],
            "KEY_C": [KeyboardKey.KEY_C],
            "KEY_1": [KeyboardKey.KEY_1],
            "KEY_2": [KeyboardKey.KEY_2]
        }

        self.key_mapping = {
            KeyboardKey.KEY_W.name: "KEY_W",
            KeyboardKey.KEY_A.name: "KEY_A",
            KeyboardKey.KEY_S.name: "KEY_S",
            KeyboardKey.KEY_D.name: "KEY_D",
            KeyboardKey.KEY_SPACE.name: "KEY_SPACE",
            KeyboardKey.KEY_C.name: "KEY_C",
            KeyboardKey.KEY_1.name: "KEY_1",
            KeyboardKey.KEY_2.name: "KEY_2"
        }

        direction_action_space = KeyboardMouseActionSpace(
            direction_keys=["KEY_W", "KEY_A", "KEY_S", "KEY_D", "KEY_SPACE", "KEY_C", "KEY_1", "KEY_2"]
        )

        direction_model_file_path = "datasets/Fortnite_direction_dqn_0_1_.h5".replace("/", os.sep)

        self.dqn_direction = DDQN(
            model_file_path=direction_model_file_path if os.path.isfile(direction_model_file_path) else None,
            input_shape=(480, 640, 4),
            input_mapping=input_mapping,
            action_space=direction_action_space,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=600,
            batch_size=32,
            model_learning_rate=1e-4,
            initial_epsilon=1,
            final_epsilon=0.01,
        )

    def handle_play(self, game_frame):
        gc.disable()

        for i, game_frame in enumerate(self.game_frame_buffer.frames):
            self.visual_debugger.store_image_data(game_frame.frame, game_frame.frame.shape, str(i))

        if self.dqn_direction.first_run:
            # self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
            # time.sleep(5)
            self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
            self.dqn_direction.first_run = False
            return None

        actor_hp = self._measure_actor_hp(game_frame)
        run_score = self._measure_run_score(game_frame)

        self.game_state["health"].appendleft(actor_hp)
        self.game_state["score"].appendleft(run_score)

        if self.dqn_direction.frame_stack is None:
            full_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            ).frames[0]

            self.dqn_direction.build_frame_stack(full_game_frame.frame)
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            )

            if self.dqn_direction.mode == "TRAIN":
                reward_direction, reward_action = self._calculate_reward()

                self.game_state["run_reward_direction"] += reward_direction
                self.game_state["run_reward_action"] += reward_action

                self.dqn_direction.append_to_replay_memory(
                    game_frame_buffer,
                    reward_direction,
                    terminal=self.game_state["health"][0] == 0
                )

                # Every 2000 steps, save latest weights to disk
                if self.dqn_direction.current_step % 2000 == 0:
                    self.dqn_direction.save_model_weights(file_path_prefix=f"datasets/Fortnite_direction")

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_direction.current_step % 20000 == 0:
                    self.dqn_direction.save_model_weights(file_path_prefix=f"datasets/Fortnite_direction", is_checkpoint=True)
            elif self.dqn_direction.mode == "RUN":
                self.dqn_direction.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()
            print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print("GAME: Fortnite PLATFORM: EXE AGENT: DDQN + Prioritized Experience Replay")
            print("")
            self.dqn_direction.output_step_data()
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward_direction'] + self.game_state['run_reward_action'], 2)}")
            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT SCORE: {self.game_state['score'][0]}")
            print("")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
            print("")
            print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
            print("")
            print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

            if self.game_state["health"][1] <= 0:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_direction.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0

                self.input_controller.handle_keys([])

                if self.dqn_direction.mode == "TRAIN":
                    for i in range(8):
                        run_time = datetime.now() - self.started_at

                        serpent.utilities.clear_terminal()
                        print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
                        print("GAME: Fortnite PLATFORM: EXE AGENT: DDQN + Prioritized Experience Replay")
                        print("")
                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/8")
                        print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                        self.dqn_direction.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_direction"] = 0
                self.game_state["run_reward_action"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8,), 3), maxlen=8)
                self.game_state["score"] = collections.deque(np.full((8,), 0), maxlen=8)

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                        if self.dqn_direction.type == "DDQN":
                            self.dqn_direction.update_target_model()

                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                        self.dqn_direction.enter_run_mode()
                    else:
                        self.dqn_direction.enter_train_mode()

                # self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
                # time.sleep(3)
                self.input_controller.tap_key(KeyboardKey.KEY_SPACE)

                return None

            self.dqn_direction.pick_action()
            self.dqn_direction.generate_action()

            keys = self.dqn_direction.get_input_values()
            print("")
            print(keys)

            # Grab the full screen and run the TinyYOLOv3 person detector on it
            img = pyautogui.screenshot(region=(0, 0, 1920, 1080))
            im = np.array(img)  # convert image to numpy array

            custom = self.detector.CustomObjects(person=True)
            detections = self.detector.detectCustomObjectsFromImage(custom_objects=custom, input_type="array", input_image=im)

            for eachObject in detections:
                print(eachObject["box_points"])

                # aim at the centre of the detected bounding box and fire
                tuple_of_x_and_y = eachObject["box_points"]
                centerX = int((tuple_of_x_and_y[0] + tuple_of_x_and_y[2]) / 2)
                centerY = int((tuple_of_x_and_y[1] + tuple_of_x_and_y[3]) / 2)

                ctypes.windll.user32.SetCursorPos(centerX, centerY)
                ctypes.windll.user32.mouse_event(2, 0, 0, 0, 0)  # left down
                time.sleep(0.05)
                ctypes.windll.user32.mouse_event(4, 0, 0, 0, 0)  # left up

                self.shot_reward = 100000

            self.input_controller.handle_keys(keys)

            if self.dqn_direction.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_direction.erode_epsilon(factor=2)
            self.dqn_direction.next_step()

            self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8,), 3), maxlen=8),
            "score": collections.deque(np.full((8,), 0), maxlen=8),
            "run_reward_direction": 0,
            "run_reward_action": 0,
            "current_run": 1,
            "current_run_steps": 0,
            "current_run_health": 0,
            "current_run_score": 0,
            "run_predicted_actions": 0,
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "run_timestamp": datetime.utcnow(),
        }

    def _measure_actor_hp(self, game_frame):
        hp_area_frame = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["HP_AREA"])
        hp_area_image = Image.fromarray(hp_area_frame)

        actor_hp = 0
        image_colors = hp_area_image.getcolors()  # TODO: remove in favor of sprite detection and location

        if image_colors:
            actor_hp = len(image_colors) - 7

        for name, sprite in self.game.sprites.items():
            sprite_to_locate = Sprite("QUERY", image_data=sprite.image_data)
            sprite_locator = SpriteLocator()
            location = sprite_locator.locate(sprite=sprite_to_locate, game_frame=game_frame)
            print(location)

            if location:
                actor_hp = 1000000

        return actor_hp

    def _measure_run_score(self, game_frame):
        score_area_frame = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["SCORE_AREA"])
        score_grayscale = np.array(skimage.color.rgb2gray(score_area_frame) * 255, dtype="uint8")
        score_image = Image.fromarray(score_grayscale)

        score = '0'
        image_colors = score_image.getcolors()

        if image_colors and len(image_colors) > 1:
            score = serpent.ocr.perform_ocr(
                image=score_grayscale,
                scale=10,
                order=5,
                horizontal_closing=10,
                vertical_closing=5
            )
            score = score.split(":")[0]

        if not score.isdigit():
            score = '0'

        self.game_state["current_run_score"] = score
        return score

    def _calculate_reward(self):
        reward = self.shot_reward
        reward += self.game_state["health"][0] / 10.0
        return reward, reward
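# Illustration only: how handle_play above turns an ImageAI detection into a
# click. box_points come back as (x1, y1, x2, y2) in screen pixels, so the aim
# point is the box centre; the raw win32 mouse_event flags the agent uses are
# MOUSEEVENTF_LEFTDOWN (0x0002) and MOUSEEVENTF_LEFTUP (0x0004). The helper
# name is hypothetical.
def _sketch_aim_point(box_points):
    x1, y1, x2, y2 = box_points
    return int((x1 + x2) / 2), int((y1 + y2) / 2)

assert _sketch_aim_point((100, 200, 300, 400)) == (200, 300)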
class SerpentCloneyGameAgent(GameAgent):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handlers["PLAY_DDQN"] = self.handle_play_ddqn

        # self.frame_handler_setups["PLAY"] = self.setup_play
        self.frame_handler_setups["PLAY_DDQN"] = self.setup_play_ddqn

        self.analytics_client = None

    def setup_play(self):
        self.plugin_path = offshoot.config["file_paths"]["plugins"]

        # Context Classifier
        context_classifier_path = f"{self.plugin_path}/SerpentCloneyGameAgentPlugin/files/ml_models/cloney_context_classifier.model"

        context_classifier = CNNInceptionV3ContextClassifier(input_shape=(288, 512, 3))
        context_classifier.prepare_generators()
        context_classifier.load_classifier(context_classifier_path)

        self.machine_learning_models["context_classifier"] = context_classifier

        # Object Detection of leaves
        self.object_detector = ObjectDetector(
            graph_fp=f'{self.plugin_path}/SerpentCloneyGameAgentPlugin/files/ml_models/cloney_detection/frozen_inference_graph.pb',
            labels_fp=f'{self.plugin_path}/SerpentCloneyGameAgentPlugin/files/ml_models/cloney_detection/cloney-detection.pbtxt',
            num_classes=2,
            threshold=0.6
        )

        # Reset Variables
        self._reset_game_state()

    # =============================
    # ----------- DQN TODO --------
    # =============================

    def setup_play_ddqn(self):
        self._reset_game_state()

        input_mapping = {
            "UP": [KeyboardKey.KEY_SPACE]
        }

        self.key_mapping = {
            KeyboardKey.KEY_SPACE.name: "UP"
        }

        movement_action_space = KeyboardMouseActionSpace(
            default_keys=[None, "UP"]
        )

        movement_model_file_path = "datasets/cloney_direction_dqn_0_1_.h5".replace("/", os.sep)

        self.dqn_movement = DDQN(
            model_file_path=movement_model_file_path if os.path.isfile(movement_model_file_path) else None,
            input_shape=(100, 100, 4),
            input_mapping=input_mapping,
            action_space=movement_action_space,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=1000,
            batch_size=32,
            model_learning_rate=1e-4,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=False
        )

    def handle_play_ddqn(self, game_frame):
        gc.disable()

        if self.dqn_movement.first_run:
            self.input_controller.tap_key(KeyboardKey.KEY_W)
            self.dqn_movement.first_run = False
            time.sleep(5)
            return None

        dragon_alive = self._measure_dragon_alive(game_frame)
        # dragon_coins = self._measure_dragon_coins(game_frame)

        self.game_state["alive"].appendleft(dragon_alive)
        # self.game_state["coins"].appendleft(dragon_coins)

        if self.dqn_movement.frame_stack is None:
            # pipeline_game_frame = FrameGrabber.get_frames(
            #     [0],
            #     frame_shape=game_frame.frame.shape,
            #     frame_type="MINI"
            # ).frames[0]

            self.dqn_movement.build_frame_stack(game_frame.ssim_frame)
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=game_frame.frame.shape,
                frame_type="MINI"
            )

            if self.dqn_movement.mode == "TRAIN":
                reward = self._calculate_reward()

                self.game_state["run_reward"] += reward

                self.dqn_movement.append_to_replay_memory(
                    game_frame_buffer,
                    reward,
                    terminal=self.game_state["alive"][0] == 0
                )

                # Every 2000 steps, save latest weights to disk
                if self.dqn_movement.current_step % 2000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix=f"datasets/cloney_movement"
                    )

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_movement.current_step % 20000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix=f"datasets/cloney_movement",
                        is_checkpoint=True
                    )
            elif self.dqn_movement.mode == "RUN":
                self.dqn_movement.update_frame_stack(self.game_frame_buffer)

            run_time = datetime.now() - self.started_at

            print("\033c" + f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print("")
            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_movement.output_step_data()
            print("")
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward'], 2)}")
            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(f"CURRENT DRAGON ALIVE: {self.game_state['alive'][0]}")
            # print(f"CURRENT DRAGON COINS: {self.game_state['coins'][0]}")
            print("")
            # print(f"AVERAGE ACTIONS PER SECOND: {round(self.game_state['average_aps'], 2)}")
            print("")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
            # print(f"LAST RUN COINS: {self.game_state['last_run_coins'][0]}")
            print("")
            print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
            # print(f"RECORD COINS COLLECTED: {self.game_state['record_coins_collected'].get('value')} coins (Run {self.game_state['record_coins_collected'].get('run')}, {'Predicted' if self.game_state['record_coins_collected'].get('predicted') else 'Training'})")
            print("")
            print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

            if self.game_state["alive"][1] <= 0:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                gc.enable()
                gc.collect()
                gc.disable()

                # Set display stuff TODO
                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_movement.mode == "RUN"
                        }

                    # if self.game_state["coins"][0] < self.game_state["record_coins_collected"].get("value", 1000):
                    #     self.game_state["record_coins_collected"] = {
                    #         "value": self.game_state["coins"][0],
                    #         "run": self.game_state["current_run"],
                    #         "predicted": self.dqn_movement.mode == "RUN"
                    #     }
                else:
                    self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0

                self.input_controller.release_key(KeyboardKey.KEY_SPACE)

                if self.dqn_movement.mode == "TRAIN":
                    for i in range(8):
                        serpent.utilities.clear_terminal()
                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/8")
                        print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                        self.dqn_movement.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["alive"] = collections.deque(np.full((8,), 4), maxlen=8)
                # self.game_state["coins"] = collections.deque(np.full((8,), 0), maxlen=8)

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                        if self.dqn_movement.type == "DDQN":
                            self.dqn_movement.update_target_model()

                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                        self.dqn_movement.enter_run_mode()
                    else:
                        self.dqn_movement.enter_train_mode()

                self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
                time.sleep(5)

                return None

            self.dqn_movement.pick_action()
            self.dqn_movement.generate_action()

            keys = self.dqn_movement.get_input_values()
            print("")
            print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), keys))))

            self.input_controller.handle_keys(keys)

            if self.dqn_movement.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_movement.erode_epsilon(factor=2)
            self.dqn_movement.next_step()

            self.game_state["current_run_steps"] += 1

    def handle_play(self, game_frame):
        context = self.machine_learning_models["context_classifier"].predict(game_frame.frame)

        if context is None:
            return

        if context == "GAME_WORLD_1":
            self.display_game_agent_state(context=context)
            self.handle_play_context_game_world(game_frame=game_frame)
            self.in_progress_game_over = False
        elif context == "GAME_OVER":
            self.display_game_agent_state(context=context)
            time.sleep(2)

            if self.in_progress_game_over is False:
                self.handle_play_context_game_over(game_frame=game_frame)
        elif context == "MAIN_MENU":
            self.input_controller.click_screen_region(screen_region="MAIN_MENU_PLAY")
            time.sleep(3.5)
            self.current_run_started_at = datetime.utcnow()
        elif context == "GAME_PAUSE":
            self.handle_play_context_game_pause(game_frame)

    def handle_play_context_game_world(self, game_frame):
        # Only predict if object_detector is idle
        if self.object_detector.get_status() is False:
            self.object_predictions = self.object_detector.predict(frame=game_frame.frame)

        for prediction in self.object_predictions:
            if prediction['class'] == "dragon":
                self.positions['dragon_pos_right_x'] = prediction['bb_o'][3]
                self.positions['dragon_pos_left_x'] = prediction['bb_o'][1]
                self.positions['dragon_pos_mid_y'] = (prediction['bb_o'][0] + prediction['bb_o'][2]) / 2
                self.positions['dragon_pos_mid_x'] = (prediction['bb_o'][1] + prediction['bb_o'][3]) / 2
                self.dragon_object = prediction
            elif prediction['class'] == "leaves":
                self.positions['leaf_pos_mid_y'] = (prediction['bb_o'][0] + prediction['bb_o'][2]) / 2
                self.positions['leaf_pos_top_y'] = prediction['bb_o'][0]
                self.positions['leaf_pos_bottom_y'] = prediction['bb_o'][2]
                self.positions['leaf_pos_right_x'] = prediction['bb_o'][3]
                self.positions['leaf_pos_left_x'] = prediction['bb_o'][1]
                self.leaf_object = prediction

            if (self.positions['dragon_pos_mid_y'] > (self.positions['leaf_pos_top_y'] - 50)
                    and self.positions['dragon_pos_mid_y'] < (self.positions['leaf_pos_bottom_y']) + 50) \
                    and (self.positions['dragon_pos_right_x'] + 100) > self.positions['leaf_pos_left_x']:
                # Same height as the leaf and close enough to collide
                self.warning = "HIGH"

                if self.positions['dragon_pos_right_x'] + 100 > self.positions['leaf_pos_left_x']:
                    self.input_controller.tap_key(KeyboardKey.KEY_S, duration=0.025)
                    time.sleep(0.1)
                elif self.positions['dragon_pos_mid_y'] - 50 < self.positions['leaf_pos_bottom_y']:
                    time.sleep(0.225)
                    self.input_controller.tap_key(KeyboardKey.KEY_S, duration=0.025)
                elif self.positions['dragon_pos_mid_y'] + 50 > self.positions['leaf_pos_top_y']:
                    self.input_controller.tap_key(KeyboardKey.KEY_S, duration=0.025)
                    time.sleep(0.1)

                break
            else:
                self.warning = "SAFE"
                self.input_controller.tap_key(KeyboardKey.KEY_S, duration=0.026)
                time.sleep(0.23)
                break

    def handle_play_context_game_over(self, game_frame):
        self.in_progress_game_over = True
        time.sleep(4)

        self.game_state['last_run_duration'] = (datetime.utcnow() - self.game_state['current_run_started_at']).seconds if self.game_state['current_run_started_at'] else 0
        self.game_state['last_run'] = self.game_state['current_run']

        if self.game_state['record_duration'] is not None:
            if self.game_state['last_run_duration'] > self.game_state['record_duration']:
                self.game_state['record_duration'] = self.game_state['last_run_duration']
                self.game_state['record_run'] = self.game_state['last_run']
        else:
            self.game_state['record_duration'] = self.game_state['last_run_duration']

        # Process Image for OCR
        frame = game_frame.frame
        gray_frame = skimage.color.rgb2gray(frame)
        frame_coins = gray_frame[190:300, 250:780]
        frame_distance = gray_frame[355:410, 550:760]
        frame_time = gray_frame[300:355, 550:760]

        # Find Coins
        text_coins = ocr.perform_ocr(image=frame_coins, scale=2, order=5, horizontal_closing=2, vertical_closing=3)
        # Find Distance
        text_distance = ocr.perform_ocr(image=frame_distance, scale=2, order=5, horizontal_closing=2, vertical_closing=3)
        text_time = ocr.perform_ocr(image=frame_time, scale=2, order=5, horizontal_closing=2, vertical_closing=3)

        print(text_coins)
        print(text_time)
        print(text_distance)

        # if "$" in coins:
        #     num_coins = coins.replace('$', '')
        #     self.game_state['last_run_coins_collected'] = int(num_coins)
        #
        #     if self.game_state['last_run_coins_collected'] > self.game_state['record_coins_collected']:
        #         self.game_state['record_coins_collected'] = self.game_state['last_run_coins_collected']

        # Find Distance and Time
        # candidates, regions = ocr.extract_ocr_candidates(image=frame, gradient_size=3, closing_size=10, minimum_area=100, minimum_aspect_ratio=2)
        # print(regions)
        # gray_frame = skimage.color.rgb2gray(frame)

        # for region in regions:
        #     crop = gray_frame[region[0]:region[2], region[1]:region[3]]
        #     read = ocr.perform_ocr(image=crop, scale=1, order=5, horizontal_closing=1, vertical_closing=1)
        #     print(read)
        #     if "Distance" in read or "Time" in read:
        #         self.pos_d = regions.index(region) + 1
        #     elif "Time" in read:
        #         self.pos_t = regions.index(region) + 1
        #
        #     if regions.index(region) == self.pos_d:
        #         self.game_state['last_run_distance'] = read.replace('m', '')
        #         if self.game_state['last_run_distance'] > self.game_state['record_distance']:
        #             self.game_state['record_distance'] = self.game_state['last_run_distance']
        #     elif regions.index(region) == self.pos_t:
        #         self.game_state['last_run_duration_actual'] = read
        #         # Still have to check for a record; find out about time formatting

        # Click PLAY button to start a new run
        # self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        self.input_controller.click_screen_region(screen_region="GAME_OVER_PLAY")

        # Wait for "Ready, Set, Tap"
        time.sleep(3)
        self.input_controller.tap_key(KeyboardKey.KEY_S)
        time.sleep(0.2)

        self.game_state['current_run'] += 1
        self.game_state['current_run_started_at'] = datetime.utcnow()

    def handle_play_context_main_menu(self, game_frame):
        self.input_controller.click_screen_region(screen_region="MAIN_MENU_PLAY")

    def handle_play_context_game_pause(self, game_frame):
        time.sleep(1)
        self.input_controller.click_screen_region(screen_region="GAME_PAUSE")

    def display_game_agent_state(self, context):
        self.game_state['current_run_duration'] = (datetime.utcnow() - self.game_state['current_run_started_at']).seconds

        print("\033c")
        print("======================================================")
        print(f"GAME: Cloney PLATFORM: Steam VERSION: v0.0.1")
        print("======================================================")
        print("")
        print(xtermcolor.colorize("OBJECT DETECTION", ansi=9))
        print(f"Detected: {len(self.object_predictions)} objects")

        if self.warning == "HIGH":
            print(xtermcolor.colorize(f"Danger Level: {self.warning}", ansi=1))
        elif self.warning == "SAFE":
            print(xtermcolor.colorize(f"Danger Level: {self.warning}", ansi=2))

        # print(f"DRAGON POS: {self.dragon_object['bb_o'][0]}, {self.dragon_object['bb_o'][1]}, {self.dragon_object['bb_o'][2]}, {self.dragon_object['bb_o'][3]}")
        # print(f"LAST LEAF POS: {self.leaf_object['bb_o'][0]}, {self.leaf_object['bb_o'][1]}, {self.leaf_object['bb_o'][2]}, {self.leaf_object['bb_o'][3]}")

        print("")
        print(xtermcolor.colorize("GAME STATISTICS", ansi=9))
        print(f"Current Context: {context}\n")
        print(f"Current Run: #{self.game_state['current_run']}")
        print(f"Current Run Duration: {self.game_state['current_run_duration']}s")
        print("")
        print(f"Last Run: #{self.game_state['last_run']}")
        print(f"Last Run Duration: {self.game_state['last_run_duration']}s")
        print(f"Last Run Duration Actual: {self.game_state['last_run_duration_actual']}")
        print(f"Last Run Distance: {self.game_state['last_run_distance']}m")
        print(f"Last Run Coins Collected: {self.game_state['last_run_coins_collected']}")
        print(f"Record Duration: {self.game_state['record_duration']}s (Run #{self.game_state['record_run']})")

    def _reset_game_state(self):
        # Display Variables
        self.game_state = {
            "alive": collections.deque(np.full((8,), 4), maxlen=8),
            "coins": collections.deque(np.full((8,), 0), maxlen=8),
            "current_run": 1,
            "current_run_started_at": datetime.utcnow(),
            "current_run_duration": None,
            "current_run_steps": 0,
            "run_reward": 0,
            "run_future_rewards": 0,
            "run_predicted_actions": 0,
            "run_timestamp": datetime.utcnow(),
            "last_run": 0,
            "last_run_duration": 0,
            "last_run_duration_actual": None,
            "last_run_distance": 0.0,
            "last_run_coins_collected": 0,
            "record_duration": None,
            "record_duration_actual": 0,
            "record_run": 0,
            "record_distance": 0.0,
            "record_coins_collected": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "random_distance_travelled": 0.0
        }

        # Object Detection Variables
        self.object_predictions = []
        self.warning = ""
        self.dragon_object = []
        self.leaf_object = []
        self.positions = {
            'leaf_pos_mid_y': 0,
            'leaf_pos_right_x': 0,
            'leaf_pos_left_x': 0,
            'leaf_pos_top_y': 0,
            'leaf_pos_bottom_y': 0,
            'dragon_pos_right_x': 0,
            'dragon_pos_left_x': 0,
            'dragon_pos_mid_y': 0,
            'dragon_pos_mid_x': 0
        }

        # Other Variables
        self.pos_d = -1
        self.pos_t = -1
        self.in_progress_game_over = False

    def _measure_dragon_alive(self, game_frame):
        dollar_area_frame = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["DOLLAR_AREA"])

        dragon_alive = None
        max_ssim = 0

        for name, sprite in self.game.sprites.items():
            print(name)
            print(name[-1])

            for i in range(sprite.image_data.shape[3]):
                ssim = skimage.measure.compare_ssim(
                    dollar_area_frame,
                    np.squeeze(sprite.image_data[..., :3, i]),
                    multichannel=True
                )

                if ssim > max_ssim:
                    max_ssim = ssim
                    dragon_alive = 1  # int(name[-1])

        return dragon_alive

    def _measure_dragon_coins(self, game_frame):
        coins_area_frame = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["COINS_AREA"])
        return coins_area_frame[coins_area_frame[..., 2] > 150].size

    def _calculate_reward(self):
        reward = 0
        reward += (-0.5 if self.game_state["alive"][0] < self.game_state["alive"][1] else 0.05)
        # reward += (0.5 if (self.game_state["coins"][0] - self.game_state["coins"][1]) >= 1 else -0.05)
        return reward
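# Illustration only: the template test inside _measure_dragon_alive above. A
# Serpent sprite keeps its animation frames in image_data with shape
# (height, width, 4, n_frames), so frame i with alpha dropped is
# image_data[..., :3, i]; the region is matched against every frame and the
# best SSIM score wins. compare_ssim is injected (e.g.
# skimage.measure.compare_ssim); the helper name is hypothetical.
def _sketch_best_ssim_frame(region, sprite_image_data, compare_ssim):
    best_score, best_index = 0, None
    for i in range(sprite_image_data.shape[3]):
        score = compare_ssim(region, sprite_image_data[..., :3, i], multichannel=True)
        if score > best_score:
            best_score, best_index = score, i
    return best_score, best_index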
class SerpentRoboGameAgent(GameAgent): def __init__(self, **kwargs): super().__init__(**kwargs) self.frame_handlers["PLAY"] = self.handle_play self.frame_handler_setups["PLAY"] = self.setup_play self.sprite_locator = SpriteLocator() self.game_state = None self._reset_game_state() def setup_play(self): input_mapping = { "W": [KeyboardKey.KEY_W], "A": [KeyboardKey.KEY_A], "S": [KeyboardKey.KEY_S], "D": [KeyboardKey.KEY_D], "WA": [KeyboardKey.KEY_W, KeyboardKey.KEY_A], "WD": [KeyboardKey.KEY_W, KeyboardKey.KEY_D], "SA": [KeyboardKey.KEY_S, KeyboardKey.KEY_A], "SD": [KeyboardKey.KEY_S, KeyboardKey.KEY_D], "J": [KeyboardKey.KEY_J], "K": [KeyboardKey.KEY_K], "L": [KeyboardKey.KEY_L], "U": [KeyboardKey.KEY_U], "I": [KeyboardKey.KEY_I], "O": [KeyboardKey.KEY_O], "JU": [KeyboardKey.KEY_J, KeyboardKey.KEY_U], "KI": [KeyboardKey.KEY_K, KeyboardKey.KEY_I], "LO": [KeyboardKey.KEY_L, KeyboardKey.KEY_O], "N": [KeyboardKey.KEY_N], "M": [KeyboardKey.KEY_M], "NONE": [] } self.key_mapping = { KeyboardKey.KEY_W.name: "MOVE UP", KeyboardKey.KEY_A.name: "MOVE LEFT", KeyboardKey.KEY_S.name: "MOVE DOWN", KeyboardKey.KEY_D.name: "MOVE RIGHT", KeyboardKey.KEY_J.name: "LIGHT PUNCH", KeyboardKey.KEY_K.name: "MEDIUM PUNCH", KeyboardKey.KEY_L.name: "HARD PUNCH", KeyboardKey.KEY_U.name: "LIGHT KICK", KeyboardKey.KEY_I.name: "MEDIUM KICK", KeyboardKey.KEY_O.name: "HARD KICK", KeyboardKey.KEY_N.name: "START", KeyboardKey.KEY_M.name: "SELECT" } movement_action_space = KeyboardMouseActionSpace(directional_keys=[ "W", "A", "S", "D", "WA", "WD", "SA", "SD", "NONE" ]) fightinput_action_space = KeyboardMouseActionSpace(fightinput_keys=[ "J", "K", "L", "U", "I", "O", "JU", "KI", "LO", "NONE" ]) movement_model_file_path = "datasets/fighting_movement_dqn_0_1_.h5".replace( "/", os.sep) self.dqn_movement = DDQN( model_file_path=movement_model_file_path if os.path.isfile(movement_model_file_path) else None, input_shape=(100, 100, 4), input_mapping=input_mapping, action_space=movement_action_space, replay_memory_size=5000, max_steps=1000000, observe_steps=1000, batch_size=32, initial_epsilon=1, final_epsilon=0.01, override_epsilon=False) fightinput_model_file_path = "datasets/fighting_fightinput_dqn_0_1_.h5".replace( "/", os.sep) self.dqn_fightinput = DDQN( model_file_path=fightinput_model_file_path if os.path.isfile(fightinput_model_file_path) else None, input_shape=(100, 100, 4), input_mapping=input_mapping, action_space=fightinput_action_space, replay_memory_size=5000, max_steps=1000000, observe_steps=1000, batch_size=32, initial_epsilon=1, final_epsilon=0.01, override_epsilon=False) print("Debug: Game Started") def handle_play(self, game_frame): #print("Debug: Main") title_locator = sprite_locator.locate( sprite=self.game.sprites['SPRITE_TITLE_TEXT'], game_frame=game_frame) menu_locator = sprite_locator.locate( sprite=self.game.sprites['SPRITE_MAINMENU_TEXT'], game_frame=game_frame) fightmenu_select_locator = sprite_locator.locate( sprite=self.game.sprites['SPRITE_FIGHTMENU_SELECT'], game_frame=game_frame) playerselect_locator = sprite_locator.locate( sprite=self.game.sprites['SPRITE_PLAYERSELECT'], game_frame=game_frame) backbutton_locator = sprite_locator.locate( sprite=self.game.sprites['SPRITE_BACKBUTTON'], game_frame=game_frame) fightcheck_locator = sprite_locator.locate( sprite=self.game.sprites['SPRITE_FIGHTCHECK'], game_frame=game_frame) roundstart_locator = sprite_locator.locate( sprite=self.game.sprites['SPRITE_ROUNDSTART'], game_frame=game_frame) retrybutton_locator = sprite_locator.locate( 
sprite=self.game.sprites['SPRITE_FIGHTMENU_RETRY'], game_frame=game_frame) backbutton_locator = sprite_locator.locate( sprite=self.game.sprites['SPRITE_BACKBUTTON'], game_frame=game_frame) (self.p1hp, self.p2hp) = readhp() self.game_state["health"].appendleft(self.p1hp) self.game_state["enemy_health"].appendleft(self.p2hp) if (roundstart_locator): #print("Debug: roundstart_locator Locator") self.game_state["fightstarted"] = True elif (retrybutton_locator): #print("Debug: retrybutton_locator Locator") self.handle_fight_end(game_frame) elif (fightcheck_locator): #print("Debug: fightcheck_locator Locator") self.handle_fight(game_frame) elif (title_locator): #print("Debug: title_locator Locator") self.handle_menu_title(game_frame) elif (menu_locator): #print("Debug: menu_locator Locator") self.handle_menu_select(game_frame) elif (playerselect_locator): #print("Debug: playerselect_locator Locator") self.handle_player_select(game_frame) elif (backbutton_locator): #print("Debug: backbutton_locator Locator") self.handle_backbutton(game_frame) elif ((fightmenu_select_locator) and (self.game_state["current_run"] != 1)): #print("Debug: fightmenu_select_locator Locator") self.handle_fightmenu_select(game_frame) else: return def handle_retry_button(self, game_frame): if (self.game_state["current_run"] % 25 == 0): print(Fore.RED + 'Changing Opponent') print(Style.RESET_ALL) time.sleep(1) self.input_controller.tap_key(KeyboardKey.KEY_S) time.sleep(0.5) self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(1) else: print(Fore.RED + 'Restarting Fight') print(Style.RESET_ALL) time.sleep(1) self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(1) def handle_backbutton(self, game_frame): print(Fore.RED + 'Pressing Select') print(Style.RESET_ALL) self.input_controller.tap_key(KeyboardKey.KEY_M) time.sleep(1) def handle_menu_title(self, game_frame): print(Fore.RED + 'Pressing Start') print(Style.RESET_ALL) self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(2) def handle_fightmenu_select(self, game_frame): self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(2) def handle_player_select(self, game_frame): time.sleep(1) print(Fore.RED + 'Choosing one Char') self.input_controller.tap_key(KeyboardKey.KEY_A) time.sleep(0.3) self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(0.5) print("Choosing Robo") self.input_controller.tap_key(KeyboardKey.KEY_S) time.sleep(0.3) self.input_controller.tap_key(KeyboardKey.KEY_S) time.sleep(0.3) self.input_controller.tap_key(KeyboardKey.KEY_D) time.sleep(0.3) self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(0.5) print("Choosing one CPU Char") self.input_controller.tap_key(KeyboardKey.KEY_A) time.sleep(0.3) self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(0.3) print("Choosing Random CPU Char") self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(0.3) print("Starting Game") print(Style.RESET_ALL) self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(1) def handle_menu_select(self, game_frame): menu_selector = sprite_locator.locate( sprite=self.game.sprites['SPRITE_MAINMENU_SINGLEPLAY'], game_frame=game_frame) if (menu_selector): print(Fore.RED + 'Starting Singleplayer Mode') print(Style.RESET_ALL) self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(1) self.input_controller.tap_key(KeyboardKey.KEY_S) time.sleep(1) self.input_controller.tap_key(KeyboardKey.KEY_S) time.sleep(1) self.input_controller.tap_key(KeyboardKey.KEY_J) time.sleep(1) else: self.input_controller.tap_key(KeyboardKey.KEY_S) time.sleep(1) 
    def handle_fight(self, game_frame):
        gc.disable()

        if not self.game_state["fightstarted"]:
            return
        if ((self.game_state["health"][0] == 0 and self.game_state["health"][1] == 0)
                or (self.game_state["enemy_health"][0] == 0 and self.game_state["enemy_health"][1] == 0)):
            return

        if self.dqn_movement.first_run:
            self.dqn_movement.first_run = False
            self.dqn_fightinput.first_run = False
            return None

        if self.dqn_movement.frame_stack is None:
            pipeline_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE",
                dtype="float64"
            ).frames[0]
            self.dqn_movement.build_frame_stack(pipeline_game_frame.frame)
            self.dqn_fightinput.frame_stack = self.dqn_movement.frame_stack
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE",
                dtype="float64"
            )

            if self.dqn_movement.mode == "TRAIN":
                reward_movement, reward_fightinput = self._calculate_reward()
                self.game_state["run_reward_movement"] += reward_movement
                self.game_state["run_reward_fightinput"] += reward_fightinput
                self.dqn_movement.append_to_replay_memory(
                    game_frame_buffer,
                    reward_movement,
                    terminal=self.game_state["health"] == 0
                )
                self.dqn_fightinput.append_to_replay_memory(
                    game_frame_buffer,
                    reward_fightinput,
                    terminal=self.game_state["health"] == 0
                )
                # Every 2000 steps, save latest weights to disk
                if self.dqn_movement.current_step % 2000 == 0:
                    self.dqn_movement.save_model_weights(file_path_prefix="datasets/fighting_movement")
                    self.dqn_fightinput.save_model_weights(file_path_prefix="datasets/fighting_fightinput")
                # Every 20000 steps, save a weights checkpoint to disk
                if self.dqn_movement.current_step % 20000 == 0:
                    self.dqn_movement.save_model_weights(file_path_prefix="datasets/fighting_movement", is_checkpoint=True)
                    self.dqn_fightinput.save_model_weights(file_path_prefix="datasets/fighting_fightinput", is_checkpoint=True)
            elif self.dqn_movement.mode == "RUN":
                self.dqn_movement.update_frame_stack(game_frame_buffer)
                self.dqn_fightinput.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at
            serpent.utilities.clear_terminal()
            print("")
            print(Fore.YELLOW)
            print(Style.BRIGHT)
            print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print(Style.RESET_ALL)
            print("")
            print(Fore.GREEN)
            print(Style.BRIGHT)
            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_movement.output_step_data()
            print("")
            print("FIGHT NEURAL NETWORK:\n")
            self.dqn_fightinput.output_step_data()
            print(Style.RESET_ALL)
            print("")
            print(Style.BRIGHT)
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
            print("")
            print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward_movement'] + self.game_state['run_reward_fightinput'], 4)}")
            print(f"COMBO MULTIPLIER: {self.game_state['multiplier_damage']}")
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT ENEMY HEALTH: {self.game_state['enemy_health'][0]}")
            print("")
            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(Style.RESET_ALL)

            self.dqn_movement.pick_action()
            self.dqn_movement.generate_action()
            self.dqn_fightinput.pick_action(action_type=self.dqn_movement.current_action_type)
            self.dqn_fightinput.generate_action()

            movement_keys = self.dqn_movement.get_input_values()
            fightinput_keys = self.dqn_fightinput.get_input_values()
            print("")
            print(Fore.GREEN)
            print(Style.BRIGHT)
            print(" + ".join(self.key_mapping.get(key.name) for key in movement_keys + fightinput_keys))
            print(Style.RESET_ALL)

            self.input_controller.handle_keys(movement_keys + fightinput_keys)

            if self.dqn_movement.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_movement.erode_epsilon(factor=2)
            self.dqn_fightinput.erode_epsilon(factor=2)
            self.dqn_movement.next_step()
            self.dqn_fightinput.next_step()
            self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8,), 6), maxlen=8),
            "enemy_health": collections.deque(np.full((8,), 654), maxlen=8),
            "current_run": 1,
            "current_run_steps": 0,
            "run_reward_movement": 0,
            "run_reward_fightinput": 0,
            "run_future_rewards": 0,
            "run_predicted_actions": 0,
            "run_timestamp": datetime.utcnow(),
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "record_enemy_hp": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "random_enemy_hp": None,
            "random_enemy_hps": list(),
            "fightstarted": None,
            "multiplier_damage": 0
        }

    def _calculate_reward(self):
        reward_movement = 0
        reward_fightinput = 0

        # Getting hit by the enemy
        if self.game_state["health"][0] < self.game_state["health"][1]:
            self.game_state["multiplier_damage"] = 0
            reward_movement += -0.10
            reward_fightinput += -0.10
        else:
            reward_movement += 0.05

        # Hitting the enemy
        if self.game_state["enemy_health"][0] < self.game_state["enemy_health"][1]:
            # Combo multiplier, capped at 1
            self.game_state["multiplier_damage"] += 0.20
            if self.game_state["multiplier_damage"] > 1:
                self.game_state["multiplier_damage"] = 1
            # Check how much damage the attack did and add 0.05 per damage class to the reward
            if (self.game_state["enemy_health"][1] - self.game_state["enemy_health"][0]) > 150:  # light
                reward_fightinput += 0.05
            if (self.game_state["enemy_health"][1] - self.game_state["enemy_health"][0]) > 500:  # medium
                reward_fightinput += 0.05
            if (self.game_state["enemy_health"][1] - self.game_state["enemy_health"][0]) > 750:  # hard
                reward_fightinput += 0.05
            # Calculate the reward
            reward_fightinput += (1 * self.game_state["multiplier_damage"])
        else:
            reward_fightinput += -0.05
            reward_movement += -0.01

        # Enemy wasn't hit for 5 frames: reset the combo multiplier
        if self.game_state["enemy_health"][0] == self.game_state["enemy_health"][5]:
            self.game_state["multiplier_damage"] = 0

        return reward_movement, reward_fightinput

    def handle_fight_end(self, game_frame):
        self.game_state["fightstarted"] = None
        self.input_controller.handle_keys([])
        self.game_state["current_run"] += 1
        self.handle_fight_training(game_frame)

    def handle_fight_training(self, game_frame):
        serpent.utilities.clear_terminal()
        timestamp = datetime.utcnow()
        timestamp_delta = timestamp - self.game_state["run_timestamp"]
        self.game_state["last_run_duration"] = timestamp_delta.seconds

        gc.enable()
        gc.collect()
        gc.disable()

        if self.dqn_movement.mode in ["TRAIN", "RUN"]:
            # Check for records
            if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                self.game_state["record_time_alive"] = {
                    "value": self.game_state["last_run_duration"],
                    "run": self.game_state["current_run"],
                    "predicted": self.dqn_movement.mode == "RUN",
                    "enemy_hp": self.game_state["enemy_health"][0]
                }
            if self.game_state["enemy_health"][0] < self.game_state["record_enemy_hp"].get("value", 1000):
                self.game_state["record_enemy_hp"] = {
                    "value": self.game_state["enemy_health"][0],
                    "run": self.game_state["current_run"],
                    "predicted": self.dqn_movement.mode == "RUN",
                    "time_alive": self.game_state["last_run_duration"]
                }
        else:
            self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
            self.game_state["random_enemy_hps"].append(self.game_state["enemy_health"][0])
            self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])
            self.game_state["random_enemy_hp"] = np.mean(self.game_state["random_enemy_hps"])

        self.game_state["current_run_steps"] = 0
        self.input_controller.handle_keys([])

        if self.dqn_movement.mode == "TRAIN":
            for i in range(16):
                serpent.utilities.clear_terminal()
                print("")
                print(Fore.GREEN)
                print(Style.BRIGHT)
                print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 25 == 0 else ''}")
                print(Style.RESET_ALL)
                self.dqn_movement.train_on_mini_batch()
                self.dqn_fightinput.train_on_mini_batch()

        self.game_state["run_timestamp"] = datetime.utcnow()
        self.game_state["run_reward_movement"] = 0
        self.game_state["run_reward_fightinput"] = 0
        self.game_state["run_predicted_actions"] = 0
        self.game_state["health"] = collections.deque(np.full((8,), 6), maxlen=8)
        self.game_state["enemy_health"] = collections.deque(np.full((8,), 654), maxlen=8)

        if self.dqn_movement.mode in ["TRAIN", "RUN"]:
            if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                self.dqn_movement.update_target_model()
                self.dqn_fightinput.update_target_model()
            if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                self.dqn_movement.enter_run_mode()
                self.dqn_fightinput.enter_run_mode()
            else:
                self.dqn_movement.enter_train_mode()
                self.dqn_fightinput.enter_train_mode()

        self.handle_retry_button(game_frame)
class SerpentGeometryDashGameAgent(GameAgent):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play
        self.game_state = None
        self._reset_game_state()

    def setup_play(self):
        input_mapping = {"SPACE": [KeyboardKey.KEY_SPACE]}
        self.key_mapping = {KeyboardKey.KEY_SPACE.name: "SPACE"}

        action_space = KeyboardMouseActionSpace(action_keys=[None, "SPACE"])

        action_model_file_path = "datasets/GeometryDash_action_dqn_0_1_.h5".replace("/", os.sep)

        self.dqn_action = DDQN(
            model_file_path=action_model_file_path if os.path.isfile(action_model_file_path) else None,
            input_shape=(self.game.frame_height, self.game.frame_width, 4),
            input_mapping=input_mapping,
            action_space=action_space,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=10000,
            batch_size=32,
            model_learning_rate=1e-4,
            initial_epsilon=0.25,
            final_epsilon=0.01,
            override_epsilon=False
        )

    def handle_play(self, game_frame):
        gc.disable()

        for i, game_frame in enumerate(self.game_frame_buffer.frames):
            self.visual_debugger.store_image_data(
                game_frame.grayscale_frame,
                game_frame.grayscale_frame.shape,
                str(i)
            )

        if self.dqn_action.first_run:
            self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
            self.dqn_action.first_run = False
            time.sleep(5)
            return None

        actor_hp = self._measure_actor_hp(game_frame)
        run_score = self._measure_run_score(game_frame)

        self.game_state["health"].appendleft(actor_hp)
        self.game_state["score"].appendleft(run_score)

        if self.dqn_action.frame_stack is None:
            full_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            ).frames[0]
            self.dqn_action.build_frame_stack(full_game_frame.frame)
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            )

            if self.dqn_action.mode == "TRAIN":
                reward_action = self._calculate_reward()
                self.game_state["run_reward_action"] = max(self.game_state["run_reward_action"], reward_action)
                self.dqn_action.append_to_replay_memory(
                    game_frame_buffer,
                    reward_action,
                    terminal=self.game_state["health"] == 0
                )
                # Every 2000 steps, save latest weights to disk
                if self.dqn_action.current_step % 2000 == 0:
                    self.dqn_action.save_model_weights(file_path_prefix="datasets/GeometryDash_action")
                # Every 20000 steps, save a weights checkpoint to disk
                if self.dqn_action.current_step % 20000 == 0:
                    self.dqn_action.save_model_weights(file_path_prefix="datasets/GeometryDash_action", is_checkpoint=True)
            elif self.dqn_action.mode == "RUN":
                self.dqn_action.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at
            serpent.utilities.clear_terminal()
            print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print("GAME: GeometryDash PLATFORM: Steam AGENT: DDQN + Prioritized Experience Replay")
            print("")
            self.dqn_action.output_step_data()
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward_action'], 2)}")
            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT SCORE: {self.game_state['score'][0]}")
            print("")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
            print("")
            print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
            print("")
            print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

            if self.game_state["health"][1] <= 0:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_action.mode in ["TRAIN", "RUN"]:
                    # Check for records
                    if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_action.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0
                self.input_controller.handle_keys([])

                if self.dqn_action.mode == "TRAIN":
                    for i in range(8):
                        run_time = datetime.now() - self.started_at
                        serpent.utilities.clear_terminal()
                        print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
                        print("GAME: GeometryDash PLATFORM: Steam AGENT: DDQN + Prioritized Experience Replay")
                        print("")
                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/8")
                        print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")
                        print(f"LAST RUN: {self.game_state['current_run']}")
                        print(f"LAST RUN REWARD: {round(self.game_state['run_reward_action'], 2)}")
                        print(f"LAST RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
                        print(f"LAST SCORE: {self.game_state['score'][0]}")
                        print("")
                        print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
                        print("")
                        print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")
                        self.dqn_action.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_action"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8,), 3), maxlen=8)
                self.game_state["score"] = collections.deque(np.full((8,), 0), maxlen=8)

                if self.dqn_action.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                        if self.dqn_action.type == "DDQN":
                            self.dqn_action.update_target_model()
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                        self.dqn_action.enter_run_mode()
                    else:
                        self.dqn_action.enter_train_mode()

                self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
                time.sleep(1)
                # self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
                return None

            self.dqn_action.pick_action()
            self.dqn_action.generate_action()

            keys = self.dqn_action.get_input_values()
            print("")
            print("PRESSING: ", end='')
            print(" + ".join(self.key_mapping.get(key.name) for key in keys))

            self.input_controller.handle_keys(keys)

            if self.dqn_action.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_action.erode_epsilon(factor=2)
            self.dqn_action.next_step()
            self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8,), 3), maxlen=8),
            "score": collections.deque(np.full((8,), 0), maxlen=8),
            "run_reward_action": 0,
            "current_run": 1,
            "current_run_steps": 0,
            "current_run_health": 0,
            "current_run_score": 0,
            "run_predicted_actions": 0,
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "run_timestamp": datetime.utcnow(),
        }

    def _measure_actor_hp(self, game_frame):
        # Uses the brightness of the score area as a liveness proxy: an Otsu
        # threshold above 90 counts as alive
        hp_area_grayscale = serpent.cv.extract_region_from_image(
            game_frame.grayscale_frame,
            self.game.screen_regions["SCORE_AREA"]
        )
        try:
            threshold = skimage.filters.threshold_otsu(hp_area_grayscale)
        except ValueError:
            threshold = 0
        return 1 if threshold > 90 else 0

    def _measure_run_score(self, game_frame):
        score_grayscale = serpent.cv.extract_region_from_image(
            game_frame.grayscale_frame,
            self.game.screen_regions["SCORE_AREA"]
        )
        try:
            threshold = skimage.filters.threshold_otsu(score_grayscale)
        except ValueError:
            threshold = 0
        bw_score_bar = score_grayscale > threshold
        # Use the count of bright pixels as a rough score proxy
        score = str(bw_score_bar[bw_score_bar > 0].size)
        self.game_state["current_run_score"] = score
        return score

    def _calculate_reward(self):
        return int(self.game_state["score"][0])
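# _measure_run_score above derives a score proxy from pixel statistics rather
# than OCR: Otsu-threshold the grayscale score region, then count the bright
# pixels. Below is a standalone sketch of that trick on synthetic data; the
# helper name is hypothetical and not part of the plugin.
def _bright_pixel_score_sketch(region_grayscale):
    import skimage.filters
    try:
        threshold = skimage.filters.threshold_otsu(region_grayscale)
    except ValueError:
        # A flat region has no two gray levels for Otsu to separate
        threshold = 0
    binary = region_grayscale > threshold
    return int(binary.sum())  # count of pixels brighter than the threshold

# region = np.zeros((20, 50), dtype="uint8")
# region[5:10, 10:30] = 220             # stand-in for rendered score text
# _bright_pixel_score_sketch(region)    # -> 100 bright pixels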
class SerpentSlayTheSpireGameAgent(GameAgent):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        global prevContext
        prevContext = "None"

        self.game_state = None
        self._reset_game_state()

        serpent.utilities.clear_terminal()
        print("------------------------------------")
        print("Starting up . . . ")

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play

    def setup_play(self):
        global input_mapping

        print("------------------------------------")
        print("Loading Image Classifier . . . ")
        print("------------------------------------")

        plugin_path = offshoot.config["file_paths"]["plugins"]
        context_classifier_path = f"{plugin_path}/SerpentSlayTheSpireGameAgentPlugin/files/ml_models/context_classifier.model"

        # Shape (rows, cols, channels) of the captured context frames
        context_classifier = CNNInceptionV3ContextClassifier(input_shape=(384, 512, 3))
        context_classifier.prepare_generators()
        context_classifier.load_classifier(context_classifier_path)

        self.machine_learning_models["context_classifier"] = context_classifier

        input_mapping = {
            1: [KeyboardKey.KEY_1],
            2: [KeyboardKey.KEY_2],
            3: [KeyboardKey.KEY_3],
            4: [KeyboardKey.KEY_4],
            5: [KeyboardKey.KEY_5],
            "E": [KeyboardKey.KEY_E]
        }

        action_space = KeyboardMouseActionSpace(
            card_inputs=[1, 2, 3, 4, 5, "E"]
        )

        card_selection_model_file_path = "datasets/tdar31_slaythespire_dqn_0.9981189999999986_.h5".replace("/", os.sep)

        # DDQN setup
        self.dqn_card_selection = DDQN(
            model_file_path=card_selection_model_file_path if os.path.isfile(card_selection_model_file_path) else None,
            input_shape=(90, 160, 4),
            input_mapping=input_mapping,
            action_space=action_space,
            replay_memory_size=1000,
            max_steps=1000,
            observe_steps=100,
            batch_size=64,
            model_learning_rate=1e-4,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=True
        )

    def find_index(self):
        return print("TEST")

    def handle_play(self, game_frame):
        global prevContext

        context = self.machine_learning_models["context_classifier"].predict(game_frame.frame)
        print(context)

        if context != prevContext:
            prevContext = context
            time.sleep(1)
            return print("context doesn't match prevContext")

        if context == "DEATH_MENU":
            self.handle_DEATH_MENU(game_frame, context)
        elif context == "BATTLE_STAGE":
            self.handle_BATTLE_STAGE(game_frame, context)
        elif context == "REWARD_STAGE":
            self.handle_REWARD_STAGE(game_frame, context)
        # While these contexts aren't used, the image classifier is trained to
        # check for them, and they may prove useful/important as the project
        # expands. So even though they currently do nothing, I am going to
        # leave them for now. NOTE: their handlers are commented out at the
        # bottom of the class, so these branches would raise AttributeError if
        # one of these contexts were ever classified.
        elif context == "START_RUN":
            self.handle_START_RUN(game_frame, context)
        elif context == "MAP_MENU":
            self.handle_MAP_MENU(game_frame, context)
        elif context == "MERCHANT_MENU":
            self.handle_MERCHANT_MENU(game_frame, context)
        elif context == "MERCHANT_PRE_MENU":
            self.handle_MERCHANT_PRE_MENU(game_frame, context)
        elif context == "REST_STAGE":
            self.handle_REST_STAGE(game_frame, context)
        elif context == "SMITH_DECK_LIST":
            self.handle_SMITH_DECK_LIST(game_frame, context)

    def _reset_game_state(self):
        self.game_state = {
            "current_run": 0,
            "current_run_steps": 0,
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "run_timestamp": datetime.utcnow(),
            "masterCardList": ["Strike_G", "Strike_G", "Strike_G", "Strike_G",
                               "Defend_G", "Defend_G", "Defend_G", "Defend_G",
                               "Poisoned Stab", "Neutralize", "Dodge and Roll"],
            "player_energy_available": [3],
            "player_energy_total": [3],
            "player_health": [70],
            "final_cultist_attack": [0],
            "poison_check": [False],
            "run_reward_selection": 0,
            "run_predicted_selection": 0,
        }

    def handle_DEATH_MENU(self, game_frame, context):
        print("INSIDE DEATH_MENU function")
        global prevContext
        prevContext = "DEATH_STAGE"
        time.sleep(1)

        death_menuing_Xcoords = [639, 644, 104, 344, 634, 1207]
        death_menuing_Ycoords = [622, 637, 440, 376, 579, 593]
        menuing_delays = [1, 2, 1, 1, 1, 1]

        for elem in range(6):
            self.input_controller.move(x=death_menuing_Xcoords[elem], y=death_menuing_Ycoords[elem], duration=0.25, absolute=True)
            self.input_controller.click(button=MouseButton.LEFT, duration=0.25)
            time.sleep(menuing_delays[elem])

        time.sleep(1)
        self.fight_setup()

    # Remove relic and reset deck
    def fight_setup(self):
        console_commands = ["relic remove r", "deck remove a", "fight cu"]
        self.input_controller.type_string("~", 0.05)

        for elem in range(3):
            self.input_controller.type_string(console_commands[elem], 0.05)
            self.input_controller.tap_key(KeyboardKey.KEY_TAB)
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(1)

        self.input_controller.type_string("~", 0.05)
        time.sleep(1)

        self.game_state["current_run"] += 1
        print("self.dqn_card_selection.mode: --- ", self.dqn_card_selection.mode)

        if self.dqn_card_selection.mode in ["TRAIN", "RUN"]:
            print("if self.dqn_card_selection.mode in ['TRAIN', 'RUN']:")
            print("----------------------------------------------------")
            time.sleep(2)
            if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                if self.dqn_card_selection.type == "DDQN":
                    self.dqn_card_selection.update_target_model()
            if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                self.dqn_card_selection.enter_run_mode()
            else:
                self.dqn_card_selection.enter_train_mode()

        # Now inside the battle stage, but the battle function hasn't been
        # triggered by the image context classifier yet
        self.populating_deck()

    def populating_deck(self):
        masterCardList = self.game_state["masterCardList"]
        time.sleep(1)
        self.input_controller.type_string("~", 0.05)
        time.sleep(1)

        prefixCmd = "hand add "
        for card in masterCardList:
            print(card)
            self.input_controller.type_string(prefixCmd + card, 0.05)
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(1)

        # Close the console
        self.input_controller.type_string("~", 0.05)
        # End the turn
        self.input_controller.tap_key(KeyboardKey.KEY_E)
        time.sleep(1.5)

    def handle_BATTLE_STAGE(self, game_frame, context):
        print("INSIDE BATTLE_STAGE function")
        global prevContext
        prevContext = "BATTLE_STAGE"

        # Player energy
        player_energy = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["PLAYER_ENERGY"])
        player_energy_grayscale = np.array(skimage.color.rgb2gray(player_energy) * 255, dtype="uint8")
        player_energy = serpent.ocr.perform_ocr(image=player_energy_grayscale, scale=15, order=5, horizontal_closing=2, vertical_closing=1)
        print("player_energy")
        print(player_energy)

        # Parse the value returned by tesseract to grab the current energy.
        # Because of the swirling animation behind the numbers, the OCR doesn't
        # return this value correctly 100% of the time. The '/' is by far the
        # most consistent character returned, and it's the character the energy
        # values revolve around, so if it's not found we force the program to
        # grab a new game_image and try again.
        if '/' in player_energy:
            print("player_energy INSIDE IF STATEMENT")
            finalArr = []
            # Examples of the kind of values returned by the OCR when grabbing energy:
            # 3/3 <- correct
            # '3/3
            # "3 /3
            # 53 /3
            # 27/ 3
            # "3/3'
            # 3/3.
            # .3 /3
            # *3/3
            for elem in player_energy:
                # Next layer of parsing: if the value isn't '/' or a digit,
                # it isn't pushed into finalArr
                if elem == "/" or elem.isdigit():
                    finalArr.append(elem)

            # Final check: the list must have at least 3 entries with '/' in
            # the second position. Technically this fails if the player has 10
            # or more energy, but that happens so infrequently that there's no
            # conditional check for it.
            if len(finalArr) < 3 or finalArr[1] != "/":
                return print("Failed to capture energy successfully // len(finalArr) < 3 or finalArr[1] != '/'")

            # Capture available and total player energy
            player_energy_available = finalArr[0]
            player_energy_total = finalArr[2]
            print("------------------------------------")
            print(player_energy_available, "/", player_energy_total)
            print("------------------------------------")
            time.sleep(1)
            self.game_state["player_energy_available"].insert(0, player_energy_available)
            self.game_state["player_energy_total"].insert(0, player_energy_total)
        else:
            print(player_energy)
            return print("Failed to capture energy successfully // captured energy value doesn't have '/'")

        # Player health
        player_total_health = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["PLAYER_HEALTH"])
        player_total_health_grayscale = np.array(skimage.color.rgb2gray(player_total_health) * 255, dtype="uint8")
        player_health = serpent.ocr.perform_ocr(image=player_total_health_grayscale, scale=15, order=5, horizontal_closing=2, vertical_closing=1)

        tempArr = []
        for elem in player_health:
            if elem.isdigit():
                tempArr.append(elem)
            if elem == "/":
                break
        player_health = ''.join(tempArr)
        print("player_health", player_health)
        self.game_state["player_health"].insert(0, player_health)
        time.sleep(.5)
        self.enemy_action_capture(game_frame)

    def enemy_action_capture(self, game_frame):
        final_cultist_attack = []
        attack_cultist_temp_list = []

        # Unselect anything, just in case
        self.input_controller.click(button=MouseButton.RIGHT, duration=0.25)
        time.sleep(.5)
        # Home hover
        self.input_controller.move(x=636, y=375, duration=0.25, absolute=True)
        time.sleep(1)
        # Enemy hover
        self.input_controller.move(x=959, y=410, duration=0.25, absolute=True)
        time.sleep(.75)

        image_data = skimage.io.imread("plugins/SerpentSlayTheSpireGamePlugin/files/data/sprites/sprite_Attack_for_0.png")[..., np.newaxis]
        attack_for_cultist = Sprite("attack_for_cultist", image_data=image_data)

        # Full game frame capture
        # print("-----------Full game frame capture-----------")
        # full_game_frame = FrameGrabber.get_frames(
        #     [0],
        #     frame_shape=(self.game.frame_height, self.game.frame_width),
        #     frame_type="PIPELINE"
        # ).frames[0]

        # Allows for dynamic capture of the enemy attack
        sprite_locator = SpriteLocator()
        attack_for_cultist_location = sprite_locator.locate(sprite=attack_for_cultist, game_frame=game_frame)
        print("attack_for_cultist_location: ", attack_for_cultist_location)

        # Tuples are immutable :(
        if attack_for_cultist_location is not None:
            attack_cultist_temp_list = list(attack_for_cultist_location)
            attack_cultist_temp_list[1] = attack_cultist_temp_list[1] + 45
            attack_cultist_temp_list[3] = attack_cultist_temp_list[3] + 15
            attack_for_cultist_location = tuple(attack_cultist_temp_list)
            print("Updated - attack_for_cultist_location: ", attack_for_cultist_location)
            time.sleep(1)

            cultist_attack = serpent.cv.extract_region_from_image(game_frame.frame, attack_for_cultist_location)
            cultist_attack_grayscale = np.array(skimage.color.rgb2gray(cultist_attack) * 255, dtype="uint8")
            cultist_attack = serpent.ocr.perform_ocr(image=cultist_attack_grayscale, scale=15, order=5, horizontal_closing=2, vertical_closing=1)

            # This is an awkward workaround for a limitation in how tesseract
            # works: by default it doesn't capture single-char values, so when
            # dynamically searching for the enemy attack, the captured region
            # includes the word "for" plus the attack value (i.e. "for 6").
            # There are ways to swap tesseract into a single-char mode, but the
            # attack values are dynamic (sometimes below 10, sometimes well
            # above it, i.e. multiple chars), which messes with that capture.
            # For the sake of just getting it working, I did this.
            # TL;DR: awkward workaround for a tesseract limitation around
            # single-char values; there is likely an easier way to capture and
            # then parse the attack value.
            for elem in cultist_attack:
                if elem.isdigit():
                    final_cultist_attack.append(elem)
            print("final_cultist_attack", final_cultist_attack)
            final_cultist_attack = ''.join(final_cultist_attack)
            print("final_cultist_attack: ", final_cultist_attack)
            print("------------------------------------")
            self.game_state["final_cultist_attack"].insert(0, final_cultist_attack)
            self.poison_check(game_frame)
        else:
            return print("Failed to capture enemy attack")

    def poison_check(self, game_frame):
        image_data = skimage.io.imread("plugins/SerpentSlayTheSpireGamePlugin/files/data/sprites/sprite_poison_check_0.png")[..., np.newaxis]
        poison_check = Sprite("poison_check", image_data=image_data)

        # Full game frame capture
        # print("-----------Full game frame capture-----------")
        # full_game_frame = FrameGrabber.get_frames(
        #     [0],
        #     frame_shape=(self.game.frame_height, self.game.frame_width),
        #     frame_type="PIPELINE"
        # ).frames[0]

        sprite_locator = SpriteLocator()
        poison_check_location = sprite_locator.locate(sprite=poison_check, game_frame=game_frame)
        print("poison_check_location: ", poison_check_location)

        if poison_check_location is not None:
            self.game_state["poison_check"].insert(0, True)
            print("POISON_CHECK == TRUE")
        else:
            self.game_state["poison_check"].insert(0, False)
            print("POISON_CHECK == FALSE")

        self.ddqn_setup(game_frame)

    def ddqn_setup(self, game_frame):
        gc.disable()

        if self.dqn_card_selection.first_run:
            self.dqn_card_selection.first_run = False
            print("---------------first_run---------------")
            return None

        timestamp_now = datetime.utcnow()
        runtime_total = timestamp_now - self.game_state["run_timestamp"]
        time.sleep(1)
        print("self.dqn_card_selection.mode", self.dqn_card_selection.mode)

        if self.dqn_card_selection.frame_stack is None:
            full_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            ).frames[0]
            print("self.dqn_card_selection.frame_stack is None")
            self.dqn_card_selection.build_frame_stack(full_game_frame.frame)
        else:
            print("ELSE -- self.dqn_card_selection.frame_stack is None // game_frame_buffer")
            print("INSIDE ELSE self.dqn_card_selection.mode", self.dqn_card_selection.mode)
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            )

            if self.dqn_card_selection.mode == "TRAIN":
                print("self.dqn_card_selection.mode == TRAIN", self.dqn_card_selection.mode)
                time.sleep(2)
                # Calculate the reward, then append it to replay memory
                reward_selection = self.calculate_reward()
                self.game_state["run_reward_selection"] += reward_selection
                self.dqn_card_selection.append_to_replay_memory(
                    game_frame_buffer,
                    reward_selection,
                    terminal=self.game_state["player_health"] == 0
                )
                if self.dqn_card_selection.current_step % 100 == 0:
                    self.dqn_card_selection.save_model_weights(
                        file_path_prefix="datasets/tdar31_slaythespire_selection"
                    )
                if self.dqn_card_selection.current_step % 500 == 0:
                    self.dqn_card_selection.save_model_weights(
                        file_path_prefix="datasets/tdar31_slaythespire_selection"
                    )
                if self.dqn_card_selection.current_step % 5000 == 0:
                    self.dqn_card_selection.save_model_weights(
                        file_path_prefix="datasets/tdar31_slaythespire_selection",
                        is_checkpoint=True
                    )
            elif self.dqn_card_selection.mode == "RUN":
                self.dqn_card_selection.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at
            serpent.utilities.clear_terminal()
            print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print("")
            print("Selection NN:\n")
            self.dqn_card_selection.output_step_data()
            print("")
            print(f"RUN: {self.game_state['current_run']}")
            print(f"RUN REWARD: {round(self.game_state['run_reward_selection'], 2)}")
            print(f"RUN PREDICTED ACTIONS: {self.game_state['run_predicted_selection']}")
            print(f"PLAYER HEALTH: {self.game_state['player_health'][0]}")
            print(f"PLAYER ENERGY AVAILABLE: {self.game_state['player_energy_available'][0]}")
            print(f"PLAYER ENERGY TOTAL: {self.game_state['player_energy_total'][0]}")
            print(f"POISON CHECK: {self.game_state['poison_check'][0]}")
            print(f"RUN TIME: {runtime_total.seconds} seconds")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")

            self.dqn_card_selection.pick_action()
            self.dqn_card_selection.generate_action()

            card_selection_keys = self.dqn_card_selection.get_input_values()
            print("card_selection_keys", card_selection_keys)
            ddqnInputSelection = card_selection_keys[0]
            print("ddqnInputSelection", ddqnInputSelection)
            print("self.dqn_card_selection.current_action_type", self.dqn_card_selection.current_action_type)

            # Starts as RANDOM; once the frame stack is built out it swaps to
            # PREDICTED, i.e. a deliberate choice when picking the action,
            # appending to memory and setting up the reward calculation
            if self.dqn_card_selection.current_action_type == "PREDICTED":
                self.game_state["run_predicted_selection"] += 1

            self.dqn_card_selection.erode_epsilon(factor=2)
            self.dqn_card_selection.next_step()
            self.ddqn_action_output(ddqnInputSelection)

    def ddqn_action_output(self, ddqnInputSelection):
        print(ddqnInputSelection)
        # Unselect anything, just in case
        self.input_controller.click(button=MouseButton.RIGHT, duration=0.25)

        if ddqnInputSelection == KeyboardKey.KEY_E:
            print("ddqnInputSelection = E // End turn")
            self.input_controller.tap_key(ddqnInputSelection)
            time.sleep(.5)
        else:
            # This is where the chosen card is actually selected, then played
            self.input_controller.tap_key(ddqnInputSelection)
            time.sleep(.5)
            play_card_Xcoords = [636, 959]
            play_card_Ycoords = [375, 410]
            for elem in range(2):
                self.input_controller.move(x=play_card_Xcoords[elem], y=play_card_Ycoords[elem], duration=0.25, absolute=True)
                time.sleep(1)
                self.input_controller.click(button=MouseButton.LEFT, duration=0.25)
                time.sleep(.5)

    def calculate_reward(self):
        reward = 0
        damage_taken = int(self.game_state["player_health"][1]) - int(self.game_state["player_health"][0])
        # 1 to 9 damage taken that turn // -5
        reward -= 5 if 1 <= damage_taken <= 9 else 0
        # 10 or more damage taken that turn // -10
        reward -= 10 if damage_taken >= 10 else 0
        # Slight penalty if no card is played, aka no energy used // -3
        reward -= 3 if int(self.game_state["player_energy_available"][0]) == int(self.game_state["player_energy_total"][0]) else 0
        # Energy used to play a card, regardless of what it does // +10
        reward += 10 if int(self.game_state["player_energy_available"][0]) < int(self.game_state["player_energy_total"][1]) else 0
        # If the enemy is poisoned // +5
        reward += 5 if self.game_state["poison_check"][0] else 0
        return reward

    def handle_REWARD_STAGE(self, game_frame, context):
        print("INSIDE REWARD_STAGE function")
        global prevContext
        prevContext = "REWARD_STAGE"
        time.sleep(.5)

        play_card_Xcoords = [1249, 952, 563]
        play_card_Ycoords = [26, 146, 456]
        for elem in range(3):
            self.input_controller.move(x=play_card_Xcoords[elem], y=play_card_Ycoords[elem], duration=0.25, absolute=True)
            time.sleep(.75)
            self.input_controller.click(button=MouseButton.LEFT, duration=0.25)
            time.sleep(.5)

    # def handle_MERCHANT_PRE_MENU(self, game_frame, context):
    #     print("INSIDE MERCHANT_PRE_MENU function")
    #     global prevContext
    #     prevContext = "MERCHANT_PRE_MENU"
    #     time.sleep(1)

    # def handle_MERCHANT_MENU(self, game_frame, context):
    #     print("INSIDE MERCHANT_MENU function")
    #     global prevContext
    #     prevContext = "MERCHANT_MENU"

    # def handle_MAP_MENU(self, game_frame, context):
    #     print("INSIDE MAP_MENU function")
    #     global prevContext
    #     prevContext = "MAP_MENU"
    #     time.sleep(1)

    # def handle_REST_STAGE(self, game_frame, context):
    #     print("INSIDE REST_STAGE function")
    #     global prevContext
    #     prevContext = "REST_STAGE"
    #     time.sleep(1)

    # def handle_SMITH_DECK_LIST(self, game_frame, context):
    #     print("INSIDE SMITH_DECK_LIST function")
    #     global prevContext
    #     prevContext = "SMITH_DECK_LIST"
    #     time.sleep(1)
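# The energy parsing in handle_BATTLE_STAGE is easier to test outside the
# agent. This is a hypothetical pure-function version of the same rules (keep
# only digits and '/', then require a "digit / digit" shape); the sample
# strings below are the noisy OCR outputs listed in the comments above.
def _parse_energy_sketch(ocr_text):
    """Return (available, total) as ints, or None when the OCR text is unusable."""
    if "/" not in ocr_text:
        return None  # force a re-capture, as the agent does
    cleaned = [ch for ch in ocr_text if ch == "/" or ch.isdigit()]
    # Same final check as the agent; as noted there, it rejects 10+ energy
    if len(cleaned) < 3 or cleaned[1] != "/":
        return None
    return int(cleaned[0]), int(cleaned[2])

# for sample in ["3/3", "'3/3", '"3 /3', "27/ 3", "*3/3"]:
#     print(repr(sample), "->", _parse_energy_sketch(sample))
# "'3/3" cleans up to (3, 3); "27/ 3" is rejected because '/' does not end up
# in the second position after cleaning.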
class SerpentSpaceInvadersGameAgent(GameAgent):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play
        self.analytics_client = None
        self.game_state = None
        self._reset_game_state()
        init()  # presumably colorama's init, enabling the ANSI escapes used below

    def setup_play(self):
        # All key/input mappings for the SNES emulator
        input_mapping = {
            "UP": [KeyboardKey.KEY_W],
            "LEFT": [KeyboardKey.KEY_A],
            "DOWN": [KeyboardKey.KEY_S],
            "RIGHT": [KeyboardKey.KEY_D],
            "START": [KeyboardKey.KEY_ENTER],
            "SELECT": [KeyboardKey.KEY_BACKSPACE],
            "B": [KeyboardKey.KEY_Z],
            "A": [KeyboardKey.KEY_V],
            "Y": [KeyboardKey.KEY_X],
            "X": [KeyboardKey.KEY_C],
            "L": [KeyboardKey.KEY_B],
            "R": [KeyboardKey.KEY_N]
        }

        self.key_mapping = {
            KeyboardKey.KEY_W.name: "UP",
            KeyboardKey.KEY_A.name: "LEFT",
            KeyboardKey.KEY_S.name: "DOWN",
            KeyboardKey.KEY_D.name: "RIGHT",
            KeyboardKey.KEY_ENTER.name: "START",
            KeyboardKey.KEY_BACKSPACE.name: "SELECT",
            KeyboardKey.KEY_Z.name: "B",
            KeyboardKey.KEY_V.name: "A",
            KeyboardKey.KEY_X.name: "Y",
            KeyboardKey.KEY_C.name: "X",
            KeyboardKey.KEY_B.name: "L",
            KeyboardKey.KEY_N.name: "R"
        }

        # Game-specific inputs
        direction_action_space = KeyboardMouseActionSpace(direction_keys=[None, "LEFT", "RIGHT"])
        action_space = KeyboardMouseActionSpace(action_keys=[None, "B", "A", "Y", "X"])

        direction_model_file_path = "datasets/spaceinvaders_direction_dqn_0_1_.h5".replace("/", os.sep)

        self.dqn_direction = DDQN(
            model_file_path=direction_model_file_path if os.path.isfile(direction_model_file_path) else None,
            input_shape=(100, 100, 4),
            input_mapping=input_mapping,
            action_space=direction_action_space,
            replay_memory_size=40000,
            max_steps=3000000,
            observe_steps=5000,
            batch_size=32,
            model_learning_rate=1e-4,
            initial_epsilon=1.0,
            final_epsilon=0.1,
            override_epsilon=False
        )

        action_model_file_path = "datasets/spaceinvaders_action_dqn_0_1_.h5".replace("/", os.sep)

        self.dqn_action = DDQN(
            model_file_path=action_model_file_path if os.path.isfile(action_model_file_path) else None,
            input_shape=(100, 100, 4),
            input_mapping=input_mapping,
            action_space=action_space,
            replay_memory_size=40000,
            max_steps=3000000,
            observe_steps=5000,
            batch_size=32,
            model_learning_rate=1e-4,
            initial_epsilon=1.0,
            final_epsilon=0.1,
            override_epsilon=False
        )

    def handle_play(self, game_frame):
        gc.disable()

        if self.dqn_direction.first_run:
            # Navigate the emulator menus and insert a credit on the first frame
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(0.5)
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(0.5)
            self.input_controller.tap_key(KeyboardKey.KEY_S)
            time.sleep(0.5)
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(0.5)
            self.input_controller.tap_key(KeyboardKey.KEY_N)  # 1 credit
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(5)
            self.dqn_direction.first_run = False
            self.dqn_action.first_run = False
            return None

        vessel_hp = self._measure_hp(game_frame)
        vessel_score = self._measure_score(game_frame)
        # vessel_credits = self._measure_credits(game_frame)

        self.game_state["health"].appendleft(vessel_hp)
        self.game_state["score"].appendleft(vessel_score)
        # self.game_state["credits"].appendleft(vessel_credits)

        if self.dqn_direction.frame_stack is None:
            pipeline_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            ).frames[0]
            self.dqn_direction.build_frame_stack(pipeline_game_frame.frame)
            self.dqn_action.frame_stack = self.dqn_direction.frame_stack
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            )

            if self.dqn_direction.mode == "TRAIN":
                reward_direction, reward_action = self._calculate_reward()
                self.game_state["run_reward_direction"] += reward_direction
                self.game_state["run_reward_action"] += reward_action
                self.dqn_direction.append_to_replay_memory(
                    game_frame_buffer,
                    reward_direction,
                    terminal=self.game_state["health"] == 0
                )
                self.dqn_action.append_to_replay_memory(
                    game_frame_buffer,
                    reward_action,
                    terminal=self.game_state["health"] == 0
                )
                # Every 2000 steps, save latest weights to disk
                if self.dqn_direction.current_step % 2000 == 0:
                    self.dqn_direction.save_model_weights(file_path_prefix="datasets/spaceinvaders_direction")
                    self.dqn_action.save_model_weights(file_path_prefix="datasets/spaceinvaders_action")
                # Every 20000 steps, save a weights checkpoint to disk
                if self.dqn_direction.current_step % 20000 == 0:
                    self.dqn_direction.save_model_weights(file_path_prefix="datasets/spaceinvaders_direction", is_checkpoint=True)
                    self.dqn_action.save_model_weights(file_path_prefix="datasets/spaceinvaders_action", is_checkpoint=True)
            elif self.dqn_direction.mode == "RUN":
                self.dqn_direction.update_frame_stack(game_frame_buffer)
                self.dqn_action.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at
            serpent.utilities.clear_terminal()
            print("\033[31m" + f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds" + "\033[37m")
            print("GAME: Space Invaders PLATFORM: SNES AGENT: DDQN + PER")
            print("")
            print("\033[32m" + "DIRECTION NEURAL NETWORK INFO:\n" + "\033[37m")
            self.dqn_direction.output_step_data()
            print("")
            print("\033[32m" + "ACTION NEURAL NETWORK INFO:\n" + "\033[37m")
            self.dqn_action.output_step_data()
            print("")
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward_direction'] + self.game_state['run_reward_action'], 2)}")
            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT SCORE: {self.game_state['score'][0]}")
            # print(f"CURRENT CREDITS: {self.game_state['credits'][0]}")
            print("")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
            print("")
            print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
            print("")
            print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

            if self.game_state["health"][2] <= 0:
                serpent.utilities.clear_terminal()
                print("ENTERING THE HEALTH <= 0 PART")
                timestamp = datetime.utcnow()

                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    # Check for records
                    if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_direction.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0
                self.input_controller.handle_keys([])

                if self.dqn_direction.mode == "TRAIN":
                    for i in range(16):
                        serpent.utilities.clear_terminal()
                        print("\033[31m" + f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds" + "\033[37m")
                        print("\033[32m" + "GAME: Space Invaders PLATFORM: SNES AGENT: DDQN + PER" + "\033[37m")
                        print("")
                        print("TRAINING ON MINI-BATCHES: " + "\033[32m" + f"{i + 1}/16" + "\033[37m")
                        print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")
                        self.dqn_direction.train_on_mini_batch()
                        self.dqn_action.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_direction"] = 0
                self.game_state["run_reward_action"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8,), 3), maxlen=8)
                self.game_state["score"] = collections.deque(np.full((8,), 0), maxlen=8)

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                        if self.dqn_direction.type == "DDQN":
                            self.dqn_direction.update_target_model()
                            self.dqn_action.update_target_model()
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                        self.dqn_direction.enter_run_mode()
                        self.dqn_action.enter_run_mode()
                    else:
                        self.dqn_direction.enter_train_mode()
                        self.dqn_action.enter_train_mode()

                # Re-insert a credit and restart
                time.sleep(1)
                self.input_controller.tap_key(KeyboardKey.KEY_N)
                time.sleep(1)
                self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
                time.sleep(6)
                return None

            self.dqn_direction.pick_action()
            self.dqn_direction.generate_action()
            self.dqn_action.pick_action(action_type=self.dqn_direction.current_action_type)
            self.dqn_action.generate_action()

            keys = self.dqn_direction.get_input_values() + self.dqn_action.get_input_values()
            print("")
            print("PRESSING: ", end='')
            print(" + ".join(self.key_mapping.get(key.name) for key in keys))

            self.input_controller.handle_keys(keys)

            if self.dqn_direction.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_direction.erode_epsilon(factor=2)
            self.dqn_action.erode_epsilon(factor=2)
            self.dqn_direction.next_step()
            self.dqn_action.next_step()
            self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8,), 3), maxlen=8),
            "score": collections.deque(np.full((8,), 0), maxlen=8),
            "run_reward_direction": 0,
            "run_reward_action": 0,
            "current_run": 1,
            "current_run_steps": 0,
            "current_run_health": 3,
            "current_run_score": 0,
            "run_predicted_actions": 0,
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "run_timestamp": datetime.utcnow(),
        }

    def _measure_score(self, game_frame):
        score_area_frame = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["GAME_CURRENT_SCORE"])
        score_grayscale = np.array(skimage.color.rgb2gray(score_area_frame) * 255, dtype="uint8")
        score = serpent.ocr.perform_ocr(image=score_grayscale, scale=10, order=1, horizontal_closing=1, vertical_closing=1)

        # Strip leading zeroes from a valid 4-digit reading; fall back to '0'
        count = 0
        if len(score) == 4 and score.isdigit() and score != '0000':
            for char in score:
                if char == '0':
                    count = count + 1
                else:
                    break
            score = score[count:]
        else:
            score = '0'

        self.game_state["current_run_score"] = score
        return score

    def _measure_hp(self, game_frame):
        hp_area_frame = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["GAME_CURRENT_HEALTH"])
        # Compare the HP region against every known HP sprite and keep the best
        # SSIM match; the HP value is the last character of the sprite's name
        vessel_hp = 0
        max_ssim = 0
        for name, sprite in self.game.sprites.items():
            for i in range(sprite.image_data.shape[3]):
                ssim = skimage.measure.compare_ssim(
                    hp_area_frame,
                    np.squeeze(sprite.image_data[..., i]),
                    multichannel=True
                )
                if ssim > max_ssim:
                    max_ssim = ssim
                    vessel_hp = int(name[-1])
        return vessel_hp

    # def _measure_credits(self, game_frame):
    #     # OCR or sprites if inconsistent (see TiamatX health)
    #     credits_area_frame = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["GAME_CURRENT_CREDITS"])
    #     credits_grayscale = np.array(skimage.color.rgb2gray(credits_area_frame) * 255, dtype="uint8")
    #     credits = serpent.ocr.perform_ocr(image=credits_grayscale, scale=10, order=5, horizontal_closing=10, vertical_closing=5)
    #     count = 0
    #     if len(credits) == 2 and credits.isdigit():
    #         for char in credits:
    #             if char == '0':
    #                 count = count + 1
    #             else:
    #                 break
    #         credits = credits[count:]
    #     else:
    #         credits = '50'
    #     self.game_state["current_run_credits"] = credits
    #     return credits

    def _calculate_reward(self):
        reward = 0
        # reward += (-1.0 if self.game_state["credits"][0] < self.game_state["credits"][1] else 0.1)
        reward += (-0.5 if self.game_state["health"][0] < self.game_state["health"][1] else 0.05)
        reward += (0.75 if (int(self.game_state["score"][0]) - int(self.game_state["score"][1])) >= 10 else -0.075)
        # Both networks share the same reward signal
        return reward, reward
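# _measure_hp above picks the HP sprite with the highest SSIM against the HUD
# region and reads the HP value off the sprite's name. Below is a minimal
# standalone sketch of the same idea with synthetic 8x8 "sprites"; note that
# recent scikit-image releases renamed skimage.measure.compare_ssim to
# skimage.metrics.structural_similarity.
def _best_hp_sprite_sketch():
    import numpy as np
    from skimage.metrics import structural_similarity

    rng = np.random.default_rng(0)
    sprites = {f"SPRITE_HP_{n}": rng.random((8, 8)) for n in range(4)}  # stand-ins for HP sprites
    observed = sprites["SPRITE_HP_2"] + rng.normal(0.0, 0.01, (8, 8))   # a noisy HUD capture
    best_name = max(
        sprites,
        key=lambda name: structural_similarity(observed, sprites[name], data_range=1.0)
    )
    return int(best_name[-1])  # the digit encoded in the sprite name

# _best_hp_sprite_sketch() should return 2: the noisy capture still matches
# its source sprite far better than any other candidate.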
class SerpentBindingOfIsaacAfterbirthGameAgent(GameAgent): def __init__(self, **kwargs): super().__init__(**kwargs) self.frame_handlers["PLAY"] = self.handle_play self.frame_handler_setups["PLAY"] = self.setup_play self.game_state = None self._reset_game_state() @property def bosses(self): return {"MONSTRO": "1010"} def setup_play(self): input_mapping = { "W": [KeyboardKey.KEY_W], "A": [KeyboardKey.KEY_A], "S": [KeyboardKey.KEY_S], "D": [KeyboardKey.KEY_D], "WA": [KeyboardKey.KEY_W, KeyboardKey.KEY_A], "WD": [KeyboardKey.KEY_W, KeyboardKey.KEY_D], "SA": [KeyboardKey.KEY_S, KeyboardKey.KEY_A], "SD": [KeyboardKey.KEY_S, KeyboardKey.KEY_D], "UP": [KeyboardKey.KEY_UP], "LEFT": [KeyboardKey.KEY_LEFT], "DOWN": [KeyboardKey.KEY_DOWN], "RIGHT": [KeyboardKey.KEY_RIGHT] } self.key_mapping = { KeyboardKey.KEY_W.name: "MOVE UP", KeyboardKey.KEY_A.name: "MOVE LEFT", KeyboardKey.KEY_S.name: "MOVE DOWN", KeyboardKey.KEY_D.name: "MOVE RIGHT", KeyboardKey.KEY_UP.name: "SHOOT UP", KeyboardKey.KEY_LEFT.name: "SHOOT LEFT", KeyboardKey.KEY_DOWN.name: "SHOOT DOWN", KeyboardKey.KEY_RIGHT.name: "SHOOT RIGHT", } movement_action_space = KeyboardMouseActionSpace(directional_keys=[ None, "W", "A", "S", "D", "WA", "WD", "SA", "SD" ]) projectile_action_space = KeyboardMouseActionSpace( projectile_keys=[None, "UP", "LEFT", "DOWN", "RIGHT"]) movement_model_file_path = "datasets/binding_of_isaac_movement_dqn_0_1_.h5".replace( "/", os.sep) self.dqn_movement = DDQN( model_file_path=movement_model_file_path if os.path.isfile(movement_model_file_path) else None, input_shape=(100, 100, 4), input_mapping=input_mapping, action_space=movement_action_space, replay_memory_size=5000, max_steps=1000000, observe_steps=1000, batch_size=32, initial_epsilon=1, final_epsilon=0.01, override_epsilon=False) projectile_model_file_path = "datasets/binding_of_isaac_projectile_dqn_0_1_.h5".replace( "/", os.sep) self.dqn_projectile = DDQN( model_file_path=projectile_model_file_path if os.path.isfile(projectile_model_file_path) else None, input_shape=(100, 100, 4), input_mapping=input_mapping, action_space=projectile_action_space, replay_memory_size=5000, max_steps=1000000, observe_steps=1000, batch_size=32, initial_epsilon=1, final_epsilon=0.01, override_epsilon=False) if sys.platform in ["linux", "linux2"]: pyperclip.set_clipboard("xsel") pyperclip.copy(f"goto s.boss.{self.bosses['MONSTRO']}") def handle_play(self, game_frame): gc.disable() if self.dqn_movement.first_run: self._goto_boss() self.dqn_movement.first_run = False self.dqn_projectile.first_run = False return None hearts = frame_to_hearts(game_frame.frame, self.game) # Check for Curse of Unknown if not len(hearts): self.input_controller.tap_key(KeyboardKey.KEY_R, duration=1.5) self._goto_boss() return None self.game_state["health"].appendleft(24 - hearts.count(None)) self.game_state["boss_health"].appendleft( self._get_boss_health(game_frame)) if self.dqn_movement.frame_stack is None: pipeline_game_frame = FrameGrabber.get_frames( [0], frame_shape=(self.game.frame_height, self.game.frame_width), frame_type="PIPELINE", dtype="float64").frames[0] self.dqn_movement.build_frame_stack(pipeline_game_frame.frame) self.dqn_projectile.frame_stack = self.dqn_movement.frame_stack else: game_frame_buffer = FrameGrabber.get_frames( [0, 4, 8, 12], frame_shape=(self.game.frame_height, self.game.frame_width), frame_type="PIPELINE", dtype="float64") if self.dqn_movement.mode == "TRAIN": reward_movement, reward_projectile = self._calculate_reward() self.game_state["run_reward_movement"] += 
reward_movement
                self.game_state["run_reward_projectile"] += reward_projectile

                self.dqn_movement.append_to_replay_memory(
                    game_frame_buffer,
                    reward_movement,
                    terminal=self.game_state["health"][0] == 0  # compare the newest sample; the bare deque never equals 0
                )

                self.dqn_projectile.append_to_replay_memory(
                    game_frame_buffer,
                    reward_projectile,
                    terminal=self.game_state["health"][0] == 0
                )

                # Every 2000 steps, save latest weights to disk
                if self.dqn_movement.current_step % 2000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix="datasets/binding_of_isaac_movement"
                    )
                    self.dqn_projectile.save_model_weights(
                        file_path_prefix="datasets/binding_of_isaac_projectile"
                    )

                # Every 20000 steps, save a weights checkpoint to disk
                if self.dqn_movement.current_step % 20000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix="datasets/binding_of_isaac_movement",
                        is_checkpoint=True
                    )
                    self.dqn_projectile.save_model_weights(
                        file_path_prefix="datasets/binding_of_isaac_projectile",
                        is_checkpoint=True
                    )
            elif self.dqn_movement.mode == "RUN":
                self.dqn_movement.update_frame_stack(game_frame_buffer)
                self.dqn_projectile.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()

            print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, "
                  f"{(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print("")

            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_movement.output_step_data()
            print("")

            print("PROJECTILE NEURAL NETWORK:\n")
            self.dqn_projectile.output_step_data()
            print("")

            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward_movement'] + self.game_state['run_reward_projectile'], 2)}")
            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT BOSS HEALTH: {self.game_state['boss_health'][0]}")
            print("")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
            print("")
            print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds "
                  f"(Run {self.game_state['record_time_alive'].get('run')}, "
                  f"{'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'}, "
                  f"Boss HP {self.game_state['record_time_alive'].get('boss_hp')})")
            print(f"RECORD BOSS HP: {self.game_state['record_boss_hp'].get('value')} "
                  f"(Run {self.game_state['record_boss_hp'].get('run')}, "
                  f"{'Predicted' if self.game_state['record_boss_hp'].get('predicted') else 'Training'}, "
                  f"Time Alive {self.game_state['record_boss_hp'].get('time_alive')} seconds)")
            print("")
            print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")
            print(f"RANDOM AVERAGE BOSS HP: {self.game_state['random_boss_hp']}")

            is_boss_dead = self._is_boss_dead(self.game_frame_buffer.previous_game_frame)

            if self.game_state["health"][1] <= 0 or is_boss_dead:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    # Check for records
                    if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_movement.mode == "RUN",
                            "boss_hp": self.game_state["boss_health"][0]
                        }

                    if self.game_state["boss_health"][0] < self.game_state["record_boss_hp"].get("value", 1000):
                        self.game_state["record_boss_hp"] = {
                            "value": self.game_state["boss_health"][0],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_movement.mode == "RUN",
                            "time_alive": self.game_state["last_run_duration"]
                        }
                else:
                    self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                    self.game_state["random_boss_hps"].append(self.game_state["boss_health"][0])

                    self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])
                    self.game_state["random_boss_hp"] = np.mean(self.game_state["random_boss_hps"])

                self.game_state["current_run_steps"] = 0

                self.input_controller.handle_keys([])
                self.input_controller.tap_key(KeyboardKey.KEY_R, duration=1.5)

                if self.dqn_movement.mode == "TRAIN":
                    for i in range(16):
                        serpent.utilities.clear_terminal()
                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                        print(f"NEXT RUN: {self.game_state['current_run'] + 1} "
                              f"{'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                        self.dqn_movement.train_on_mini_batch()
                        self.dqn_projectile.train_on_mini_batch()

                self.game_state["boss_skull_image"] = None
                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_movement"] = 0
                self.game_state["run_reward_projectile"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8,), 6), maxlen=8)
                self.game_state["boss_health"] = collections.deque(np.full((8,), 654), maxlen=8)

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                        self.dqn_movement.update_target_model()
                        self.dqn_projectile.update_target_model()

                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                        self.dqn_movement.enter_run_mode()
                        self.dqn_projectile.enter_run_mode()
                    else:
                        self.dqn_movement.enter_train_mode()
                        self.dqn_projectile.enter_train_mode()

                self._goto_boss()

                return None

            self.dqn_movement.pick_action()
            self.dqn_movement.generate_action()

            self.dqn_projectile.pick_action(action_type=self.dqn_movement.current_action_type)
            self.dqn_projectile.generate_action()

            movement_keys = self.dqn_movement.get_input_values()
            projectile_keys = self.dqn_projectile.get_input_values()

            print("")
            print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), movement_keys + projectile_keys))))

            self.input_controller.handle_keys(movement_keys + projectile_keys)

            if self.dqn_movement.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_movement.erode_epsilon(factor=2)
            self.dqn_projectile.erode_epsilon(factor=2)

            self.dqn_movement.next_step()
            self.dqn_projectile.next_step()

            self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8,), 6), maxlen=8),
            "boss_health": collections.deque(np.full((8,), 654), maxlen=8),
            "boss_skull_image": None,
            "current_run": 1,
            "current_run_steps": 0,
            "run_reward_movement": 0,
            "run_reward_projectile": 0,
            "run_future_rewards": 0,
            "run_predicted_actions": 0,
            "run_timestamp": datetime.utcnow(),
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "record_boss_hp": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "random_boss_hp": None,
            "random_boss_hps": list()
        }

    def _goto_boss(self):
        # Warp to the boss via the in-game console: open it, paste the
        # prepared command from the clipboard, then confirm through the menus
        self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
        time.sleep(1)
        self.input_controller.tap_key(KeyboardKey.KEY_GRAVE)
        time.sleep(0.5)
        self.input_controller.tap_keys([KeyboardKey.KEY_LEFT_CTRL, KeyboardKey.KEY_V])
        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        time.sleep(0.5)
        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        time.sleep(0.5)
        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        time.sleep(0.2)

    def _get_boss_health(self, game_frame):
        gray_boss_health_bar = serpent.cv.extract_region_from_image(
            game_frame.grayscale_frame,
            self.game.screen_regions["HUD_BOSS_HP"]
        )

        try:
            threshold = skimage.filters.threshold_otsu(gray_boss_health_bar)
        except ValueError:
            threshold = 1

        bw_boss_health_bar = gray_boss_health_bar > threshold

        # Boss HP is approximated by the number of lit pixels in the bar
        return bw_boss_health_bar[bw_boss_health_bar > 0].size

    def _is_boss_dead(self, game_frame):
        gray_boss_skull = serpent.cv.extract_region_from_image(
            game_frame.grayscale_frame,
            self.game.screen_regions["HUD_BOSS_SKULL"]
        )

        if self.game_state["boss_skull_image"] is None:
            self.game_state["boss_skull_image"] = gray_boss_skull

        is_dead = False

        # The boss is considered dead when the skull icon changes drastically.
        # Note: compare_ssim is deprecated in recent scikit-image releases in
        # favor of skimage.metrics.structural_similarity
        if skimage.measure.compare_ssim(gray_boss_skull, self.game_state["boss_skull_image"]) < 0.5:
            is_dead = True

        self.game_state["boss_skull_image"] = gray_boss_skull

        return is_dead

    def _calculate_reward(self):
        reward_movement = 0
        reward_projectile = 0

        # Movement is punished for losing health, mildly rewarded otherwise
        reward_movement += -1 if self.game_state["health"][0] < self.game_state["health"][1] else 0.05
        # Projectiles are rewarded for lowering boss HP versus 3 frames ago
        reward_projectile += 1 if self.game_state["boss_health"][0] < self.game_state["boss_health"][3] else -0.05

        return reward_movement, reward_projectile
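# ---------------------------------------------------------------------------
# Standalone sketch (not part of the agent above): how the two reward streams
# in _calculate_reward behave. Movement is judged on the newest-first health
# deque, projectiles on boss health three frames back. The function name and
# the sample values below are hypothetical, for illustration only.

import collections


def sketch_isaac_rewards(health, boss_health):
    # health / boss_health: newest-first samples, as in the agent's deques
    reward_movement = -1 if health[0] < health[1] else 0.05
    reward_projectile = 1 if boss_health[0] < boss_health[3] else -0.05
    return reward_movement, reward_projectile


# Took damage while the boss held steady: (-1, -0.05)
print(sketch_isaac_rewards(
    collections.deque([5, 6], maxlen=8),
    collections.deque([654, 654, 654, 654], maxlen=8)
))

# Unharmed while the boss lost HP since 3 frames ago: (0.05, 1)
print(sketch_isaac_rewards(
    collections.deque([6, 6], maxlen=8),
    collections.deque([600, 620, 640, 654], maxlen=8)
))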
class SerpentVVVVVVGameAgent(GameAgent):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play

        self.game_state = None
        self._reset_game_state()

    def setup_play(self):
        input_mapping = {
            # "UP": [KeyboardKey.KEY_UP],
            "LEFT": [KeyboardKey.KEY_LEFT],
            # "DOWN": [KeyboardKey.KEY_DOWN],
            "RIGHT": [KeyboardKey.KEY_RIGHT]
        }

        self.key_mapping = {
            # KeyboardKey.KEY_UP.name: "UP",
            KeyboardKey.KEY_LEFT.name: "LEFT",
            # KeyboardKey.KEY_DOWN.name: "DOWN",
            KeyboardKey.KEY_RIGHT.name: "RIGHT"
        }

        direction_action_space = KeyboardMouseActionSpace(
            direction_keys=[None, "LEFT", "RIGHT"]
        )

        direction_model_file_path = "datasets/vvvvvv_direction_dqn_0_1_.h5".replace("/", os.sep)

        self.dqn_direction = DDQN(
            model_file_path=direction_model_file_path if os.path.isfile(direction_model_file_path) else None,
            input_shape=(480, 640, 4),
            input_mapping=input_mapping,
            action_space=direction_action_space,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=600,
            batch_size=32,
            model_learning_rate=1e-4,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=False
        )

    def handle_play(self, game_frame):
        gc.disable()

        for i, game_frame in enumerate(self.game_frame_buffer.frames):
            self.visual_debugger.store_image_data(
                game_frame.frame,
                game_frame.frame.shape,
                str(i)
            )

        if self.dqn_direction.first_run:
            # self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
            # time.sleep(5)
            self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
            self.dqn_direction.first_run = False
            time.sleep(5)

            return None

        actor_hp = self._measure_actor_hp(game_frame)
        run_score = self._measure_run_score(game_frame)

        self.game_state["health"].appendleft(actor_hp)
        self.game_state["score"].appendleft(run_score)

        if self.dqn_direction.frame_stack is None:
            full_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            ).frames[0]

            self.dqn_direction.build_frame_stack(full_game_frame.frame)
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            )

            if self.dqn_direction.mode == "TRAIN":
                reward_direction, reward_action = self._calculate_reward()

                self.game_state["run_reward_direction"] += reward_direction
                self.game_state["run_reward_action"] += reward_action

                self.dqn_direction.append_to_replay_memory(
                    game_frame_buffer,
                    reward_direction,
                    terminal=self.game_state["health"][0] == 0  # compare the newest sample, not the deque itself
                )

                # Every 2000 steps, save latest weights to disk
                if self.dqn_direction.current_step % 2000 == 0:
                    self.dqn_direction.save_model_weights(
                        file_path_prefix="datasets/vvvvvv_direction"
                    )

                # Every 20000 steps, save a weights checkpoint to disk
                if self.dqn_direction.current_step % 20000 == 0:
                    self.dqn_direction.save_model_weights(
                        file_path_prefix="datasets/vvvvvv_direction",
                        is_checkpoint=True
                    )
            elif self.dqn_direction.mode == "RUN":
                self.dqn_direction.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()

            print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, "
                  f"{(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print("GAME: VVVVVV PLATFORM: Steam AGENT: DDQN + Prioritized Experience Replay")
            print("")

            self.dqn_direction.output_step_data()

            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward_direction'] + self.game_state['run_reward_action'], 2)}")
            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT SCORE: {self.game_state['score'][0]}")
            print("")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
            print("")
            print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds "
                  f"(Run {self.game_state['record_time_alive'].get('run')}, "
                  f"{'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
            print("")
            print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

            if self.game_state["health"][1] <= 0:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    # Check for records
                    if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_direction.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0

                self.input_controller.handle_keys([])

                if self.dqn_direction.mode == "TRAIN":
                    for i in range(8):
                        run_time = datetime.now() - self.started_at

                        serpent.utilities.clear_terminal()
                        print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, "
                              f"{(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
                        print("GAME: VVVVVV PLATFORM: Steam AGENT: DDQN + Prioritized Experience Replay")
                        print("")
                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/8")
                        print(f"NEXT RUN: {self.game_state['current_run'] + 1} "
                              f"{'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                        self.dqn_direction.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_direction"] = 0
                self.game_state["run_reward_action"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8,), 3), maxlen=8)
                self.game_state["score"] = collections.deque(np.full((8,), 0), maxlen=8)

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                        if self.dqn_direction.type == "DDQN":
                            self.dqn_direction.update_target_model()

                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                        self.dqn_direction.enter_run_mode()
                    else:
                        self.dqn_direction.enter_train_mode()

                # self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
                # time.sleep(3)
                self.input_controller.tap_key(KeyboardKey.KEY_SPACE)

                return None

            self.dqn_direction.pick_action()
            self.dqn_direction.generate_action()

            keys = self.dqn_direction.get_input_values()

            print("")
            print("PRESSING: ", end='')
            print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), keys))))

            self.input_controller.handle_keys(keys)

            if self.dqn_direction.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_direction.erode_epsilon(factor=2)

            self.dqn_direction.next_step()

            self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8,), 3), maxlen=8),
            "score": collections.deque(np.full((8,), 0), maxlen=8),
            "run_reward_direction": 0,
            "run_reward_action": 0,
            "current_run": 1,
            "current_run_steps": 0,
            "current_run_health": 0,
            "current_run_score": 0,
            "run_predicted_actions": 0,
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "run_timestamp": datetime.utcnow()
        }

    def _measure_actor_hp(self, game_frame):
        hp_area_frame = serpent.cv.extract_region_from_image(
            game_frame.frame,
            self.game.screen_regions["HP_AREA"]
        )

        hp_area_image = Image.fromarray(hp_area_frame)

        actor_hp = 0

        # TODO: remove in favor of sprite detection and location
        image_colors = hp_area_image.getcolors()

        if image_colors:
            actor_hp = len(image_colors) - 7

        for name, sprite in self.game.sprites.items():
            query_sprite = Sprite("QUERY", image_data=sprite.image_data)
            # Will be "UNKNOWN" if no match is found
            sprite_name = self.sprite_identifier.identify(query_sprite, mode="CONSTELLATION_OF_PIXELS")
            print(sprite_name)

            sprite_to_locate = Sprite("QUERY", image_data=sprite.image_data)
            sprite_locator = SpriteLocator()
            location = sprite_locator.locate(sprite=sprite_to_locate, game_frame=game_frame)
            print(location)

            if location:
                actor_hp = 1000000

        return actor_hp

    def _measure_run_score(self, game_frame):
        score_area_frame = serpent.cv.extract_region_from_image(
            game_frame.frame,
            self.game.screen_regions["SCORE_AREA"]
        )

        score_grayscale = np.array(skimage.color.rgb2gray(score_area_frame) * 255, dtype="uint8")

        score_image = Image.fromarray(score_grayscale)

        score = '0'
        image_colors = score_image.getcolors()

        if image_colors and len(image_colors) > 1:
            score = serpent.ocr.perform_ocr(
                image=score_grayscale,
                scale=10,
                order=5,
                horizontal_closing=10,
                vertical_closing=5
            )
            # Keep only the digits before any OCR-misread colon
            score = score.split(":")[0]

        if not score.isdigit():
            score = '0'

        self.game_state["current_run_score"] = score

        return score

    def _calculate_reward(self):
        reward = 0

        reward += self.game_state["health"][0] / 10.0
        reward += 0.5 if (int(self.game_state["score"][0]) - int(self.game_state["score"][1])) >= 0 else -0.25

        return reward, reward
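# ---------------------------------------------------------------------------
# Standalone sketch (not part of the agent above): the sanitization applied to
# the serpent.ocr.perform_ocr output in _measure_run_score. The raw strings
# below are made-up examples of typical OCR misreads.

def sketch_sanitize_score(raw_ocr_text):
    # Drop anything after an OCR-misread colon, then fall back to '0'
    # unless what remains is purely digits
    score = raw_ocr_text.split(":")[0]
    return score if score.isdigit() else '0'


print(sketch_sanitize_score("1234"))   # -> '1234'
print(sketch_sanitize_score("12:34"))  # -> '12'
print(sketch_sanitize_score("1O34"))   # -> '0'  (letter O breaks isdigit)
print(sketch_sanitize_score(""))       # -> '0'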
class SerpentPika2GameAgent(GameAgent):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play

        self.previous_game_frame = None

        # RGB bounds used to mask the yellow ball and the red markers
        self.lowerY = np.array([255, 255, 0], np.uint8)
        self.upperY = np.array([255, 255, 10], np.uint8)
        self.lowerR = np.array([255, 0, 0], np.uint8)
        self.upperR = np.array([255, 0, 10], np.uint8)

        self.game_state = None
        self._reset_game_state()

    def setup_key(self):
        self.input_mapping = {
            "JUMP": [KeyboardKey.KEY_UP],
            "RIGHT": [KeyboardKey.KEY_RIGHT],
            "LEFT": [KeyboardKey.KEY_LEFT],
            "LEFT JUMP": [KeyboardKey.KEY_LEFT, KeyboardKey.KEY_UP],
            "RIGHT JUMP": [KeyboardKey.KEY_RIGHT, KeyboardKey.KEY_UP],
            "HIT": [KeyboardKey.KEY_RETURN],
            "None": []
        }

        self.key_mapping = {
            KeyboardKey.KEY_UP: "UP",
            KeyboardKey.KEY_RIGHT: "RIGHT",
            KeyboardKey.KEY_DOWN: "DOWN",
            KeyboardKey.KEY_LEFT: "LEFT",
            KeyboardKey.KEY_RETURN: "HIT"
        }

        self.action_space = KeyboardMouseActionSpace(action=['None', 'HIT'])
        self.move_action_space = KeyboardMouseActionSpace(
            action=['None', 'JUMP', 'RIGHT', 'LEFT']
        )

        '''
        move_inputs = {
            "JUMP": [KeyboardKey.KEY_UP],
            "RIGHT": [KeyboardKey.KEY_RIGHT],
            "LEFT": [KeyboardKey.KEY_LEFT],
            "NO_MOVE": []
        }
        attack_inputs = {
            "Power Hit": [KeyboardKey.KEY_RETURN],
            "NO_HIT": []
        }
        self.game_inputs = dict()
        for move_label, attack_label in itertools.product(move_inputs, attack_inputs):
            label = f"{move_label.ljust(10)}{attack_label}"
            self.game_inputs[label] = move_inputs[move_label] + attack_inputs[attack_label]
        print(self.game_inputs)
        '''

    def setup_play(self):
        # self.cid = 0
        self.trainID = 0
        self.setup_key()
        self.frame_process = False
        self.rewards = list()

        self.started_at = datetime.now()
        self.started_at_str = self.started_at.isoformat()

        self.save_point_path = 'score.npy'
        if os.path.isfile(self.save_point_path):
            self.score_record = np.load(self.save_point_path)
        else:
            self.score_record = np.zeros(shape=(0,))

        self.collision_count_path = 'collision.npy'
        self.reward_sum = 0
        if os.path.isfile(self.collision_count_path):
            self.collision_counter = np.load(self.collision_count_path)
        else:
            self.collision_counter = np.zeros(shape=(0,))

        # Resume from the saved action model with the lowest (most eroded)
        # epsilon embedded in its file name
        latest_epsilon = 1
        action_model_path = 'dqn_action_0_1_.h5'
        model_list = os.listdir('model/action')
        for item in model_list:
            for epsilon in re.findall(r"\d+\.\d+", item):
                if latest_epsilon > float(epsilon):
                    latest_epsilon = float(epsilon)
                    action_model_path = item
        action_model_path = f'model/action/{action_model_path}'.replace('/', os.sep)

        self.dqn_action = DDQN(
            model_file_path=action_model_path if os.path.isfile(action_model_path) else None,
            input_shape=(100, 100, 4),
            input_mapping=self.input_mapping,
            action_space=self.action_space,
            replay_memory_size=5000,
            max_steps=2000000,
            observe_steps=100 if os.path.isfile(action_model_path) else 2000,
            batch_size=32,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=True
        )

        # Same resume logic for the movement model
        latest_epsilon = 1
        move_model_path = 'dqn_move_0_1_.h5'
        model_list = os.listdir('model/move')
        for item in model_list:
            for epsilon in re.findall(r"\d+\.\d+", item):
                if latest_epsilon > float(epsilon):
                    latest_epsilon = float(epsilon)
                    move_model_path = item
        move_model_path = f'model/move/{move_model_path}'.replace('/', os.sep)

        self.dqn_move = DDQN(
            model_file_path=move_model_path if os.path.isfile(move_model_path) else None,
            input_shape=(100, 100, 4),
            input_mapping=self.input_mapping,
            action_space=self.move_action_space,
            replay_memory_size=5000,
            max_steps=2000000,
            observe_steps=100 if os.path.isfile(move_model_path) else 2000,
            batch_size=32,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=True
        )

        print('Starting Game')
        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)

    def getDifference(self, game_frame, previous_game_frame):
        return game_frame.grayscale_frame - previous_game_frame.grayscale_frame

    def handle_play(self, game_frame):
        # Append memory-read data into the game state
        (self.game_state["com_x"], self.game_state["com_y"],
         self.ai_x, self.ai_y, self.ball_x, self.ball_y,
         self.com_sc, self.ai_sc, self.col_size,
         self.game_state["col_x"], self.game_state["col_y"]) = readInfo()

        self.game_state["ai_x"].appendleft(self.ai_x)
        self.game_state["ai_y"].appendleft(self.ai_y)
        self.game_state["ball_x"].appendleft(self.ball_x)
        self.game_state["ball_y"].appendleft(self.ball_y)
        self.game_state["ai_score"].appendleft(self.ai_sc)
        self.game_state["com_score"].appendleft(self.com_sc)
        self.game_state["col_size"].appendleft(self.col_size)

        # Judge whether we are in-game by reading a pixel value (tricky)
        self.game_frame_img = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0].frame

        if self.game_frame_img[91, 49] != 0.3607843137254902:
            self.handle_notInGame()
        else:
            self.game_state["playing"] = True
            self.handle_fight(game_frame)

    def handle_notInGame(self):
        serpent.utilities.clear_terminal()
        print('Currently not in game... please wait...')
        playAnimation(self.game_state["animeIndex"])
        self.game_state["animeIndex"] = self.game_state["animeIndex"] + 1 if self.game_state["animeIndex"] < 3 else 0
        # print(self.game_frame_img[75:97, 47:52])
        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(0.3)

    def handle_fight(self, game_frame):
        gc.disable()

        if self.dqn_action.first_run:
            self.dqn_action.first_run = False
            self.dqn_move.first_run = False
            return

        if self.dqn_action.frame_stack is None:
            game_frame_buffer = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0]
            self.dqn_action.build_frame_stack(game_frame_buffer.frame)
            self.dqn_move.frame_stack = self.dqn_action.frame_stack
        else:
            game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")

            if self.dqn_action.mode == "TRAIN":
                reward = self._calculate_reward()
                self.game_state["reward"] = reward

                self.dqn_action.append_to_replay_memory(
                    game_frame_buffer,
                    reward,
                    terminal=self.game_state["ai_score"][0] == 15
                )
                self.dqn_move.append_to_replay_memory(
                    game_frame_buffer,
                    reward,
                    terminal=self.game_state["ai_score"][0] == 15
                )

                # Every 1000 steps, save latest weights to disk
                if self.dqn_action.current_step % 1000 == 0:
                    self.dqn_action.save_model_weights(file_path_prefix="model/action/dqn_action")
                    self.dqn_move.save_model_weights(file_path_prefix="model/move/dqn_move")

                # Every 10000 steps, save a weights checkpoint to disk
                if self.dqn_action.current_step % 10000 == 0:
                    self.dqn_action.save_model_weights(
                        file_path_prefix="model/action/dqn_action",
                        is_checkpoint=True
                    )
                    self.dqn_move.save_model_weights(
                        file_path_prefix="model/move/dqn_move",
                        is_checkpoint=True
                    )
            elif self.dqn_action.mode == "RUN":
                self.dqn_action.update_frame_stack(game_frame_buffer)
                self.dqn_move.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()
            print('')
            print(Fore.YELLOW)
            print(Style.BRIGHT)
            print(f"STARTED AT: {self.started_at_str}")
            print(f"RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, "
                  f"{(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} s")
            print(Style.RESET_ALL)

            print(Fore.GREEN)
            print(Style.BRIGHT)
            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_move.output_step_data()
            print("")
            print("ACTION NEURAL NETWORK:\n")
            self.dqn_action.output_step_data()
            print(Style.RESET_ALL)

            print(Style.BRIGHT)
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print("")
            print(f"CURRENT RUN REWARD: {round(self.game_state['reward'], 4)}")
            print(f"CURRENT AI SCORE: {self.game_state['ai_score'][0]}")
            print(f"CURRENT ENEMY SCORE: {self.game_state['com_score'][0]}")
            print("")
            print(f"PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(Style.RESET_ALL)

            self.dqn_action.pick_action()
            # self.dqn_action.pick_action(action_type="PREDICTED")
            self.dqn_action.generate_action()

            self.dqn_move.pick_action(action_type=self.dqn_action.current_action_type)
            # self.dqn_move.pick_action(action_type="PREDICTED")
            self.dqn_move.generate_action()

            movement_keys = self.dqn_move.get_input_values() + self.dqn_action.get_input_values()

            print("")
            print(Fore.GREEN)
            print(Style.BRIGHT)
            # print(movement_keys)
            print(" + ".join(list(map(lambda k: self.key_mapping.get(k).ljust(5), movement_keys))))
            print(Style.RESET_ALL)
            print("")
            print(f"AI: ({self.game_state['ai_x'][0]}, {self.game_state['ai_y'][0]})")
            print(f"COM: ({self.game_state['com_x']}, {self.game_state['com_y']})")
            print(f"BALL: ({self.game_state['ball_x'][0]}, {self.game_state['ball_y'][0]})")
            print(f"Collision: ({self.game_state['col_x']}, {self.game_state['col_y']}, {self.game_state['col_size'][0]})")
            print(f"Distance: {self.game_state['distance'][0]}")

            self.input_controller.handle_keys(movement_keys)

            if self.dqn_action.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_action.erode_epsilon(factor=2)
            self.dqn_move.erode_epsilon(factor=2)

            self.dqn_action.next_step()
            self.dqn_move.next_step()

            self.game_state["current_run"] += 1

            if self.game_state['ai_score'][0] == 15 or self.game_state['com_score'][0] == 15:
                # Game over
                self.game_state["ai_score"].appendleft(0)
                self.game_state["com_score"].appendleft(0)
                self.score_record = np.append(self.score_record, self.game_state['ai_score'][1])
                np.save(self.save_point_path, self.score_record)
                self.handle_fight_end(game_frame)

    def handle_fight_end(self, game_frame):
        self.game_state["playing"] = False
        self.input_controller.handle_keys([])
        self.game_state["current_run"] += 1
        self.handle_fight_training(game_frame)

    def handle_fight_training(self, game_frame):
        # self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
        serpent.utilities.clear_terminal()
        gc.enable()
        gc.collect()
        gc.disable()

        print("TRAIN MODE")
        self.input_controller.handle_keys([])

        if self.dqn_action.mode == "TRAIN":
            for i in range(16):
                serpent.utilities.clear_terminal()
                print("")
                print(Fore.GREEN)
                print(Style.BRIGHT)
                print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                print(f"NEXT RUN: {self.game_state['current_run'] + 1} "
                      f"{'- AI RUN' if (self.game_state['current_run'] + 1) % 25 == 0 else ''}")
                print(Style.RESET_ALL)

                self.dqn_action.train_on_mini_batch()
                self.dqn_move.train_on_mini_batch()

        self.game_state["run_predicted_actions"] = 0

        if self.dqn_action.mode in ["TRAIN", "RUN"]:
            if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                self.dqn_action.update_target_model()
                self.dqn_move.update_target_model()

            if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                self.dqn_action.enter_run_mode()
                self.dqn_move.enter_run_mode()
            else:
                self.dqn_action.enter_train_mode()
                self.dqn_move.enter_train_mode()

        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(2)

    def _reset_game_state(self):
        self.game_state = {
            "reward": 0,
            "animeIndex": 0,
            "current_run": 1,
            "playing": False,
            "run_predicted_actions": 0,
            "ai_x": collections.deque(np.full((4,), 0), maxlen=4),
            "ai_y": collections.deque(np.full((4,), 0), maxlen=4),
            "ai_score": collections.deque(np.full((4,), 0), maxlen=4),
            "ball_x": collections.deque(np.full((4,), 0), maxlen=4),
            "ball_y": collections.deque(np.full((4,), 0), maxlen=4),
            "com_score": collections.deque(np.full((4,), 0), maxlen=4),
            "col_size": collections.deque(np.full((4,), 6), maxlen=4),
            "com_x": 36,
            "com_y": 244,
            "col_x": 0,
            "col_y": 0,
            "distance": collections.deque(np.full((20,), 100), maxlen=20)
        }

    def _calculate_reward(self):
        reward = 0

        distance = math.sqrt(
            (self.game_state["ai_x"][0] - self.game_state["ball_x"][0]) ** 2 +
            (self.game_state["ai_y"][0] - self.game_state["ball_y"][0]) ** 2
        )
        self.game_state["distance"].appendleft(int(distance))

        # To make the AI move less:
        # if self.game_state["ai_x"][0] == self.game_state["ai_x"][1]:
        #     reward += 0.1

        # Collision with the ball: close for three consecutive frames, with
        # the distance just starting to grow again (the ball bounced off)
        collision = (
            self.game_state["distance"][0] < 80
            and self.game_state["distance"][1] < 80
            and self.game_state["distance"][2] < 80
            and self.game_state["distance"][0] > self.game_state["distance"][1]
            and self.game_state["distance"][1] < self.game_state["distance"][2]
        )

        if collision:
            reward += 0.25

        # Power hit
        if self.game_state["col_size"][0] > 0 and self.game_state["distance"][0] < 90 and self.game_state["col_y"] != 272:
            reward += 0.5

        # AI gains a point
        if self.game_state["ai_score"][0] > self.game_state["ai_score"][1]:
            reward += 1
            self.collision_counter = np.append(self.collision_counter, self.reward_sum)
            np.save(self.collision_count_path, self.collision_counter)
            self.reward_sum = 0

        # Com gains a point
        if self.game_state["com_score"][0] > self.game_state["com_score"][1]:
            reward += -1
            self.collision_counter = np.append(self.collision_counter, self.reward_sum)
            np.save(self.collision_count_path, self.collision_counter)
            self.reward_sum = 0

        if reward > 1:
            reward = 1

        self.game_state["reward"] = reward
        self.reward_sum += reward

        return reward
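# ---------------------------------------------------------------------------
# Standalone sketch (not part of the agent above): the ball-contact heuristic
# from _calculate_reward. The ball counts as "hit" when it has been within 80
# pixels for three consecutive frames and the newest distance just started
# growing again (i.e. it bounced off the player). The function name and the
# sample histories below are made up for illustration.

import collections


def sketch_ball_contact(distance):
    # distance: newest-first history of player-to-ball distances
    return (
        distance[0] < 80 and distance[1] < 80 and distance[2] < 80
        and distance[0] > distance[1]  # moving away now...
        and distance[1] < distance[2]  # ...after closing in
    )


# Approached (70 -> 40), then bounced back out (40 -> 55): contact
print(sketch_ball_contact(collections.deque([55, 40, 70], maxlen=20)))  # True

# Still closing in, no bounce yet: no contact
print(sketch_ball_contact(collections.deque([40, 55, 70], maxlen=20)))  # False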