def grab_frames(width, height, x_offset, y_offset, pipeline_string=None):
    from serpent.frame_grabber import FrameGrabber

    frame_grabber = FrameGrabber(
        width=int(width),
        height=int(height),
        x_offset=int(x_offset),
        y_offset=int(y_offset),
        pipeline_string=pipeline_string
    )

    frame_grabber.start()
def grab_frames(width, height, x_offset, y_offset):
    from serpent.frame_grabber import FrameGrabber

    frame_grabber = FrameGrabber(
        width=int(width),
        height=int(height),
        x_offset=int(x_offset),
        y_offset=int(y_offset)
    )

    frame_grabber.start()
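# A minimal consumer sketch for the worker above, assuming the frame grabber
# process has been started and is publishing frames. The [0, 1, 2, 3] offsets
# and the "PIPELINE" frame type mirror the FrameGrabber.get_frames() calls used
# throughout the agents below; the function name and the optional
# visual-debugger usage are illustrative, not part of the original code.
def inspect_latest_frames(visual_debugger=None):
    from serpent.frame_grabber import FrameGrabber

    # The most recent frame plus the three before it, already run through
    # the configured frame transformation pipeline
    game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")

    if visual_debugger is not None:
        for i, game_frame in enumerate(game_frame_buffer.frames):
            visual_debugger.store_image_data(
                game_frame.frame,
                game_frame.frame.shape,
                str(i)
            )

    return game_frame_buffer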
def setup_play(self):
    self.last_frame_had_position = False
    self.fill_count = 0
    self.working_trade = False
    self.episode_count = 0

    self.current_action = None
    self.last_action = None
    self.repeat_count = 0

    self.sell_point = (743, 406)
    self.buy_point = (743, 429)
    self.pull_point = (5, 117)

    self.held = False

    self.scraper = T4Scraper(self.game, self.visual_debugger)
    self.frame_buffer = None

    self.scraper.current_frame = FrameGrabber.get_frames([0]).frames[0]
    self.pl = self.scraper.get_pl()
    self.fill_count = self.scraper.get_position_and_fill_count()[1]

    game_inputs = {"Buy": 1, "Sell": 2, "Hold": 3}

    self.ppo_agent = SerpentPPO(frame_shape=(164, 264, 4), game_inputs=game_inputs)

    try:
        self.ppo_agent.agent.restore(directory=os.path.join(os.getcwd(), "datasets", "t4dowmodel"))
        # self.ppo_agent.agent.restore(directory=os.path.join(os.getcwd(), "datasets", "t4simmodel"))
    except Exception:
        pass
def setup_play(self):
    self.game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")
    print("game_frame_buffer:", self.game_frame_buffer)
    # self.ppo_agent.generate_action(game_frame_buffer)

    self.window_dim = (self.game.window_geometry['height'], self.game.window_geometry['width'], 3)

    self.model = KerasDeepKingdom(
        time_dim=(self.memory_timeframe,),
        game_frame_dim=self.window_dim  # (600, 960, 3) / (360, 627, 3)
    )

    print("Screen_regions:", self.game.screen_regions)

    for region in self.game.screen_regions:
        print("Region is", region)

        # TODO: Fix this absolute path
        path = "C:\\SerpentAI\\datasets\\collect_frames"

        classes = [
            name for name in os.listdir(path)
            if os.path.isdir(os.path.join(path, name)) and (region + "_") in name
        ]

        print("directory contains:", classes)

        if os.path.isfile(region + "_trained_model.h5"):
            print("Loading model", region, "from file")
            self.game.api_class.SpriteLocator.load_model(model_name=region, classes=classes)
        else:
            print("Building and training", region, "network from scratch")

            self.game.api_class.SpriteLocator.construct_sprite_locator_network(
                model_name=region,
                screen_region=self.game.screen_regions[region],
                classes=classes
            )

            self.game.api_class.SpriteLocator.train_model(classes=classes, model_name=region)
def handle_play(self, game_frame):
    self.game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")
    frame_buffer = self.game_frame_buffer.frames

    for model_name in self.game.api_class.SpriteLocator.sprite_models:
        print("model is", model_name)
        print(self.game.api_class.SpriteLocator.sprite_recognized(
            game_frame=game_frame,
            screen_region=self.game.screen_regions[model_name],
            model_name=model_name,  # screen_region_frame
            classes=self.game.api_class.SpriteLocator.sprite_models[model_name]["classes"]
        ))

    print(game_frame.frame.shape)

    for i, game_frame in enumerate(frame_buffer):
        self.visual_debugger.store_image_data(game_frame.frame, game_frame.frame.shape, str(i))

    # self.game_frame_buffer.append(game_frame.frame)
    # print("game_frame_buffer:", frame_buffer)

    # if len(frame_buffer) >= self.memory_timeframe:  # there are enough frames stored to train the network
    move_per_timestep = self.model.decide(frame_buffer)
    score = self.model.evaluate_move(move_per_timestep)

    self.model.update_weights(frame_buffer, score)
def handle_play(self, game_frame, game_frame_pipeline):
    valid_game_state = self.environment.update_game_state(game_frame)

    if not valid_game_state:
        return None

    reward = self.reward_aisaac(self.environment.game_state, game_frame)

    terminal = (
        not self.environment.game_state["isaac_alive"] or
        self.environment.game_state["boss_dead"] or
        self.environment.episode_over
    )

    self.agent.observe(reward=reward, terminal=terminal)

    if not terminal:
        frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")
        agent_actions = self.agent.generate_actions(frame_buffer)

        self.environment.perform_input(agent_actions)
    else:
        self.environment.clear_input()
        self.agent.reset()

        if self.environment.game_state["boss_dead"]:
            self.analytics_client.track(event_key="BOSS_KILL", data={"foo": "bar"})

        self.environment.end_episode()
        self.environment.new_episode(maximum_steps=960, reset=self.agent.mode.name != "TRAIN")
def setup_play(self):
    self.fill_count = 0
    self.working_trade = False
    self.episode_count = 0

    self.buy_point = (326, 334)
    self.sell_point = (647, 634)

    self.scraper = T4Scraper(self.game, self.visual_debugger)
    self.frame_buffer = None

    self.scraper.current_frame = FrameGrabber.get_frames([0]).frames[0]
    self.pl = self.scraper.get_pl()
    self.fill_count = self.scraper.get_position_and_fill_count()[1]

    game_inputs = {
        "Buy": 1,
        "Sell": 2
    }

    self.ppo_agent = SerpentPPO(
        frame_shape=(248, 510, 2),
        game_inputs=game_inputs
    )

    try:
        self.ppo_agent.agent.restore(directory=os.path.join(os.getcwd(), "datasets", "t4androidmodel"))
    except Exception:
        pass
def handle_play(self, game_frame, game_frame_pipeline):
    self.paused_at = None

    valid_game_state = self.environment.update_game_state(game_frame)

    if not valid_game_state:
        return None

    reward = self.reward(self.environment.game_state)

    terminal = (
        self.environment.game_state["is_too_slow"] or
        self.environment.game_state["is_out_of_fuel"] or
        self.environment.game_state["is_race_over"] or
        self.environment.episode_over
    )

    self.agent.observe(reward=reward, terminal=terminal)

    if not terminal:
        game_frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")
        agent_actions = self.agent.generate_actions(game_frame_buffer)

        self.environment.perform_input(agent_actions)
    else:
        self.environment.clear_input()
        self.agent.reset()

        if self.environment.game_state["is_race_over"]:
            time.sleep(5)

            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(11)

            if (self.environment.episode + 1) % self.environment.episodes_per_race_track == 0:
                self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
                time.sleep(8)

                self.game.api.select_random_region_track(self.input_controller)
        else:
            self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
            time.sleep(1)

            if (self.environment.episode + 1) % self.environment.episodes_per_race_track == 0:
                for _ in range(3):
                    self.input_controller.tap_key(KeyboardKey.KEY_S)
                    time.sleep(0.1)

                self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
                time.sleep(1)

                self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
                time.sleep(8)

                self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
                time.sleep(1)

                self.game.api.select_random_region_track(self.input_controller)

        self.environment.end_episode()
        self.environment.new_episode(maximum_steps=2400)
def handle_play(self, game_frame):
    self.move_time_start = time.time()

    self.game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")
    frame_buffer = self.game_frame_buffer.frames

    context_frame = FrameGrabber.get_frames([0], frame_type="FULL").frames[0]

    context = self.machine_learning_models["context_classifier"].predict(frame_buffer[0].frame)  # context_frame.frame
    self.game_state.current_context = context

    self.game_state.not_playing_context_counter += 1

    self.move_time = time.time() - self.move_time_start
    # print(context)

    if (context is None or context in ["ofdp_playing", "ofdp_game"]) and self.game_state.health > 0:
        # We are currently playing an episode
        if self.game_state.read_kill_count() > 10000:
            # If the kill count gets implausibly large during the episode we should ignore
            # the episode. This happens when the memory address for the kill count changes
            # for this episode, so we just ignore the episode.
            self.agent.printer.print_error()
            return

        self.make_a_move(frame_buffer, context_frame)
        self.game_state.not_playing_context_counter = 0

        return
    else:
        # This is a hack to avoid runs being ended early due to the
        # context classifier getting the wrong context while playing
        if self.game_state.not_playing_context_counter < 5:
            return

        # Navigate the context menu if identified
        self.do_splash_screen_action(context)
        self.do_main_menu_actions(context)
        self.do_mode_menu_action(context)
        self.do_survival_menu_action(context)
        self.do_survival_pre_game_action(context)
        self.do_game_paused_action(context)
        self.do_game_end_highscore_action(context)
        self.do_game_end_score_action(context)
def handle_play(self, game_frame):
    # for i, game_frame in enumerate(self.game_frame_buffer.frames):
    #     self.visual_debugger.store_image_data(
    #         game_frame.frame,
    #         game_frame.frame.shape,
    #         str(i)
    #     )

    # self.scraper.current_frame = game_frame

    if not self.held and self.has_open_positions():
        self.last_frame_had_position = True
        return

    if self.last_frame_had_position:
        self.last_frame_had_position = False
        return

    self.episode_count += 1

    reward = self.reward_agent()
    self.ppo_agent.observe(reward, terminal=False)

    self.frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")
    self.frame_buffer = self.extract_game_area(self.frame_buffer)

    self.visual_debugger.store_image_data(self.frame_buffer[0], self.frame_buffer[0].shape, 2)
    self.visual_debugger.store_image_data(self.frame_buffer[3], self.frame_buffer[3].shape, 3)

    action, label, game_input = self.ppo_agent.generate_action(self.frame_buffer)
    print(label)

    self.current_action = label

    if game_input == 1:
        # Perform buy
        self.working_trade = True

        self.input_controller.move(x=self.buy_point[0], y=self.buy_point[1])
        self.input_controller.click()
    elif game_input == 2:
        # Perform sell
        self.working_trade = True

        self.input_controller.move(x=self.sell_point[0], y=self.sell_point[1])
        self.input_controller.click()
    elif game_input == 3:
        self.held = True
def reset(self):
    self.frame_buffer = FrameGrabber.get_frames([0], frame_type="PIPELINE")
    self.frame_buffer = self.extract_game_area(self.frame_buffer)

    # states = np.stack(
    #     self.frame_buffer,
    #     axis=2
    # )

    print('GETTING STATES')
    print(self.frame_buffer[0])
    print(self.frame_buffer[0].shape)

    return self.frame_buffer[0], 0, False, {}
def setup_play(self):
    self.run_count = 0
    self.run_reward = 0

    self.observation_count = 0

    self.delay_fuzzing_durations = [0.05, 0.1, 0.2]
    self.delay_fuzzing_observation_cap = 100000

    self.performed_inputs = collections.deque(list(), maxlen=8)

    self.reward_10 = collections.deque(list(), maxlen=10)
    self.reward_100 = collections.deque(list(), maxlen=100)
    self.reward_1000 = collections.deque(list(), maxlen=1000)

    self.average_reward_10 = 0
    self.average_reward_100 = 0
    self.average_reward_1000 = 0

    self.score_10 = collections.deque(list(), maxlen=10)
    self.score_100 = collections.deque(list(), maxlen=100)
    self.score_1000 = collections.deque(list(), maxlen=1000)

    self.average_score_10 = 0
    self.average_score_100 = 0
    self.average_score_1000 = 0

    self.top_score = 0
    self.top_score_run = 0

    self.previous_score = 0

    # Measured on the Serpent.AI Lab Stream (Feb 12 2018)
    self.random_average_score = 67.51
    self.random_top_score = 5351
    self.random_runs = 2700

    self.death_check = False
    self.just_relaunched = False

    self.frame_buffer = None

    self.ppo_agent = SerpentPPO(frame_shape=(100, 100, 4), game_inputs=self.game_inputs)

    # Warm Agent?
    game_frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")
    self.ppo_agent.generate_action(game_frame_buffer)

    self.started_at = datetime.utcnow().isoformat()
def handle_play(self, game_frame, game_frame_pipeline):
    self.paused_at = None

    with mss() as sct:
        monitor_var = sct.monitors[1]
        monitor = sct.grab(monitor_var)

    valid_game_state = self.environment.update_startregions_state(monitor)

    if not valid_game_state:
        return None

    reward, over_boolean = self.reward(self.environment.startregions_state, 1.0)
    terminal = over_boolean

    self.agent.observe(reward=reward, terminal=terminal)

    if not terminal:
        game_frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")
        agent_actions = self.agent.generate_actions(game_frame_buffer)

        print("Current Action: ")
        print(agent_actions)

        str_agent_actions = str(agent_actions)

        if "MOVE MOUSE X" in str_agent_actions:
            set_pos(200, 0)
        if "MOVE MOUSE Y" in str_agent_actions:
            set_pos(0, 200)
        if "MOVE MOUSE XY" in str_agent_actions:
            set_pos(100, 100)
        if "MOVE MOUSE X2" in str_agent_actions:
            set_pos(-200, 0)
        if "MOVE MOUSE Y2" in str_agent_actions:
            set_pos(0, -200)
        if "MOVE MOUSE XY2" in str_agent_actions:
            set_pos(-100, -100)
        if "MOVE MOUSE XY3" in str_agent_actions:
            set_pos(-100, 100)
        if "MOVE MOUSE XY4" in str_agent_actions:
            set_pos(100, -100)
        if "LETHAL" in str_agent_actions:
            self.input_non_lethal = True
            self.human()

        self.environment.perform_input(agent_actions)
    else:
        self.environment.clear_input()
        self.agent.reset()

        time.sleep(30)

        # TODO: Choose loadout (medium range)

        self.environment.end_episode()
        self.environment.new_episode(maximum_steps=350)

        print("New Episode")
def step(self, action):
    if action == 1:
        # Perform buy
        self.working_trade = True

        self.input_controller.move(x=self.buy_point[0], y=self.buy_point[1])
        self.input_controller.click()
    elif action == 2:
        # Perform sell
        self.working_trade = True

        self.input_controller.move(x=self.sell_point[0], y=self.sell_point[1])
        self.input_controller.click()

    # Block until the trade closes, refreshing the scraper's frame while waiting
    while self.has_open_positions():
        sleep(0.1)

        frame = FrameGrabber.get_frames([0]).frames[0]
        self.scraper.current_frame = frame

    reward = self.reward_agent()

    self.frame_buffer = FrameGrabber.get_frames([0], frame_type="PIPELINE")
    self.frame_buffer = self.extract_game_area(self.frame_buffer)

    # states = np.stack(
    #     self.frame_buffer,
    #     axis=2
    # )

    print('GETTING STATES')
    # print(states.shape)

    return self.frame_buffer[0], reward, False, {}
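# The reset()/step() pair above exposes the trading screen as a Gym-style
# environment returning (observation, reward, done, info). A minimal driver
# loop sketch, assuming `env` is an instance of the class above and `agent`
# exposes a generate_action(observation) method -- both names are hypothetical
# here, not part of the original code.
def run_episode(env, agent, max_steps=1000):
    observation, reward, done, info = env.reset()

    for _ in range(max_steps):
        # e.g. 1 = buy, 2 = sell, per the step() handler above
        action = agent.generate_action(observation)

        observation, reward, done, info = env.step(action)

        if done:
            break

    return reward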
def setup(self, ppo_agent, model_name, metadata_key):
    self.keys = RedisKeys(metadata_key)
    self.redis = redis.Redis(port=6001)

    self.ppo_agent = ppo_agent

    self.history = list()
    self.m_k = metadata_key

    self.buys = 0
    self.sells = 0

    self.last_frame_had_position = False
    self.fill_count = 0
    self.working_trade = False
    self.episode_count = 0

    self.number_of_trades = 0
    self.number_of_wins = 0

    self.current_action = None
    self.last_action = None
    self.repeat_count = 0

    self.sell_point = (671, 447)
    self.buy_point = (669, 476)
    self.pull_point = (562, 173)

    self.held = False

    self.scraper = T4Scraper(self.s.game, self.s.visual_debugger)
    self.frame_buffer = None

    self.scraper.current_frame = FrameGrabber.get_frames([0]).frames[0]
    self.pl = self.scraper.get_pl()
    self.fill_count = self.scraper.get_position_and_fill_count()[1]

    self.model_name = model_name

    print('AFTER INIT AGENT')

    try:
        self.ppo_agent.agent.restore(directory=os.path.join(os.getcwd(), "datasets", self.model_name))
        # self.ppo_agent.agent.restore(directory=os.path.join(os.getcwd(), "datasets", "t4simmodel"))
    except Exception:
        pass

    print('AFTER RESTORE')

    self.get_metadata()
def handle_play(self, game_frame):
    # Append memory data to the game state
    (self.game_state["com_x"], self.game_state["com_y"], self.ai_x, self.ai_y,
     self.ball_x, self.ball_y, self.com_sc, self.ai_sc, self.col_size,
     self.game_state["col_x"], self.game_state["col_y"]) = readInfo()

    self.game_state["ai_x"].appendleft(self.ai_x)
    self.game_state["ai_y"].appendleft(self.ai_y)
    self.game_state["ball_x"].appendleft(self.ball_x)
    self.game_state["ball_y"].appendleft(self.ball_y)
    self.game_state["ai_score"].appendleft(self.ai_sc)
    self.game_state["com_score"].appendleft(self.com_sc)
    self.game_state["col_size"].appendleft(self.col_size)

    # Judge whether we are in-game by reading a pixel value (tricky)
    self.game_frame_img = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0].frame

    if self.game_frame_img[91, 49] != 0.3607843137254902:
        self.handle_notInGame()
    else:
        self.game_state["playing"] = True
        self.handle_fight(game_frame)
def handle_data(self, game_frame, game_frame_pipeline):
    hp_int = self._measure_actor_hp()

    try:
        if hp_int < 10:
            terminal = {1: False}
        else:
            terminal = {0: True}
    except Exception:
        terminal = {1: False}

    self.reward_observe = self.reward_ai()
    self.agent.observe(reward=self.reward_observe, terminal=terminal)

    frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")

    self.game_input = self.agent.generate_actions(frame_buffer)
    self.game_input = str(self.game_input)

    print(self.game_input)

    return self.game_input
def handle_play(self, game_frame):
    # Locate sprite position and existence
    '''
    logo_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_LOGO'], game_frame=game_frame)
    menu_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_MENU'], game_frame=game_frame)
    game_set_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_GAME_SET'], game_frame=game_frame)
    '''

    # Append memory data to the game state
    (self.game_state["com_x"], self.game_state["com_y"], self.ai_x, self.ai_y,
     self.ball_x, self.ball_y, self.com_sc, self.ai_sc, self.col_size,
     self.game_state["col_x"], self.game_state["col_y"]) = readInfo()

    self.game_state["ai_x"].appendleft(self.ai_x)
    self.game_state["ai_y"].appendleft(self.ai_y)
    self.game_state["ball_x"].appendleft(self.ball_x)
    self.game_state["ball_y"].appendleft(self.ball_y)
    self.game_state["ai_score"].appendleft(self.ai_sc)
    self.game_state["com_score"].appendleft(self.com_sc)
    self.game_state["col_size"].appendleft(self.col_size)

    self.handle_frame_process(game_frame)

    '''
    if logo_locator:
        print('Entering Logo...')
        self.game_state["playing"] = False
        self.handle_menu()
    elif menu_locator:
        print('Entering Menu...')
        self.game_state["playing"] = False
        self.handle_menu()
    elif game_set_locator:
        print('Game Set!')
        self.handle_fight_end(game_frame)
    '''

    # Judge whether we are in-game by reading a pixel value (tricky)
    self.game_frame_img = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0].frame

    if self.game_frame_img[100, 81] != 0.7137254901960784:
        self.handle_notInGame()
    else:
        self.game_state["playing"] = True
        self.handle_fight(game_frame)
def select_random_track(self, input_controller):
    input_controller.handle_keys([])

    start_world_region = None

    while start_world_region is None:
        game_frame_buffer = FrameGrabber.get_frames([0])
        game_frame = game_frame_buffer.frames[0]

        start_world_region = self.identify_world_region(game_frame)

    end_world_region = random.choice(range(0, 9))
    self.go_to_world_region(start_world_region, end_world_region, input_controller)

    input_controller.tap_key(KeyboardKey.KEY_ENTER)
    time.sleep(1)

    possible_keys = [KeyboardKey.KEY_W, KeyboardKey.KEY_A, KeyboardKey.KEY_S, KeyboardKey.KEY_D]

    for _ in range(30):
        input_controller.tap_key(random.choice(possible_keys))
        time.sleep(0.05)

    input_controller.tap_key(KeyboardKey.KEY_ENTER)
    time.sleep(1)

    possible_keys = [KeyboardKey.KEY_A, KeyboardKey.KEY_D]

    for _ in range(30):
        input_controller.tap_key(random.choice(possible_keys))
        time.sleep(0.05)

    input_controller.tap_key(KeyboardKey.KEY_ENTER)
    time.sleep(1)
def handle_play(self, game_frame, game_frame_pipeline):
    valid_game_state = self.environment.update_game_state(game_frame)

    if not valid_game_state:
        return None

    move_reward, attack_reward = self.reward_aisaac(self.environment.game_state, game_frame)

    terminal = (
        not self.environment.game_state["isaac_alive"] or
        self.environment.game_state["boss_dead"] or
        self.environment.episode_over
    )

    self.agent.observe(
        move_reward=move_reward,
        attack_reward=attack_reward,
        terminal=terminal,
        boss_hp=self.environment.game_state["boss_hp"],
        isaac_hp=self.environment.game_state["isaac_hp"]
    )

    if not terminal:
        # [0, 2, 4, 6] to [0, 2]?
        # At 30 FPS, this looks at the current frame plus frames from up to
        # 6 * (1 / 30) = 0.2 seconds ago
        frame_buffer = FrameGrabber.get_frames([0, 2, 4, 6], frame_type="PIPELINE")
        agent_actions = self.agent.generate_actions(frame_buffer)
        # print(agent_actions)

        self.environment.perform_input(agent_actions)
    else:
        self.environment.clear_input()
        self.agent.reset()

        if self.environment.game_state["boss_dead"]:
            self.analytics_client.track(event_key="BOSS_KILL", data={"foo": "bar"})

        self.environment.end_episode()
        self.environment.new_episode(maximum_steps=3840, reset=False)
def handle_record(self, game_frame, game_frame_pipeline, **kwargs):
    game_frame_buffer = FrameGrabber.get_frames(self.frame_offsets, frame_type="PIPELINE")
    self.game_frame_buffers.append(game_frame_buffer)
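# A hedged sketch of consuming the buffers recorded above: stacking each
# GameFrameBuffer into a (timesteps, height, width, ...) numpy array suitable
# as a model input. `game_frame_buffers` and the GameFrame.frame attribute come
# from the recorder above; the stacking itself is illustrative, not the
# original pipeline.
import numpy as np

def buffers_to_arrays(game_frame_buffers):
    arrays = []

    for game_frame_buffer in game_frame_buffers:
        # One array per recorded buffer, frames ordered as captured
        frames = [game_frame.frame for game_frame in game_frame_buffer.frames]
        arrays.append(np.stack(frames, axis=0))

    return arrays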
def grab_latest_frame(self):
    game_frame_buffer, game_frame_buffer_pipeline = FrameGrabber.get_frames_with_pipeline([0])
    return game_frame_buffer.frames[0], game_frame_buffer_pipeline.frames[0]
def handle_fight(self, game_frame):
    gc.disable()

    if self.dqn_action.first_run:
        self.dqn_action.first_run = False
        return

    if self.dqn_action.frame_stack is None:
        game_frame_buffer = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0]
        self.dqn_action.build_frame_stack(game_frame_buffer.frame)
    else:
        # Saving frame images for analysis:
        # self.cid = self.cid + 1
        # game_frame_img = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0]
        # skimage.io.imsave(f"frame{self.cid}.png", game_frame_img.frame)

        game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")

        if self.dqn_action.mode == "TRAIN":
            reward = self._calculate_reward()

            self.game_state["reward"] += reward

            self.dqn_action.append_to_replay_memory(
                game_frame_buffer,
                reward,
                terminal=self.game_state["ai_score"][0] == 15
            )

            # Every 1000 steps, save latest weights to disk
            if self.dqn_action.current_step % 1000 == 0:
                self.dqn_action.save_model_weights(file_path_prefix="model/fighting_movement")

            # Every 10000 steps, save a weights checkpoint to disk
            if self.dqn_action.current_step % 10000 == 0:
                self.dqn_action.save_model_weights(
                    file_path_prefix="model/fighting_movement",
                    is_checkpoint=True
                )
        elif self.dqn_action.mode == "RUN":
            self.dqn_action.update_frame_stack(game_frame_buffer)

        run_time = datetime.now() - self.started_at

        serpent.utilities.clear_terminal()
        print('')
        print(Fore.YELLOW)
        print(Style.BRIGHT)
        print(f"STARTED AT: {self.started_at_str}")
        print(f"RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} s")
        print(Style.RESET_ALL)
        # print("")
        print(Fore.GREEN)
        print(Style.BRIGHT)
        print("MOVEMENT NEURAL NETWORK:\n")
        self.dqn_action.output_step_data()
        print(Style.RESET_ALL)
        print(Style.BRIGHT)
        print(f"CURRENT RUN: {self.game_state['current_run']}")
        print("")
        print(f"CURRENT RUN REWARD: {round(self.game_state['reward'], 4)}")
        print(f"CURRENT AI SCORE: {self.game_state['ai_score'][0]}")
        print(f"CURRENT ENEMY SCORE: {self.game_state['com_score'][0]}")
        print("")
        print(f"PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
        print(Style.RESET_ALL)

        self.dqn_action.pick_action()
        self.dqn_action.generate_action()

        movement_keys = self.dqn_action.get_input_values()

        print("")
        print(Fore.GREEN)
        print(Style.BRIGHT)
        # print(movement_keys)
        print(" + ".join(list(map(lambda k: self.key_mapping.get(k), movement_keys))))
        print(Style.RESET_ALL)
        print("")
        print(f"AI: ({self.game_state['ai_x'][0]}, {self.game_state['ai_y'][0]})")
        print(f"COM: ({self.game_state['com_x']}, {self.game_state['com_y']})")
        print(f"BALL: ({self.game_state['ball_x'][0]}, {self.game_state['ball_y'][0]})")
        print(f"Collision: ({self.game_state['col_x']}, {self.game_state['col_y']}, {self.game_state['col_size'][0]})")
        print(f"Distance: {self.game_state['distance'][0]}")

        self.input_controller.handle_keys(movement_keys)

        if self.dqn_action.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_action.erode_epsilon(factor=2)
        self.dqn_action.next_step()

        self.game_state["current_run"] += 1

        if self.game_state['ai_score'][0] == 15 or self.game_state['com_score'][0] == 15:
            # Game over
            self.game_state["ai_score"].appendleft(0)
            self.game_state["com_score"].appendleft(0)

            self.handle_fight_end(game_frame)
def train_ddqn(self, game_frame):
    if self.dqn_movement.first_run:
        self.dqn_movement.first_run = False
        self.dqn_projectile.first_run = False

        return None

    heart = frame_to_hearts(game_frame.frame, self.game)
    score = self._process_ocr(game_frame)

    self.get_reward_state(heart, score)

    if self.dqn_movement.frame_stack is None:
        pipeline_game_frame = FrameGrabber.get_frames(
            [0],
            frame_shape=(self.game.frame_height, self.game.frame_width),
            frame_type="PIPELINE",
            dtype="float64"
        ).frames[0]

        print(np.shape(pipeline_game_frame.frame))

        # self.dqn_movement.build_frame_stack(pipeline_game_frame.frame)
        self.dqn_movement.frame_stack = self._build_frame_stack(pipeline_game_frame.frame)
        self.dqn_projectile.frame_stack = self.dqn_movement.frame_stack
    else:
        game_frame_buffer = FrameGrabber.get_frames(
            [0],  # [0, 4, 8, 12]
            frame_shape=(self.game.frame_height, self.game.frame_width),
            frame_type="PIPELINE",
            dtype="float64"
        )

        if self.dqn_movement.mode == "TRAIN":
            self.game_state["run_reward_movement"] += self.reward
            self.game_state["run_reward_projectile"] += self.reward

            self._movement_append_to_replay_memory(game_frame_buffer, self.reward, terminal=self.game_over)
            self._projectile_append_to_replay_memory(game_frame_buffer, self.reward, terminal=self.game_over)

            # Every 2000 steps, save latest weights to disk
            if self.dqn_movement.current_step % 2000 == 0:
                self.dqn_movement.save_model_weights(file_path_prefix="datasets/binding_of_isaac_movement")
                self.dqn_projectile.save_model_weights(file_path_prefix="datasets/binding_of_isaac_projectile")

            # Every 20000 steps, save a weights checkpoint to disk
            if self.dqn_movement.current_step % 20000 == 0:
                self.dqn_movement.save_model_weights(
                    file_path_prefix="datasets/c_binding_of_isaac_movement",
                    is_checkpoint=True
                )
                self.dqn_projectile.save_model_weights(
                    file_path_prefix="datasets/c_binding_of_isaac_projectile",
                    is_checkpoint=True
                )
        elif self.dqn_movement.mode == "RUN":
            game_frames = [game_frame.frame for game_frame in game_frame_buffer.frames]

            self.dqn_movement.frame_stack = np.array(game_frames)
            self.dqn_projectile.frame_stack = np.array(game_frames)

        run_time = datetime.now() - self.started_at

        serpent.utilities.clear_terminal()
        print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, "
              f"{(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
        print("MOVEMENT NEURAL NETWORK:\n")
        self.dqn_movement.output_step_data()
        print(f"reward: {self.reward}")
        print("PROJECTILE NEURAL NETWORK:\n")
        self.dqn_projectile.output_step_data()
        print(f"CURRENT RUN: {self.game_state['current_run']}")
        print(f"CURRENT RUN REWARD: "
              f"{round(self.game_state['run_reward_movement'] + self.game_state['run_reward_projectile'], 2)}")
        print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
        print(f"CURRENT HEALTH: {heart}")
        print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
        print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds "
              f"(Run {self.game_state['record_time_alive'].get('run')}, "
              f"{'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
        print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

        if self.game_over:
            serpent.utilities.clear_terminal()

            timestamp = datetime.utcnow()

            gc.enable()
            gc.collect()
            gc.disable()

            timestamp_delta = timestamp - self.game_state["run_timestamp"]
            self.game_state["last_run_duration"] = timestamp_delta.seconds

            if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                # Check for records
                if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                    self.game_state["record_time_alive"] = {
                        "value": self.game_state["last_run_duration"],
                        "run": self.game_state["current_run"],
                        "predicted": self.dqn_movement.mode == "RUN"
                    }
            else:
                self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

            self.game_state["current_run_state"] = 0

            self.input_controller.handle_keys([])

            if self.dqn_movement.mode == "TRAIN":
                for i in range(16):
                    serpent.utilities.clear_terminal()
                    print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                    print(f"NEXT RUN: {self.game_state['current_run'] + 1} "
                          f"{'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                    self.dqn_movement.train_on_mini_batch()
                    self.dqn_projectile.train_on_mini_batch()

            self.game_state["run_timestamp"] = datetime.utcnow()
            self.game_state["current_run"] += 1
            self.game_state["run_reward_movement"] = 0
            self.game_state["run_reward_projectile"] = 0
            self.game_state["run_predicted_actions"] = 0
            self.s_p1 = 16
            self.game_over = False
            self.reward = 0

            if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                    self.dqn_movement.update_target_model()
                    self.dqn_projectile.update_target_model()

                if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                    self.dqn_movement.enter_run_mode()
                    self.dqn_projectile.enter_run_mode()
                else:
                    self.dqn_movement.enter_train_mode()
                    self.dqn_projectile.enter_train_mode()

            return None

        self.dqn_movement.pick_action()
        self.dqn_movement.generate_action()

        self.dqn_projectile.pick_action(action_type=self.dqn_movement.current_action_type)
        self.dqn_projectile.generate_action()

        try:
            _thread.start_new_thread(self._execute_action, ("Thread",))
        except Exception as e:
            print(e)

        if self.dqn_movement.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_movement.erode_epsilon(factor=2)
        self.dqn_projectile.erode_epsilon(factor=2)

        self.dqn_movement.next_step()
        self.dqn_projectile.next_step()

        self.game_state["current_run_steps"] += 1
def grab_latest_frame(self):
    game_frame_buffer = FrameGrabber.get_frames(
        [0],
        (self.window_geometry.get("height"), self.window_geometry.get("width"), 3)
    )

    return game_frame_buffer.frames[0]
def setup_play(self):
    self.game_inputs = {
        "MOVE UP": [KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_W)],
        "MOVE LEFT": [KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_A)],
        "MOVE DOWN": [KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_S)],
        "MOVE RIGHT": [KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_D)],
        "MOVE TOP-LEFT": [
            KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_W),
            KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_A)
        ],
        "MOVE TOP-RIGHT": [
            KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_W),
            KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_D)
        ],
        "MOVE DOWN-LEFT": [
            KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_S),
            KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_A)
        ],
        "MOVE DOWN-RIGHT": [
            KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_S),
            KeyboardEvent(KeyboardEvents.DOWN, KeyboardKey.KEY_D)
        ],
        "SHOOT UP": [MouseEvent(MouseEvents.CLICK, MouseButton.LEFT)],
        "SHOOT LEFT": [MouseEvent(MouseEvents.CLICK, MouseButton.RIGHT)],
        "DON'T MOVE": []
    }

    self.ppo_agent = SerpentPPO(
        frame_shape=(125, 112, 4),
        game_inputs=self.game_inputs
    )

    self.first_run = True
    self.game_over = False

    self.run_count = 0
    self.run_reward = 0

    self.observation_count = 0
    self.episode_observation_count = 0

    self.performed_inputs = collections.deque(list(), maxlen=8)

    self.reward_10 = collections.deque(list(), maxlen=10)
    self.reward_100 = collections.deque(list(), maxlen=100)
    self.reward_1000 = collections.deque(list(), maxlen=1000)

    self.rewards = list()

    self.average_reward_10 = 0
    self.average_reward_100 = 0
    self.average_reward_1000 = 0

    self.top_reward = 0
    self.top_reward_run = 0

    self.previous_score = 0

    self.score_10 = collections.deque(list(), maxlen=10)
    self.score_100 = collections.deque(list(), maxlen=100)
    self.score_1000 = collections.deque(list(), maxlen=1000)

    self.average_score_10 = 0
    self.average_score_100 = 0
    self.average_score_1000 = 0

    self.best_score = 0
    self.best_score_run = 0

    self.just_relaunched = False

    self.frame_buffer = None

    try:
        self.ppo_agent.agent.restore_model(directory=os.path.join(os.getcwd(), "datasets", "pacai"))
        self.restore_metadata()
    except Exception:
        pass

    self.analytics_client.track(event_key="INITIALIZE", data=dict(episode_rewards=[]))

    for reward in self.rewards:
        self.analytics_client.track(event_key="EPISODE_REWARD", data=dict(reward=reward))
        time.sleep(0.01)

    # Warm Agent?
    game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")
    game_frame_buffer = self.extract_game_area(game_frame_buffer)

    self.ppo_agent.generate_action(game_frame_buffer)

    self.score = collections.deque(np.full((16,), 0), maxlen=16)
    self.lives = collections.deque(np.full((16,), 3), maxlen=16)

    self.continuity_bonus = 0

    self.started_at = datetime.utcnow().isoformat()
    self.episode_started_at = None
    self.paused_at = None

    print("Enter - Auto Save")
    self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
    time.sleep(2)

    print("Enter - Menu")
    self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
    time.sleep(1)

    print("Enter - Start game")
    self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
    time.sleep(1)

    # Make sure to initialize Game() after passing the Start game menu,
    # otherwise the pointers may not be fully loaded.
    self.game_data = Game()

    return
def handle_play(self, game_frame):
    if self.first_run:
        self.run_count += 1
        self.first_run = False

        self.episode_started_at = time.time()

        return None

    self.printer.add("")
    self.printer.add("Log234 - Pac-AI")
    self.printer.add("Reinforcement Learning: Training a PPO Agent")
    self.printer.add("")
    self.printer.add(f"Stage Started At: {self.started_at}")
    self.printer.add(f"Current Run: #{self.run_count}")
    self.printer.add("")

    if self.game_data.IsPaused():
        if self.paused_at is None:
            self.paused_at = time.time()

        # Give ourselves 30 seconds to work with
        if time.time() - self.paused_at >= 30:
            self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
            time.sleep(1)
            return

        self.printer.add("The game is paused.")
        self.printer.flush()

        return
    else:
        self.paused_at = None

    self.score.appendleft(self.game_data.GetScore())
    self.printer.add(f"Score: {self.score[0]}")

    self.lives.appendleft(self.game_data.GetLives())
    self.printer.add(f"Lives: {self.lives[0]}")

    reward = self.reward_agent()

    self.printer.add(f"Current Reward: {round(reward, 2)}")
    self.printer.add(f"Run Reward: {round(self.run_reward, 2)}")
    self.printer.add("")

    if self.frame_buffer is not None:
        self.run_reward += reward

        self.observation_count += 1
        self.episode_observation_count += 1

        self.analytics_client.track(event_key="RUN_REWARD", data=dict(reward=reward))

        if self.ppo_agent.agent.batch_count == self.ppo_agent.agent.batch_size - 1:
            self.printer.flush()
            self.printer.add("")
            self.printer.add("Updating Pac-AI Model With New Data... ")
            self.printer.flush()

            self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)

            self.ppo_agent.observe(reward, terminal=self.game_data.IsOver())

            self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)

            self.frame_buffer = None

            if not self.game_data.IsOver():
                time.sleep(1)
                return None
        else:
            self.ppo_agent.observe(reward, terminal=self.game_data.IsOver())

    self.printer.add(f"Observation Count: {self.observation_count}")
    self.printer.add(f"Episode Observation Count: {self.episode_observation_count}")
    self.printer.add(f"Current Batch Size: {self.ppo_agent.agent.batch_count}")
    self.printer.add("")

    if not self.game_data.IsOver():
        self.death_check = False

        self.printer.add(f"Continuity Bonus: {round(self.continuity_bonus, 2)}")
        self.printer.add("")
        self.printer.add(f"Average Rewards (Last 10 Runs): {round(self.average_reward_10, 2)}")
        self.printer.add(f"Average Rewards (Last 100 Runs): {round(self.average_reward_100, 2)}")
        self.printer.add(f"Average Rewards (Last 1000 Runs): {round(self.average_reward_1000, 2)}")
        self.printer.add("")
        self.printer.add(f"Top Run Reward: {round(self.top_reward, 2)} (Run #{self.top_reward_run})")
        self.printer.add("")
        self.printer.add(f"Previous Run Score: {round(self.previous_score, 2)}")
        self.printer.add("")
        self.printer.add(f"Average Score (Last 10 Runs): {round(self.average_score_10, 2)}")
        self.printer.add(f"Average Score (Last 100 Runs): {round(self.average_score_100, 2)}")
        self.printer.add(f"Average Score (Last 1000 Runs): {round(self.average_score_1000, 2)}")
        self.printer.add("")
        self.printer.add(f"Best Score: {round(self.best_score, 2)} (Run #{self.best_score_run})")
        self.printer.add("")
        self.printer.add("Latest Inputs:")
        self.printer.add("")

        for i in self.performed_inputs:
            self.printer.add(i)

        self.printer.flush()

        self.frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")
        self.frame_buffer = self.extract_game_area(self.frame_buffer)

        action, label, game_input = self.ppo_agent.generate_action(self.frame_buffer)

        self.performed_inputs.appendleft(label)
        self.input_controller.handle_keys(game_input)
    else:
        self.input_controller.handle_keys([])

        self.analytics_client.track(event_key="RUN_END", data=dict(run=self.run_count))

        self.printer.add("Game Over.")
        self.printer.flush()

        self.run_count += 1

        self.reward_10.appendleft(self.run_reward)
        self.reward_100.appendleft(self.run_reward)
        self.reward_1000.appendleft(self.run_reward)

        self.rewards.append(self.run_reward)

        self.average_reward_10 = float(np.mean(self.reward_10))
        self.average_reward_100 = float(np.mean(self.reward_100))
        self.average_reward_1000 = float(np.mean(self.reward_1000))

        if self.run_reward > self.top_reward:
            self.top_reward = self.run_reward
            self.top_reward_run = self.run_count - 1

            self.analytics_client.track(
                event_key="NEW_RECORD",
                data=dict(type="REWARD", value=self.run_reward, run=self.run_count - 1)
            )

        self.analytics_client.track(event_key="EPISODE_REWARD", data=dict(reward=self.run_reward))

        self.previous_score = max(list(self.score)[:4])

        self.run_reward = 0

        self.score_10.appendleft(self.previous_score)
        self.score_100.appendleft(self.previous_score)
        self.score_1000.appendleft(self.previous_score)

        self.average_score_10 = float(np.mean(self.score_10))
        self.average_score_100 = float(np.mean(self.score_100))
        self.average_score_1000 = float(np.mean(self.score_1000))

        if self.previous_score > self.best_score:
            self.best_score = self.previous_score
            self.best_score_run = self.run_count - 1

            self.analytics_client.track(
                event_key="NEW_RECORD",
                data=dict(type="score", value=self.previous_score, run=self.run_count - 1)
            )

        if not self.run_count % 10:
            self.ppo_agent.agent.save_model(
                directory=os.path.join(os.getcwd(), "datasets", "pacai", "ppo_model"),
                append_timestep=False
            )

            self.dump_metadata()

        self.lives = collections.deque(np.full((16,), 3), maxlen=16)
        self.score = collections.deque(np.full((16,), 0), maxlen=16)

        self.multiplier_damage = 0

        self.performed_inputs.clear()

        self.frame_buffer = None

        self.input_controller.tap_key(KeyboardKey.KEY_ENTER, duration=1.5)

        self.episode_started_at = time.time()
        self.episode_observation_count = 0
def handle_play_ddqn(self, game_frame):
    gc.disable()

    if self.dqn_movement.first_run:
        self.input_controller.tap_key(KeyboardKey.KEY_W)

        self.dqn_movement.first_run = False

        time.sleep(5)

        return None

    dragon_alive = self._measure_dragon_alive(game_frame)
    # dragon_coins = self._measure_dragon_coins(game_frame)

    self.game_state["alive"].appendleft(dragon_alive)
    # self.game_state["coins"].appendleft(dragon_coins)

    if self.dqn_movement.frame_stack is None:
        # pipeline_game_frame = FrameGrabber.get_frames(
        #     [0],
        #     frame_shape=game_frame.frame.shape,
        #     frame_type="MINI"
        # ).frames[0]

        self.dqn_movement.build_frame_stack(game_frame.ssim_frame)
    else:
        game_frame_buffer = FrameGrabber.get_frames(
            [0, 4, 8, 12],
            frame_shape=game_frame.frame.shape,
            frame_type="MINI"
        )

        if self.dqn_movement.mode == "TRAIN":
            reward = self._calculate_reward()

            self.game_state["run_reward"] += reward

            self.dqn_movement.append_to_replay_memory(
                game_frame_buffer,
                reward,
                terminal=self.game_state["alive"][0] == 0
            )

            # Every 2000 steps, save latest weights to disk
            if self.dqn_movement.current_step % 2000 == 0:
                self.dqn_movement.save_model_weights(
                    file_path_prefix="datasets/cloney_movement"
                )

            # Every 20000 steps, save a weights checkpoint to disk
            if self.dqn_movement.current_step % 20000 == 0:
                self.dqn_movement.save_model_weights(
                    file_path_prefix="datasets/cloney_movement",
                    is_checkpoint=True
                )
        elif self.dqn_movement.mode == "RUN":
            self.dqn_movement.update_frame_stack(game_frame_buffer)

        run_time = datetime.now() - self.started_at

        print("\033c" + f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
        print("")
        print("MOVEMENT NEURAL NETWORK:\n")
        self.dqn_movement.output_step_data()
        print("")
        print(f"CURRENT RUN: {self.game_state['current_run']}")
        print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward'], 2)}")
        print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
        print(f"CURRENT DRAGON ALIVE: {self.game_state['alive'][0]}")
        # print(f"CURRENT DRAGON COINS: {self.game_state['coins'][0]}")
        print("")
        # print(f"AVERAGE ACTIONS PER SECOND: {round(self.game_state['average_aps'], 2)}")
        print("")
        print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
        # print(f"LAST RUN COINS: {self.game_state['last_run_coins'][0]}")
        print("")
        print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
        # print(f"RECORD COINS COLLECTED: {self.game_state['record_coins_collected'].get('value')} coins (Run {self.game_state['record_coins_collected'].get('run')}, {'Predicted' if self.game_state['record_coins_collected'].get('predicted') else 'Training'})")
        print("")
        print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

        if self.game_state["alive"][1] <= 0:
            serpent.utilities.clear_terminal()

            timestamp = datetime.utcnow()

            gc.enable()
            gc.collect()
            gc.disable()

            # Set display stuff TODO

            timestamp_delta = timestamp - self.game_state["run_timestamp"]
            self.game_state["last_run_duration"] = timestamp_delta.seconds

            if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                # Check for records
                if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                    self.game_state["record_time_alive"] = {
                        "value": self.game_state["last_run_duration"],
                        "run": self.game_state["current_run"],
                        "predicted": self.dqn_movement.mode == "RUN"
                    }

                # if self.game_state["coins"][0] < self.game_state["record_coins_collected"].get("value", 1000):
                #     self.game_state["record_coins_collected"] = {
                #         "value": self.game_state["coins"][0],
                #         "run": self.game_state["current_run"],
                #         "predicted": self.dqn_movement.mode == "RUN"
                #     }
            else:
                self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

            self.game_state["current_run_steps"] = 0

            self.input_controller.release_key(KeyboardKey.KEY_SPACE)

            if self.dqn_movement.mode == "TRAIN":
                for i in range(8):
                    serpent.utilities.clear_terminal()
                    print(f"TRAINING ON MINI-BATCHES: {i + 1}/8")
                    print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                    self.dqn_movement.train_on_mini_batch()

            self.game_state["run_timestamp"] = datetime.utcnow()
            self.game_state["current_run"] += 1
            self.game_state["run_reward_movement"] = 0
            self.game_state["run_predicted_actions"] = 0
            self.game_state["alive"] = collections.deque(np.full((8,), 4), maxlen=8)
            # self.game_state["coins"] = collections.deque(np.full((8,), 0), maxlen=8)

            if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                    if self.dqn_movement.type == "DDQN":
                        self.dqn_movement.update_target_model()

                if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                    self.dqn_movement.enter_run_mode()
                else:
                    self.dqn_movement.enter_train_mode()

            self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
            time.sleep(5)

            return None

        self.dqn_movement.pick_action()
        self.dqn_movement.generate_action()

        keys = self.dqn_movement.get_input_values()

        print("")
        print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), keys))))

        self.input_controller.handle_keys(keys)

        if self.dqn_movement.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_movement.erode_epsilon(factor=2)

        self.dqn_movement.next_step()

        self.game_state["current_run_steps"] += 1
def update_game_frame(self, frame_type="FULL"):
    game_frame_buffer = FrameGrabber.get_frames([0], frame_type=frame_type)
    return game_frame_buffer.frames[0]
def handle_play(self, game_frame):
    self.isDescending = self.ascendDescend(game_frame)
    self.currentHP = self.computeActualHP(game_frame)
    self.falling(game_frame)

    for i, game_frame in enumerate(self.game_frame_buffer.frames):
        self.visual_debugger.store_image_data(
            game_frame.frame,
            game_frame.frame.shape,
            str(i)
        )

    if self.dqn_main_player.frame_stack is None:
        pipeline_game_frame = FrameGrabber.get_frames(
            [0],
            frame_shape=(100, 100),
            frame_type="PIPELINE",
            dtype="float64"
        ).frames[0]

        self.dqn_main_player.build_frame_stack(pipeline_game_frame.frame)
        self.dqn_buddy_player.frame_stack = self.dqn_main_player.frame_stack
    else:
        game_frame_buffer = FrameGrabber.get_frames(
            [0, 4, 8, 12],
            frame_shape=(100, 100),
            frame_type="PIPELINE",
            dtype="float64"
        )

        reward = self.calculate_reward()

        if self.dqn_main_player.mode == "TRAIN":
            self.game_state["run_reward_main"] += reward
            self.game_state["run_reward_buddy"] += reward

            self.dqn_main_player.append_to_replay_memory(
                game_frame_buffer,
                reward,
                terminal=self.currentHP == 0
            )

            self.dqn_buddy_player.append_to_replay_memory(
                game_frame_buffer,
                reward,
                terminal=self.currentHP == 0
            )

            # Every 2000 steps, save latest weights to disk
            if self.dqn_main_player.current_step % 2000 == 0:
                self.dqn_main_player.save_model_weights(file_path_prefix="datasets/dqn/dqn_main/")
                self.dqn_buddy_player.save_model_weights(file_path_prefix="datasets/dqn/dqn_buddy/")

            # Every 20000 steps, save a weights checkpoint to disk
            if self.dqn_main_player.current_step % 20000 == 0:
                self.dqn_main_player.save_model_weights(
                    file_path_prefix="datasets/dqn/dqn_main/",
                    is_checkpoint=True
                )
                self.dqn_buddy_player.save_model_weights(
                    file_path_prefix="datasets/dqn/dqn_buddy/",
                    is_checkpoint=True
                )
        elif self.dqn_main_player.mode == "RUN":
            self.dqn_main_player.update_frame_stack(game_frame_buffer)
            self.dqn_buddy_player.update_frame_stack(game_frame_buffer)

        run_time = datetime.now() - self.started_at

        serpent.utilities.clear_terminal()
        print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
        print("")
        print("MAIN NEURAL NETWORK:\n")
        self.dqn_main_player.output_step_data()
        print("")
        print("BUDDY NEURAL NETWORK:\n")
        self.dqn_buddy_player.output_step_data()
        print("")
        print(f"CURRENT RUN: {self.game_state['current_run']}")
        print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward_main'] + self.game_state['run_reward_buddy'], 2)}")
        print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
        print(f"CURRENT HEALTH: {self.currentHP}")
        print("")
        print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")
        print("")
        print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
        print("")
        print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

        if self.currentHP <= 0:
            serpent.utilities.clear_terminal()

            timestamp = datetime.utcnow()

            gc.enable()
            gc.collect()
            gc.disable()

            timestamp_delta = timestamp - self.game_state["run_timestamp"]
            self.game_state["last_run_duration"] = timestamp_delta.seconds

            if self.dqn_main_player.mode in ["TRAIN", "RUN"]:
                # Check for records
                if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                    self.game_state["record_time_alive"] = {
                        "value": self.game_state["last_run_duration"],
                        "run": self.game_state["current_run"],
                        "predicted": self.dqn_main_player.mode == "RUN"
                    }

            self.game_state["current_run_steps"] = 0

            self.input_controller.handle_keys([])

            if self.dqn_main_player.mode == "TRAIN":
                for i in range(16):
                    serpent.utilities.clear_terminal()
                    print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                    print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                    self.dqn_main_player.train_on_mini_batch()
                    self.dqn_buddy_player.train_on_mini_batch()

            self.game_state["run_timestamp"] = datetime.utcnow()
            self.game_state["current_run"] += 1
            self.game_state["run_reward_main"] = 0
            self.game_state["run_reward_buddy"] = 0
            self.game_state["run_predicted_actions"] = 0

            self.restartLevel()

            if self.dqn_main_player.mode in ["TRAIN", "RUN"]:
                if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                    self.dqn_main_player.update_target_model()
                    self.dqn_buddy_player.update_target_model()

                if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                    self.dqn_main_player.enter_run_mode()
                    self.dqn_buddy_player.enter_run_mode()
                else:
                    self.dqn_main_player.enter_train_mode()
                    self.dqn_buddy_player.enter_train_mode()

            return None

        # Alternate steps between the main player and the buddy player
        if self.actualStep % 2 == 0:
            self.dqn_main_player.pick_action()
            self.dqn_main_player.generate_action()

            movement_keys = self.dqn_main_player.get_input_values()

            print("")
            print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), movement_keys))))

            self.input_controller.handle_keys(movement_keys)

            self.dqn_main_player.erode_epsilon(factor=2)
            self.dqn_main_player.next_step()
            # time.sleep(1)
        else:
            self.dqn_buddy_player.pick_action()
            self.dqn_buddy_player.generate_action()

            movement_keys_buddy = self.dqn_buddy_player.get_input_values()

            print("")
            print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), movement_keys_buddy))))

            self.input_controller.handle_keys(movement_keys_buddy)

            self.dqn_buddy_player.erode_epsilon(factor=2)
            self.dqn_buddy_player.next_step()
            # time.sleep(1)

        # movement_keys = self.dqn_main_player.get_input_values()
        # movement_keys_buddy = self.dqn_buddy_player.get_input_values()
        # print("")
        # print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), movement_keys + movement_keys_buddy))))
        # self.input_controller.handle_keys(movement_keys + movement_keys_buddy)

        if self.dqn_main_player.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.game_state["current_run_steps"] += 1

        self.actualStep += 1