def __init__(self, save_memory_path=None, load_memory_path=None, save_weights_path=None, load_weights_path=None):
    self.save_memory_path = save_memory_path    # Path to save the memory/experience. Defaults to None: do not save.
    self.load_memory_path = load_memory_path    # Path to load the memory/experience from. Defaults to None: do not load.
    self.sekiro_agent = Sekiro_Agent(
        save_weights_path=save_weights_path,    # Path to save the model weights. Defaults to None: do not save.
        load_weights_path=load_weights_path     # Path to load the model weights from. Defaults to None: do not load.
    )
    if not save_weights_path:    # Note: test mode is the default; setting this parameter turns on training mode.
        self.train = False
        self.sekiro_agent.step = self.sekiro_agent.replay_start_size + 1
    else:
        self.train = True
    self.reward_system = RewardSystem()
    self.i = 0    # step counter
    self.screens = deque(maxlen=in_depth * 2)    # store screen images in a double-ended queue
    if self.load_memory_path:
        self.load_memory()    # load the memory/experience
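The `save_weights_path` argument doubles as the train/test switch: when it is omitted, the constructor sets `self.train = False` and pushes `step` past `replay_start_size`, so the agent skips the replay-buffer warm-up and plays with its loaded policy. A sketch of both ways the class would be constructed; the file names here are hypothetical:

# Training mode: experience and weights are persisted.
# 'memory.json' and 'sekiro_weights.h5' are hypothetical paths.
game = Play_Sekiro_Online(
    save_memory_path='memory.json',
    save_weights_path='sekiro_weights.h5',
)

# Test mode: without save_weights_path, round() neither stores
# transitions nor calls learn(); it only chooses actions.
game = Play_Sekiro_Online(load_weights_path='sekiro_weights.h5')

game.run()    # 'T' starts the loop, 'P' stops it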
class Play_Sekiro_Offline:
    def __init__(self, lr, batch_size, load_memory_path, save_weights_path, load_weights_path=None):
        self.sekiro_agent = Sekiro_Agent(
            lr=lr,                                  # learning rate
            batch_size=batch_size,                  # number of samples drawn per update
            load_weights_path=load_weights_path,    # Path to load the model weights from. Defaults to None: do not load.
            save_weights_path=save_weights_path     # Path to save the model weights.
        )
        self.load_memory_path = load_memory_path    # Path to load the memory/experience from.
        self.load_memory()

    def load_memory(self):
        if os.path.exists(self.load_memory_path):
            last_time = time.time()
            self.sekiro_agent.replayer.memory = pd.read_json(self.load_memory_path)    # load the memory/experience from the JSON file
            print(f'Load {self.load_memory_path}. Took {round(time.time()-last_time, 3):>5} seconds.')
            self.sekiro_agent.replayer.count = self.sekiro_agent.replayer.memory.action.count()
        else:
            print('No memory to load.')

    def run(self):
        paused = True
        print("Ready!")
        while True:
            keys = key_check()
            if paused:
                if 'T' in keys:
                    paused = False
                    print('\nStarting!')
            else:    # after pressing 'T', every subsequent loop iteration lands here
                self.sekiro_agent.learn(verbose=1)
                print(f'\r step:{self.sekiro_agent.step:>6}', end='')
            if 'P' in keys:
                break
        self.sekiro_agent.save_evaluate_network()    # learning finished: save the network weights
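Unlike the online class below, `Play_Sekiro_Offline` never touches the game: it loads an experience file recorded earlier and just calls `learn()` in a loop. A usage sketch, with hypothetical paths and hyperparameter values:

# Hypothetical paths and hyperparameters, for illustration only.
trainer = Play_Sekiro_Offline(
    lr=0.001,
    batch_size=16,
    load_memory_path='memory.json',        # experience recorded by Play_Sekiro_Online
    save_weights_path='sekiro_weights.h5',
)
trainer.run()    # press 'T' to start learning, 'P' to stop and save the weights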
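Both classes poke at `sekiro_agent.replayer` directly: `load_memory` replaces `replayer.memory` with a DataFrame read from JSON and resets `count` (and, in the online class, the write index `i`), while the online loop calls `replayer.store(observation, action, reward, next_observation)`. A minimal DataFrame-backed buffer consistent with that interface, sketched as an assumption since the project's actual Replayer class is not shown in this section:

# Assumed replay buffer matching the attributes used above
# (memory, i, count, store); the project's real Replayer may differ.
import numpy as np
import pandas as pd

class Replayer:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = pd.DataFrame(
            index=range(capacity),
            columns=['observation', 'action', 'reward', 'next_observation'])
        self.i = 0       # next write position, wraps around at capacity
        self.count = 0   # number of transitions stored, capped at capacity

    def store(self, *args):
        self.memory.loc[self.i] = args
        self.i = (self.i + 1) % self.capacity
        self.count = min(self.count + 1, self.capacity)

    def sample(self, size):
        indices = np.random.choice(self.count, size=size)
        return (np.stack(self.memory.loc[indices, field])
                for field in self.memory.columns)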
class Play_Sekiro_Online:
    def __init__(self, save_memory_path=None, load_memory_path=None, save_weights_path=None, load_weights_path=None):
        self.save_memory_path = save_memory_path    # Path to save the memory/experience. Defaults to None: do not save.
        self.load_memory_path = load_memory_path    # Path to load the memory/experience from. Defaults to None: do not load.
        self.sekiro_agent = Sekiro_Agent(
            save_weights_path=save_weights_path,    # Path to save the model weights. Defaults to None: do not save.
            load_weights_path=load_weights_path     # Path to load the model weights from. Defaults to None: do not load.
        )
        if not save_weights_path:    # Note: test mode is the default; setting this parameter turns on training mode.
            self.train = False
            self.sekiro_agent.step = self.sekiro_agent.replay_start_size + 1
        else:
            self.train = True
        self.reward_system = RewardSystem()
        self.i = 0    # step counter
        self.screens = deque(maxlen=in_depth * 2)    # store screen images in a double-ended queue
        if self.load_memory_path:
            self.load_memory()    # load the memory/experience

    def load_memory(self):
        if os.path.exists(self.load_memory_path):
            last_time = time.time()
            self.sekiro_agent.replayer.memory = pd.read_json(self.load_memory_path)    # load the memory/experience from the JSON file
            print(f'Load {self.load_memory_path}. Took {round(time.time()-last_time, 3):>5} seconds.')
            i = self.sekiro_agent.replayer.memory.action.count()
            self.sekiro_agent.replayer.i = i
            self.sekiro_agent.replayer.count = i
            self.sekiro_agent.step = i
        else:
            print('No memory to load.')

    def get_S(self):
        for _ in range(in_depth):
            self.screens.append(get_screen())    # first in, first out: new frames enter on the right, old frames drop off the left

    def img_processing(self, screens):
        return np.array([
            cv2.resize(
                roi(cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY), x, x_w, y, y_h),
                (in_height, in_width)) for screen in screens
        ])

    def round(self):
        observation = self.img_processing(list(self.screens)[:in_depth])    # S
        action = self.action = self.sekiro_agent.choose_action(observation)    # A
        self.get_S()
        reward = self.reward_system.get_reward(
            cur_status=get_status(list(self.screens)[in_depth - 1])[:4],
            next_status=get_status(list(self.screens)[in_depth * 2 - 1])[:4])    # R
        next_observation = self.img_processing(list(self.screens)[in_depth:])    # S'
        if self.train:
            self.sekiro_agent.replayer.store(observation, action, reward, next_observation)
            if self.sekiro_agent.replayer.count >= self.sekiro_agent.replay_start_size:
                self.sekiro_agent.learn()

    def run(self):
        paused = True
        print("Ready!")
        while True:
            last_time = time.time()
            keys = key_check()
            if paused:
                if 'T' in keys:
                    self.get_S()
                    paused = False
                    print('\nStarting!')
            else:    # after pressing 'T', every subsequent loop iteration lands here
                self.i += 1
                self.round()
                print(
                    f'\r {self.sekiro_agent.who_play:>4} , step: {self.i:>6} . '
                    f'Loop took {round(time.time()-last_time, 3):>5} seconds. '
                    f'action {self.action:>1} , total_reward: {self.reward_system.total_reward:>10.3f} , '
                    f'memory: {self.sekiro_agent.replayer.count:>7} .',
                    end='')
            if 'P' in keys:
                if self.train:
                    self.sekiro_agent.save_evaluate_network()    # learning finished: save the network weights
                    self.sekiro_agent.replayer.memory.to_json(self.save_memory_path)    # save the experience
                    self.reward_system.save_reward_curve()    # plot the reward curve and save it in the current directory
                break
        print('\nDone!')
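`round()` relies on the deque holding exactly `2 * in_depth` frames: the left half is the state S the action was chosen from, and the right half, captured by the second `get_S()` call, becomes S'. The reward is computed from the last frame of each half. A standalone sketch of that sliding window, using a toy `in_depth` and integers in place of screen captures:

# Toy demonstration of the 2 * in_depth sliding window used by round().
# Integers stand in for screen frames; in_depth=3 is an arbitrary example.
from collections import deque

in_depth = 3
screens = deque(maxlen=in_depth * 2)

frame = 0
def get_screen():
    global frame
    frame += 1
    return frame

# First get_S(): deque holds [1, 2, 3] -> observation S
for _ in range(in_depth):
    screens.append(get_screen())
print(list(screens)[:in_depth])            # [1, 2, 3]  (S)

# Second get_S(): deque holds [1, 2, 3, 4, 5, 6]
for _ in range(in_depth):
    screens.append(get_screen())
print(list(screens)[in_depth:])            # [4, 5, 6]  (S')
print(list(screens)[in_depth - 1],         # 3: frame used for cur_status
      list(screens)[in_depth * 2 - 1])     # 6: frame used for next_status

# A third get_S() evicts the old S from the left edge of the deque.
for _ in range(in_depth):
    screens.append(get_screen())
print(list(screens))                       # [4, 5, 6, 7, 8, 9]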