def save(self):
    utils.remove_dir(self.path)  # clear any previous checkpoint at this path
    utils.make_dir(self.path)
    foo = lambda model, params, name: save_model(
        model, os.path.join(self.path, name + MODEL_STATE_EXT), params
    )
    # save each named sub-model plus the stacker
    self._apply(foo, self.model_names + ['stacker'])
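# For context, a minimal sketch of a `save_model` helper matching the call
# site above. The signature is taken from the call; serializing with
# torch.save is an assumption, not necessarily how this repo implements it.
import torch

def save_model(model, path, params):
    # Persist the network weights together with the hyperparameters
    # used to build it, so the model can be reconstructed on load.
    torch.save({'state_dict': model.state_dict(), 'params': params}, path)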
            if done:
                break
        rewards.append(ep_reward)
        if ma_rewards:
            ma_rewards.append(ma_rewards[-1] * 0.9 + ep_reward * 0.1)
        else:
            ma_rewards.append(ep_reward)
        print(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.1f}")
    print('Finished testing!')
    return rewards, ma_rewards

if __name__ == "__main__":
    cfg = DQNConfig()
    plot_cfg = PlotConfig()
    # train
    env, agent = env_agent_config(cfg, seed=1)
    rewards, ma_rewards = train(cfg, env, agent)
    make_dir(plot_cfg.result_path, plot_cfg.model_path)  # create folders for results and models
    agent.save(path=plot_cfg.model_path)  # save the model
    save_results(rewards, ma_rewards, tag='train', path=plot_cfg.result_path)  # save results
    plot_rewards_cn(rewards, ma_rewards, plot_cfg, tag="train")  # plot results
    # test
    env, agent = env_agent_config(cfg, seed=10)
    agent.load(path=plot_cfg.model_path)  # load the trained model
    rewards, ma_rewards = test(cfg, env, agent)
    save_results(rewards, ma_rewards, tag='test', path=plot_cfg.result_path)  # save results
    plot_rewards_cn(rewards, ma_rewards, plot_cfg, tag="test")  # plot results
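# The ma_rewards update above (and in the scripts below) is an exponential
# moving average with smoothing factor 0.1. Factored into a standalone
# helper for clarity (the name `smooth` is hypothetical, for illustration):
def smooth(rewards, alpha=0.1):
    # ma[t] = (1 - alpha) * ma[t-1] + alpha * r[t], seeded with r[0]
    ma = []
    for r in rewards:
        ma.append((1 - alpha) * ma[-1] + alpha * r if ma else r)
    return ma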
        rewards.append(ep_reward)
        if ma_rewards:
            ma_rewards.append(0.9 * ma_rewards[-1] + 0.1 * ep_reward)
        else:
            ma_rewards.append(ep_reward)
    print('Complete training!')
    return rewards, ma_rewards

if __name__ == "__main__":
    cfg = HierarchicalDQNConfig()
    # train
    env, agent = env_agent_config(cfg, seed=1)
    rewards, ma_rewards = train(cfg, env, agent)
    make_dir(cfg.result_path, cfg.model_path)
    agent.save(path=cfg.model_path)
    save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
    plot_rewards(rewards, ma_rewards, tag="train", algo=cfg.algo, path=cfg.result_path)
    # eval
    env, agent = env_agent_config(cfg, seed=10)
    agent.load(path=cfg.model_path)
    rewards, ma_rewards = eval(cfg, env, agent)  # the repo's eval routine (shadows the builtin)
    save_results(rewards, ma_rewards, tag='eval', path=cfg.result_path)
    plot_rewards(rewards, ma_rewards, tag="eval", algo=cfg.algo, path=cfg.result_path)
            '/' + curr_time + '/models/'  # path to save models
        self.save = True  # whether to save figures
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")  # check GPU availability

def env_agent_config(cfg, seed=1):
    env = NormalizedActions(gym.make(cfg.env_name))  # wrap the env to rescale actions
    env.seed(seed)  # set the random seed
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    agent = DDPG(state_dim, action_dim, cfg)
    return env, agent

cfg = DDPGConfig()
plot_cfg = PlotConfig()
# train
env, agent = env_agent_config(cfg, seed=1)
rewards, ma_rewards = train(cfg, env, agent)
make_dir(plot_cfg.result_path, plot_cfg.model_path)
agent.save(path=plot_cfg.model_path)
save_results(rewards, ma_rewards, tag='train', path=plot_cfg.result_path)
plot_rewards(rewards, ma_rewards, plot_cfg, tag="train")  # plot results
# test
env, agent = env_agent_config(cfg, seed=10)
agent.load(path=plot_cfg.model_path)
rewards, ma_rewards = test(cfg, env, agent)
save_results(rewards, ma_rewards, tag='test', path=plot_cfg.result_path)
plot_rewards(rewards, ma_rewards, plot_cfg, tag="test")  # plot results
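# NormalizedActions is imported from elsewhere in the repo. A typical
# implementation (assumed here, not taken from the source) is a
# gym.ActionWrapper that rescales the agent's [-1, 1] actions into the
# environment's action bounds:
import gym
import numpy as np

class NormalizedActions(gym.ActionWrapper):
    def action(self, action):
        # Map the action from [-1, 1] to [low, high] of the wrapped env.
        low, high = self.action_space.low, self.action_space.high
        action = low + (action + 1.0) * 0.5 * (high - low)
        return np.clip(action, low, high)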
            if done:
                break
        rewards.append(ep_reward)
        if ma_rewards:
            ma_rewards.append(ma_rewards[-1] * 0.9 + ep_reward * 0.1)
        else:
            ma_rewards.append(ep_reward)
        print(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.1f}")
    print('Finished testing!')
    env.close()
    return rewards, ma_rewards

if __name__ == "__main__":
    cfg = Config()
    # train
    env, agent = env_agent_config(cfg)
    rewards, ma_rewards = train(cfg, env, agent)
    make_dir(cfg.result_path, cfg.model_path)  # create folders for results and models
    agent.save(path=cfg.model_path)  # save the model
    save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)  # save results
    plot_rewards(rewards, ma_rewards, cfg, tag="train")  # plot results
    # test
    env, agent = env_agent_config(cfg)
    agent.load(path=cfg.model_path)  # load the trained model
    rewards, ma_rewards = test(cfg, env, agent)
    save_results(rewards, ma_rewards, tag='test', path=cfg.result_path)  # save results
    plot_rewards(rewards, ma_rewards, cfg, tag="test")  # plot results
import os
import sys

curr_path = os.path.dirname(__file__)
parent_path = os.path.dirname(curr_path)
sys.path.append(parent_path)  # add the parent path to sys.path

import gym
import torch
import datetime
from DQN.agent import DQN
from common.plot import plot_rewards
from common.utils import save_results, make_dir, del_empty_dir

SEQUENCE = datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S")  # obtain current time
SAVED_MODEL_PATH = curr_path + "/saved_model/" + SEQUENCE + '/'  # path to save model
RESULT_PATH = curr_path + "/results/" + SEQUENCE + '/'  # path to save rewards
make_dir(curr_path + "/saved_model/", curr_path + "/results/")
del_empty_dir(curr_path + "/saved_model/", curr_path + "/results/")

class DQNConfig:
    def __init__(self):
        self.env = 'LunarLander-v2'
        self.algo = "DQN"  # name of the algorithm
        self.gamma = 0.95
        self.epsilon_start = 1  # initial epsilon for the e-greedy policy
        self.epsilon_end = 0.01
        self.epsilon_decay = 500
        self.lr = 0.0001  # learning rate
        self.memory_capacity = 1000000  # capacity of the replay memory
        self.batch_size = 64
        self.train_eps = 300  # number of training episodes
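# The three epsilon fields above suggest an exponentially decaying
# e-greedy schedule. A sketch of such a schedule follows; this is an
# assumption for illustration, as the actual decay lives in DQN.agent:
import math

def epsilon_by_frame(frame_idx, cfg):
    # Decay from epsilon_start toward epsilon_end with time
    # constant epsilon_decay (measured in frames).
    return cfg.epsilon_end + (cfg.epsilon_start - cfg.epsilon_end) * \
        math.exp(-frame_idx / cfg.epsilon_decay)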
        agent.update()
        if i_episode % cfg.target_update == 0:
            agent.target_net.load_state_dict(agent.policy_net.state_dict())
        print('Episode:{}/{}, Reward:{}'.format(i_episode + 1, cfg.train_eps, ep_reward))
        rewards.append(ep_reward)
        # compute the moving-average reward over episodes
        if ma_rewards:
            ma_rewards.append(0.9 * ma_rewards[-1] + 0.1 * ep_reward)
        else:
            ma_rewards.append(ep_reward)
    print('Complete training!')
    return rewards, ma_rewards

if __name__ == "__main__":
    cfg = DQNConfig()
    env = gym.make(cfg.env)
    env.seed(1)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    agent = DQN(state_dim, action_dim, cfg)
    rewards, ma_rewards = train(cfg, env, agent)
    make_dir(cfg.result_path)
    agent.save(path=cfg.result_path)
    save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
    plot_rewards(rewards, ma_rewards, tag="train", algo=cfg.algo, path=cfg.result_path)
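# The loop above refreshes the target network with a hard copy every
# cfg.target_update episodes. For comparison, DDPG-style code (such as the
# script further up) usually blends the networks with a soft (Polyak)
# update instead; tau=0.005 below is an illustrative value, not the repo's:
def hard_update(target_net, policy_net):
    # Overwrite the target weights wholesale.
    target_net.load_state_dict(policy_net.state_dict())

def soft_update(target_net, policy_net, tau=0.005):
    # Move the target a small step toward the policy network.
    for t, p in zip(target_net.parameters(), policy_net.parameters()):
        t.data.copy_((1.0 - tau) * t.data + tau * p.data)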
def get_summary_writer():
    name = str(datetime.datetime.now())[:19]  # timestamp string used as the run name
    utils.make_dir(PATH['TF_LOGS'])
    logs_path = os.path.join(PATH['TF_LOGS'], name)
    return SummaryWriter(logs_path)
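# Example usage (assumes PATH['TF_LOGS'] is configured elsewhere in the
# repo): log one scalar per training episode, then flush the writer.
writer = get_summary_writer()
for i_episode, ep_reward in enumerate([100.0, 120.5, 130.2]):  # e.g. per-episode rewards
    writer.add_scalar('reward/episode', ep_reward, i_episode)
writer.close()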