Example #1
    def save(self):
        # Clear and recreate the output directory, then save each managed
        # model (including the final 'stacker') to its own state file.
        utils.remove_dir(self.path)
        utils.make_dir(self.path)
        foo = lambda model, params, name: save_model(
            model,
            os.path.join(self.path, name + MODEL_STATE_EXT),
            params
        )
        self._apply(foo, self.model_names + ['stacker'])
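Note: `utils.remove_dir` and `utils.make_dir` are project helpers that this snippet does not show. A minimal sketch of what they might look like, using only the standard library (names and behavior are assumptions, not the project's actual code):

import os
import shutil

def remove_dir(path):
    # Delete the directory tree if it exists; a missing path is not an error.
    shutil.rmtree(path, ignore_errors=True)

def make_dir(*paths):
    # Create each directory (and any parents) if it does not already exist.
    for p in paths:
        os.makedirs(p, exist_ok=True)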
Example #2
            if done:
                break
        rewards.append(ep_reward)
        if ma_rewards:
            # exponentially smoothed reward for a cleaner curve
            ma_rewards.append(ma_rewards[-1] * 0.9 + ep_reward * 0.1)
        else:
            ma_rewards.append(ep_reward)
        print(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.1f}")
    print('Finished testing!')
    return rewards, ma_rewards


if __name__ == "__main__":
    cfg = DQNConfig()
    plot_cfg = PlotConfig()
    # train
    env, agent = env_agent_config(cfg, seed=1)
    rewards, ma_rewards = train(cfg, env, agent)
    make_dir(plot_cfg.result_path, plot_cfg.model_path)  # create folders for results and models
    agent.save(path=plot_cfg.model_path)  # save the model
    save_results(rewards, ma_rewards, tag='train',
                 path=plot_cfg.result_path)  # save results
    plot_rewards_cn(rewards, ma_rewards, plot_cfg, tag="train")  # plot results
    # test
    env, agent = env_agent_config(cfg, seed=10)
    agent.load(path=plot_cfg.model_path)  # load the model
    rewards, ma_rewards = test(cfg, env, agent)
    save_results(rewards, ma_rewards, tag='test',
                 path=plot_cfg.result_path)  # save results
    plot_rewards_cn(rewards, ma_rewards, plot_cfg, tag="test")  # plot results
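Note: every loop in these examples smooths the raw episode reward into `ma_rewards` with the same exponential update (factor 0.9). A standalone helper that captures the pattern, for illustration only (the name `smooth` is made up):

def smooth(rewards, alpha=0.9):
    # Exponentially smoothed rewards, matching the ma_rewards update above.
    smoothed = []
    for r in rewards:
        if smoothed:
            smoothed.append(alpha * smoothed[-1] + (1 - alpha) * r)
        else:
            smoothed.append(r)
    return smoothed

# smooth([0, 10, 20]) -> [0, 1.0, 2.9]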
Example #3
        rewards.append(ep_reward)
        if ma_rewards:
            ma_rewards.append(0.9 * ma_rewards[-1] + 0.1 * ep_reward)
        else:
            ma_rewards.append(ep_reward)
    print('Complete training!')
    return rewards, ma_rewards


if __name__ == "__main__":
    cfg = HierarchicalDQNConfig()

    # train
    env, agent = env_agent_config(cfg, seed=1)
    rewards, ma_rewards = train(cfg, env, agent)
    make_dir(cfg.result_path, cfg.model_path)
    agent.save(path=cfg.model_path)
    save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
    plot_rewards(rewards,
                 ma_rewards,
                 tag="train",
                 algo=cfg.algo,
                 path=cfg.result_path)
    # eval
    env, agent = env_agent_config(cfg, seed=10)
    agent.load(path=cfg.model_path)
    rewards, ma_rewards = eval(cfg, env, agent)
    save_results(rewards, ma_rewards, tag='eval', path=cfg.result_path)
    plot_rewards(rewards,
                 ma_rewards,
                 tag="eval",
                 algo=cfg.algo,
                 path=cfg.result_path)
Example #4
            '/'+curr_time+'/models/'  # path for saving models
        self.save = True  # whether to save figures
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")  # use GPU if available


def env_agent_config(cfg, seed=1):
    env = NormalizedActions(gym.make(cfg.env_name))  # wrap the env to rescale actions
    env.seed(seed)  # set the random seed
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    agent = DDPG(state_dim, action_dim, cfg)
    return env, agent


cfg = DDPGConfig()
plot_cfg = PlotConfig()
# train
env, agent = env_agent_config(cfg, seed=1)
rewards, ma_rewards = train(cfg, env, agent)
make_dir(plot_cfg.result_path, plot_cfg.model_path)
agent.save(path=plot_cfg.model_path)
save_results(rewards, ma_rewards, tag='train', path=plot_cfg.result_path)
plot_rewards(rewards, ma_rewards, plot_cfg, tag="train")  # plot results
# test
env, agent = env_agent_config(cfg, seed=10)
agent.load(path=plot_cfg.model_path)
rewards, ma_rewards = test(cfg, env, agent)
save_results(rewards, ma_rewards, tag='test', path=plot_cfg.result_path)
plot_rewards(rewards, ma_rewards, plot_cfg, tag="test")  # plot results
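Note: `NormalizedActions` is imported from elsewhere in this project and not shown. A common implementation of such a wrapper rescales actions from [-1, 1] to the environment's own bounds; the sketch below is an assumption, not the project's actual code:

import gym
import numpy as np

class NormalizedActions(gym.ActionWrapper):
    # Rescale agent actions from [-1, 1] to the env's [low, high] range.
    def action(self, action):
        low, high = self.action_space.low, self.action_space.high
        action = low + (action + 1.0) * 0.5 * (high - low)  # map [-1, 1] -> [low, high]
        return np.clip(action, low, high)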
Example #5
            if done:
                break
        rewards.append(ep_reward)
        if ma_rewards:
            ma_rewards.append(ma_rewards[-1] * 0.9 + ep_reward * 0.1)
        else:
            ma_rewards.append(ep_reward)
        print(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.1f}")
    print('Finished testing!')
    env.close()
    return rewards, ma_rewards


if __name__ == "__main__":
    cfg = Config()
    # train
    env, agent = env_agent_config(cfg)
    rewards, ma_rewards = train(cfg, env, agent)
    make_dir(cfg.result_path, cfg.model_path)  # create folders for results and models
    agent.save(path=cfg.model_path)  # save the model
    save_results(rewards, ma_rewards, tag='train',
                 path=cfg.result_path)  # save results
    plot_rewards(rewards, ma_rewards, cfg, tag="train")  # plot results
    # test
    env, agent = env_agent_config(cfg)
    agent.load(path=cfg.model_path)  # load the model
    rewards, ma_rewards = test(cfg, env, agent)
    save_results(rewards, ma_rewards, tag='test',
                 path=cfg.result_path)  # save results
    plot_rewards(rewards, ma_rewards, cfg, tag="test")  # plot results
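Note: `save_results` comes from the project's `common.utils` module and is not shown here. A plausible minimal version that dumps the reward lists as NumPy arrays (file names are assumptions):

import os
import numpy as np

def save_results(rewards, ma_rewards, tag='train', path='./results/'):
    # Store raw and smoothed rewards as .npy files for later plotting.
    np.save(os.path.join(path, f'{tag}_rewards.npy'), rewards)
    np.save(os.path.join(path, f'{tag}_ma_rewards.npy'), ma_rewards)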
Example #6
curr_path = os.path.dirname(__file__)
parent_path = os.path.dirname(curr_path)
sys.path.append(parent_path)  # add the parent directory to sys.path

import gym
import torch
import datetime
from DQN.agent import DQN
from common.plot import plot_rewards
from common.utils import save_results, make_dir, del_empty_dir

SEQUENCE = datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S")  # obtain current time
SAVED_MODEL_PATH = curr_path + "/saved_model/" + SEQUENCE + '/'  # path to save model
RESULT_PATH = curr_path + "/results/" + SEQUENCE + '/'  # path to save rewards
make_dir(curr_path + "/saved_model/", curr_path + "/results/")
del_empty_dir(curr_path + "/saved_model/", curr_path + "/results/")


class DQNConfig:
    def __init__(self):
        self.env = 'LunarLander-v2'
        self.algo = "DQN"  # name of the algorithm
        self.gamma = 0.95  # discount factor
        self.epsilon_start = 1  # initial epsilon for the epsilon-greedy policy
        self.epsilon_end = 0.01  # final epsilon
        self.epsilon_decay = 500  # decay constant of epsilon
        self.lr = 0.0001  # learning rate
        self.memory_capacity = 1000000  # replay memory capacity
        self.batch_size = 64
        self.train_eps = 300  # number of training episodes
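Note: the `epsilon_start`, `epsilon_end`, and `epsilon_decay` fields suggest the agent anneals epsilon during training. The `DQN.agent` code is not shown, but a common exponential schedule consistent with these names would be (an assumption, not the repository's actual implementation):

import math

def epsilon_by_frame(frame_idx, start=1.0, end=0.01, decay=500):
    # Anneal epsilon exponentially from `start` toward `end`;
    # `decay` sets how quickly the transition happens.
    return end + (start - end) * math.exp(-frame_idx / decay)

# epsilon_by_frame(0) -> 1.0; epsilon_by_frame(500) ~ 0.37; tends to 0.01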
Example #7
            agent.update()
        if i_episode % cfg.target_update == 0:
            agent.target_net.load_state_dict(agent.policy_net.state_dict())
        print('Episode:{}/{}, Reward:{}'.format(i_episode + 1, cfg.train_eps,
                                                ep_reward))
        rewards.append(ep_reward)
        # compute the smoothed (moving-average) reward
        if ma_rewards:
            ma_rewards.append(0.9 * ma_rewards[-1] + 0.1 * ep_reward)
        else:
            ma_rewards.append(ep_reward)
    print('Complete training!')
    return rewards, ma_rewards


if __name__ == "__main__":
    cfg = DQNConfig()
    env = gym.make(cfg.env)
    env.seed(1)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    agent = DQN(state_dim, action_dim, cfg)
    rewards, ma_rewards = train(cfg, env, agent)
    make_dir(cfg.result_path)
    agent.save(path=cfg.result_path)
    save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
    plot_rewards(rewards,
                 ma_rewards,
                 tag="train",
                 algo=cfg.algo,
                 path=cfg.result_path)


def get_summary_writer():
    # Build a TensorBoard SummaryWriter under a timestamped log directory.
    name = str(datetime.datetime.now())[:19]
    utils.make_dir(PATH['TF_LOGS'])
    logs_path = os.path.join(PATH['TF_LOGS'], name)
    return SummaryWriter(logs_path)
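Note: the writer returned by `get_summary_writer` would typically log per-episode metrics during training. A brief usage sketch (the scalar tags are illustrative, not from the original code):

writer = get_summary_writer()
for i_ep, ep_reward in enumerate(rewards):
    # Log raw and smoothed episode rewards to TensorBoard.
    writer.add_scalar('reward/episode', ep_reward, i_ep)
    writer.add_scalar('reward/moving_average', ma_rewards[i_ep], i_ep)
writer.close()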