def train_ddpg() -> None:
    """Train a DDPG agent on the Gym environment named by ``DDPGArgs().env_name``.

    Runs ``args.max_ep`` training episodes and calls ``agent.test_model()``
    on every episode whose index is a multiple of ``args.test_interval``
    (episode 0 included). The environment is closed when training finishes
    or when an episode raises.
    """
    args = DDPGArgs()
    env = gym.make(args.env_name)
    try:
        agent = DDPGAgent(env, DDPGQNet, DDPGActor, SimpleNormalizer, args)
        for ep in range(args.max_ep):
            agent.train_one_episode()
            if ep % args.test_interval == 0:
                agent.test_model()
    finally:
        # Release the environment's resources even if training is interrupted,
        # matching what train_ddpg_with_onecar_oneuav does for its Env.
        env.close()
def train_ddpg_with_onecar_oneuav() -> None:
    """Train a DDPG agent on the project ``Env`` and checkpoint the best model.

    Runs ``args.max_ep`` training episodes. On every episode whose index is a
    multiple of ``args.test_interval`` (episode 0 included) the agent is
    evaluated with ``agent.test_model()``; whenever the returned value beats
    the best seen so far, the new best is printed and the agent is saved to
    ``./result/ddpg/<args.env_name>/``. The environment is closed when
    training finishes or when an episode raises.
    """
    args = DDPGArgs()
    env = Env()
    try:
        agent = DDPGAgent(env, DDPGQNet, DDPGActor, SimpleNormalizer, args)
        # Trailing slash is kept so agent.save() receives the same string as
        # before (it may build file names by plain concatenation).
        save_dir = './result/ddpg/' + args.env_name + '/'
        # Start below any real reward so that the first evaluation is always
        # checkpointed, even when every reward is zero or negative.
        max_reward = float('-inf')
        for ep in range(args.max_ep):
            agent.train_one_episode()
            if ep % args.test_interval != 0:
                continue
            # NOTE(review): test_model() is assumed to return a scalar mean
            # evaluation reward -- confirm against DDPGAgent.
            mean_reward = agent.test_model()
            if mean_reward > max_reward:
                max_reward = mean_reward
                print('max_reward:{}'.format(max_reward))
                # exist_ok avoids the check-then-create race of
                # os.path.exists() followed by os.makedirs().
                os.makedirs(save_dir, exist_ok=True)
                agent.save(save_dir)
    finally:
        env.close()