def train():
    """Train a DQNAgent on CartPole-v0 for a fixed number of episodes.

    Runs 200 episodes; after each finished episode prints the episode
    return plus the agent's current epsilon, and checkpoints the model.
    Closes the environment when all episodes are done.
    """
    env = gym.make('CartPole-v0')
    agent = DQNAgent(env=env)
    num_episodes = 200

    for i_episode in range(num_episodes):
        state = env.reset()
        total_reward = 0
        done = False

        # Roll out one episode, feeding every transition to the agent.
        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_reward += reward
            # Transition tuple as a list: (s, a, r, s', done).
            transition = [state, action, reward, next_state, done]
            agent.update(transition)
            state = next_state

        # Episode finished: report the return and current exploration rate.
        print("Episode ", i_episode, ": ", total_reward, " epsilon: ", agent.epsilon)
        # Checkpoint after every episode.
        agent.save('myClassModel')

    env.close()
env = gym.make(env_id) # env = gym.wrappers.Monitor(env, osp.join(log_dir,"record")) env = WrapPytorch(env) agent = DQNAgent(env, log_dir=log_dir) # agent.load(log_dir) episode_rewards = [] ep = 0 obs = env.reset() episode_reward = 0 for frame in range(Config.MAX_FRAMES): # print("frame", frame) # env.render() epsilon = Config.epsilon_by_frame(frame) action = agent.get_action(obs, epsilon) prev_obs = obs obs, reward, done, _ = env.step(action) episode_reward += reward agent.update(prev_obs, action, reward, obs, frame) if done: episode_rewards.append(episode_reward) agent.writer.add_scalar("data/reward", episode_reward, ep) print("episode", ep, "reward:", episode_reward) ep += 1 obs = env.reset() episode_reward = 0 if ep % 50 == 0: agent.save(log_dir) agent.save(log_dir)