from datetime import datetime  # used to time each learning step

stepcount = 0
state = env.reset()
done = False
current_rewards = 0
current_time_learned = []

while not done:
    action = agent.choose_action(state, i_episode, train=True)
    next_state, reward, done, info = env.step(action)
    if reward == 1:  # a reward of 1 corresponds to a cleared line
        total_cleared += 1
    agent.store_transition(state, action, reward, next_state, done)
    state = next_state

    # time a single learning step
    time_start = datetime.now()
    agent.learn(BATCH_SIZE, i_episode)
    time_difference = datetime.now() - time_start
    current_time_learned.append(time_difference.total_seconds())

    stepcount += 1
    current_rewards += reward
    if stepcount % 10000 == 0:
        print("At step", stepcount)

if USE_TB:
    writer.add_scalar("reward", current_rewards, i_episode)
    writer.add_scalar("steps done", stepcount, i_episode)
    writer.add_scalar("epsilon", agent.get_eps(i_episode), i_episode)
    writer.add_scalar("Total cleared", total_cleared, i_episode)
rewards.append(current_rewards)
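The loop above delegates exploration to agent.choose_action and agent.get_eps. For reference, here is a minimal sketch of an epsilon-greedy policy with an exponentially decaying epsilon, assuming a PyTorch agent; the constants EPS_START, EPS_END, and EPS_DECAY and the attributes policy_net and n_actions are illustrative assumptions, not names taken from the original code.

import math
import random

import torch

EPS_START, EPS_END, EPS_DECAY = 1.0, 0.01, 200  # hypothetical schedule constants

class Agent:
    def get_eps(self, i_episode):
        # Anneal epsilon exponentially from EPS_START toward EPS_END.
        return EPS_END + (EPS_START - EPS_END) * math.exp(-i_episode / EPS_DECAY)

    def choose_action(self, state, i_episode, train=True):
        # With probability epsilon, explore; otherwise act greedily on Q-values.
        if train and random.random() < self.get_eps(i_episode):
            return random.randrange(self.n_actions)
        with torch.no_grad():
            state_t = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
            return int(self.policy_net(state_t).argmax(dim=1).item())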
for i_episode in range(NUM_EPISODES):
    stepcount = 0
    state = env.reset()
    done = False
    current_rewards = 0
    current_time_learned = []

    while not done:
        action = agent.choose_action(state, i_episode, train=True)
        next_state, reward, done, info = env.step(action)
        agent.store_transition(state, action, reward, next_state, done)

        # time a single learning step; the vanilla DQN target is used here
        time_start = datetime.now()
        agent.learn(BATCH_SIZE, i_episode, use_DDQN=False)
        time_difference = datetime.now() - time_start
        current_time_learned.append(time_difference.total_seconds())

        state = next_state
        stepcount += 1
        current_rewards += reward

    if USE_TB:
        writer.add_scalar("reward", current_rewards, i_episode)
        writer.add_scalar("steps done", stepcount, i_episode)
        writer.add_scalar("epsilon", agent.get_eps(i_episode), i_episode)

    rewards.append(current_rewards)
    time_learned.append(sum(current_time_learned) / len(current_time_learned))

    if i_episode % 50 == 0 and i_episode != 0:
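Apart from the cleared-line counter, the only difference between the two loops is the use_DDQN flag passed to agent.learn. In Double DQN the policy network selects the next action and the target network evaluates it, which reduces the overestimation bias of the vanilla DQN max-target. The following is a hedged sketch of how learn could branch on that flag, again assuming PyTorch; memory, policy_net, target_net, optimizer, and GAMMA are illustrative names not present in the excerpt.

import torch
import torch.nn.functional as F

GAMMA = 0.99  # hypothetical discount factor

def learn(self, batch_size, i_episode, use_DDQN=False):
    if len(self.memory) < batch_size:
        return  # wait until the replay buffer holds a full batch
    states, actions, rewards, next_states, dones = self.memory.sample(batch_size)

    # Q(s, a) for the actions that were actually taken
    q_values = self.policy_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)

    with torch.no_grad():
        if use_DDQN:
            # Double DQN: policy net selects the action, target net evaluates it.
            next_actions = self.policy_net(next_states).argmax(dim=1, keepdim=True)
            next_q = self.target_net(next_states).gather(1, next_actions).squeeze(1)
        else:
            # Vanilla DQN: target net both selects and evaluates the action.
            next_q = self.target_net(next_states).max(dim=1).values
        targets = rewards + GAMMA * next_q * (1 - dones.float())

    loss = F.smooth_l1_loss(q_values, targets)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()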