# Example 1
    stepcount = 0
    state = env.reset()
    done = False
    current_rewards = 0
    current_time_learned = []

    while not done:
        action = agent.choose_action(state, i_episode, train=True)
        next_state, reward, done, info = env.step(action)
        if reward == 1: total_cleared += 1
        agent.store_transition(state, action, reward, next_state, done)
        state = next_state

        time_start = datetime.now()

        agent.learn(BATCH_SIZE, i_episode)

        time_difference = datetime.now() - time_start
        current_time_learned.append(time_difference.total_seconds())

        stepcount += 1
        current_rewards += reward
        if stepcount % 10000 == 0:
            print("At step", stepcount)

    if USE_TB:
        writer.add_scalar("reward", current_rewards, i_episode)
        writer.add_scalar("steps done", stepcount, i_episode)
        writer.add_scalar("epsilon", agent.get_eps(i_episode), i_episode)
        writer.add_scalar("Total cleared", total_cleared, i_episode)
    rewards.append(current_rewards)
# Example 2
for i_episode in range(NUM_EPISODES):
    stepcount = 0
    state = env.reset()
    done = False
    current_rewards = 0
    current_time_learned = []

    while not done:
        action = agent.choose_action(state, i_episode, train=True)
        next_state, reward, done, info = env.step(action)

        agent.store_transition(state, action, reward, next_state, done)

        time_start = datetime.now()

        agent.learn(BATCH_SIZE, i_episode, use_DDQN=False)

        state = next_state
        time_difference = datetime.now() - time_start
        current_time_learned.append(time_difference.total_seconds())

        stepcount += 1
        current_rewards += reward
    if USE_TB:
        writer.add_scalar("reward", current_rewards, i_episode)
        writer.add_scalar("steps done", stepcount, i_episode)
        writer.add_scalar("epsilon", agent.get_eps(i_episode), i_episode)
    rewards.append(current_rewards)
    time_learned.append(sum(current_time_learned) / len(current_time_learned))

    if (i_episode % 50 == 0 and i_episode != 0):