Example #1
        }
        # One gradient step on the online Q network for this sampled batch.
        session.run(train_op, feed_dict)
        # Sync the target network with the freshly updated online weights.
        session.run(target_update)
        # Return the current TD loss for logging.
        return session.run(q_loss, feed_dict)
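
# --- Reference sketch (not part of the example above) -----------------------
# The opening of train() and its feed_dict is cut off in this excerpt, so the
# following is a minimal, self-contained guess at how q_loss, train_op,
# target_update and the feed_dict typically fit together in a TF1-style DQN
# update. The placeholder names, the one-layer Q networks and the batch layout
# are assumptions, not the example's actual code; run this sketch on its own.
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

STATE_DIM, NUM_ACTIONS, GAMMA = 4, 2, 0.99

# One sampled batch of transitions.
states = tf.placeholder(tf.float32, [None, STATE_DIM])
actions = tf.placeholder(tf.int32, [None])
targets = tf.placeholder(tf.float32, [None])

with tf.variable_scope("online"):
    q_values = tf.layers.dense(states, NUM_ACTIONS)   # Q(s, .)
with tf.variable_scope("target"):
    q_target = tf.layers.dense(states, NUM_ACTIONS)   # frozen copy for bootstrapping

online_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="online")
target_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="target")

# Q(s, a) for the actions that were actually taken.
taken_q = tf.reduce_sum(q_values * tf.one_hot(actions, NUM_ACTIONS), axis=1)
q_loss = tf.reduce_mean(tf.square(targets - taken_q))
train_op = tf.train.AdamOptimizer(1e-3).minimize(q_loss, var_list=online_vars)

# Copy the online weights into the target network.
target_update = tf.group(*[t.assign(o) for o, t in zip(online_vars, target_vars)])


def train_step(session, batch):
    """One DQN update: bootstrap targets from the target net, fit, then sync."""
    s, a, r, s2, done = batch
    next_q = session.run(q_target, {states: s2}).max(axis=1)
    td_targets = r + GAMMA * next_q * (1.0 - done)
    feed_dict = {states: s, actions: a, targets: td_targets}
    session.run(train_op, feed_dict)
    session.run(target_update)
    return session.run(q_loss, feed_dict)


if __name__ == "__main__":
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batch = (np.random.randn(8, STATE_DIM).astype(np.float32),
                 np.random.randint(NUM_ACTIONS, size=8).astype(np.int32),
                 np.random.randn(8).astype(np.float32),
                 np.random.randn(8, STATE_DIM).astype(np.float32),
                 np.zeros(8, dtype=np.float32))
        print("sketch loss:", train_step(sess, batch))
# -----------------------------------------------------------------------------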


# %% main loop
losses = []
returns = []

for n in range(ITERATIONS):
    epsilon = 1.0 / np.sqrt(n + 1)
    steps, total_return = play_once(env, epsilon)

    returns.append(total_return)
    if MINIMAL_SAMPLES < replay_buffer.number_of_samples():
        loss = train()
        losses.append(loss)

    if n != 0 and n % 10 == 0:
        print("Episode:", n, "Returns:", total_return, "epsilon:", epsilon)

# %% Demo

filename = os.path.basename(__file__).split('.')[0]
monitor_dir = './' + filename + '_' + str(datetime.now())
env = gym.wrappers.Monitor(env, monitor_dir)
for n in range(DEMO_NUMBER):
    play_once(env, 0.0, render=True)

# %% Close Environment
Example #2

def demo():
    demo_env = gym.wrappers.Monitor(env,
                                    MONITOR_DIR,
                                    resume=True,
                                    mode="evaluation",
                                    write_upon_reset=True)
    steps, total_return = play_once(demo_env, 0.05, render=True)
    print("Demo for %d steps, Return %d" % (steps, total_return))


# Populate replay buffer
epsilon = 1.0
print("Populating replay buffer with epsilon %f..." % epsilon)
while MINIMAL_SAMPLES > replay_buffer.number_of_samples():
    steps, total_return = play_once(env, epsilon, render=False)
    print("Played %d < %d steps" %
          (replay_buffer.number_of_samples(), MINIMAL_SAMPLES))
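
# replay_buffer itself is defined outside this excerpt; the loop above only
# relies on number_of_samples() growing as play_once() stores transitions.
# A minimal sketch of such a buffer, assuming a FIFO store with uniform
# sampling (the add() and sample() names are guesses, not the example's API):
import random
from collections import deque


class ReplayBuffer:
    """Fixed-size FIFO store of (state, action, reward, next_state, done) tuples."""

    def __init__(self, max_size=100000):
        self.buffer = deque(maxlen=max_size)

    def add(self, transition):
        self.buffer.append(transition)

    def number_of_samples(self):
        return len(self.buffer)

    def sample(self, batch_size):
        # Uniform sampling without replacement from the stored transitions.
        return random.sample(self.buffer, batch_size)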

# Main loop
print("Start Main Loop...")
for n in range(ITERATIONS):
    epsilon = 1.0 / np.sqrt(n + 1)
    steps, total_return = play_once(env, epsilon)

    t0 = datetime.now()
    for _ in range(steps):
        train()
        global_step += 1