        }  # NOTE(review): closes a feed_dict literal opened before this chunk — the enclosing
           # train() 'def' is outside the visible source; indentation here is reconstructed.
    # Run one optimization step, then sync the target network, and report the loss.
    session.run(train_op, feed_dict)
    session.run(target_update)  # presumably copies online-net weights to the target net — verify
    return session.run(q_loss, feed_dict)


# %% main loop
losses = []   # per-iteration training loss history
returns = []  # per-episode total return history
for n in range(ITERATIONS):
    # Epsilon-greedy exploration, decayed as 1/sqrt(episode index + 1).
    epsilon = 1.0 / np.sqrt(n + 1)
    steps, total_return = play_once(env, epsilon)
    returns.append(total_return)
    # Only start training once the replay buffer holds enough samples.
    if MINIMAL_SAMPLES < replay_buffer.number_of_samples():
        loss = train()
        losses.append(loss)
    # Progress report every 10 episodes (skipping episode 0).
    if n != 0 and n % 10 == 0:
        print("Episode:", n, "Returns:", total_return, "epsilon:", epsilon)

#%% Demo
# Record demo episodes with a greedy (epsilon=0) policy into a timestamped directory.
# NOTE(review): str(datetime.now()) contains spaces/colons — may produce awkward paths on
# some filesystems; confirm this is intended.
filename = os.path.basename(__file__).split('.')[0]
monitor_dir = './' + filename + '_' + str(datetime.now())
env = gym.wrappers.Monitor(env, monitor_dir)
for n in range(DEMO_NUMBER):
    play_once(env, 0.0, render=True)

# %%Close Environment
def demo():
    """Run one greedy-ish (epsilon=0.05) evaluation episode, recorded via gym Monitor.

    Uses the module-level ``env``, ``MONITOR_DIR`` and ``play_once`` — all defined
    outside this chunk.
    """
    demo_env = gym.wrappers.Monitor(env, MONITOR_DIR, resume=True, mode="evaluation", write_upon_reset=True)
    steps, total_return = play_once(demo_env, 0.05, render=True)
    print("Demo for %d steps, Return %d" % (steps, total_return))


# Populate replay buffer
# Act fully randomly (epsilon=1.0) until the buffer holds MINIMAL_SAMPLES transitions.
epsilon = 1.0
print("Populating replay buffer with epsilon %f..." % epsilon)
while MINIMAL_SAMPLES > replay_buffer.number_of_samples():
    steps, total_return = play_once(env, epsilon, render=False)
    print("Played %d < %d steps" % (replay_buffer.number_of_samples(), MINIMAL_SAMPLES))

# Main loop
print("Start Main Loop...")
n = 0.5  # NOTE(review): dead assignment — immediately overwritten by the for-loop target below
for n in range(ITERATIONS):
    # Epsilon decays as 1/sqrt(episode index + 1).
    epsilon = 1.0 / np.sqrt(n + 1)
    steps, total_return = play_once(env, epsilon)
    t0 = datetime.now()  # presumably used later to time the training phase — the rest of
                         # this loop body continues past the visible source
    # One train() call per environment step taken in the episode.
    for _ in range(steps):
        train()
        global_step += 1