"Visualy test Agent, after reset top frame should be a reduced image of size {} by {}" .format(width, height))
# NOTE(review): the line above is the tail of a call that is opened before
# this point in the file; it is kept exactly as written.

# Interactive visual check: reset the agent and display the last entry of
# agent.state so it can be compared by eye with the size announced above.
agent._reset()
io.imshow(agent.state[-1])
plt.show()
# input() blocks until the user presses Enter.
input("Press any key to continue")
plt.close()

print(
    "resetting experience buffer, then observe state and invoke agent play method 100 times ..."
)
# Empty the buffer and reset the agent before the 100 play steps below.
experience_buffer.buffer.clear()
agent._reset()
for i in range(100):
    # The flag is passed as the only argument to play_step; presumably it
    # selects random actions -- TODO confirm against Agent.play_step.
    play_random = True
    agent.play_step(play_random)
# Report how many entries the buffer holds after the loop, next to `capacity`.
print("number of experiences in the buffer: {}. Buffer capacity {}".format(
    len(experience_buffer), capacity))
input(
    "Press any key to see a random initial state image from the experience buffer"
)
# sample(1) is unpacked into five sequences; only states and next_states are
# used below.
states, actions, rewards, dones, next_states = experience_buffer.sample(1)
# Show element 3 of the first sampled state.
# NOTE(review): index 3 presumably is the newest frame of a 4-frame stack --
# confirm; the check above uses agent.state[-1] instead of a fixed index.
io.imshow(states[0][3])
plt.show()
input("Press any key to see the next state image for the same experience")
plt.close()
# Same element of the matching next state, for a side-by-side comparison.
io.imshow(next_states[0][3])
plt.show()
input("Press any key to continue")
# to track the mean across epochs total_mean = 0 total_count = 0 dt = datetime.datetime.now() date_time_string = dt.strftime("%Y%m%d-%H-%M-%S") model_path = "models/" + date_time_string os.makedirs(model_path, mode=0o755, exist_ok=True) steps = 0 last_reward_step = 0 if args.initialise_buffer == "init": print("Initialising and saving buffer with %d transitions" % args.replay_buffer_size) for step in trange(args.replay_buffer_size, leave=False): agent.play_step(True, device, write_to_buffer=False) with open("experience_buffer.pickle", "wb") as f: pickle.dump(experience_buffer, f) sys.exit() elif args.initialise_buffer == "load": print("loading buffer ...") with open("experience_buffer.pickle", "rb") as f: experience_buffer = pickle.load(f) print("buffer size: {}".format(len(experience_buffer))) else: print("Initialising buffer with %d transitions" % initialisation_steps) for step in trange(initialisation_steps, leave=False): agent.play_step(True, device)