def dqn_test(episode):
    """Evaluate a trained DQN 'door_open' agent for a given number of episodes.

    Loads the saved active Q-network from ``saved_models/door_open/models``,
    runs pure-greedy rollouts (epsilon forced to 0), and logs per-episode
    return, running average return, cumulative success count, and elapsed time
    via ``rospy.loginfo``.

    Args:
        episode: Number of evaluation episodes to run.

    Side effects:
        Creates a ``DoorOpenTaskEnv`` and steps it; loads a Keras model from
        disk; writes to the ROS log. Returns nothing.
    """
    env = DoorOpenTaskEnv(resolution=(64, 64))
    agent = DQNAgent(name='door_open', dim_img=(64, 64, 3), dim_act=5)
    model_path = os.path.join(sys.path[0], 'saved_models', agent.name, 'models')
    agent.dqn_active = tf.keras.models.load_model(model_path)
    steps = env.max_episode_steps
    start_time = time.time()
    success_counter = 0
    episodic_returns = []
    sedimentary_returns = []
    # epsilon = 0 -> epsilon_greedy() always picks the greedy action.
    agent.epsilon = 0.0
    for ep in range(episode):
        obs, info = env.reset()
        ep_rew = 0
        img = obs.copy()
        for st in range(steps):
            act = agent.epsilon_greedy(img)
            obs, rew, done, info = env.step(act)
            img = obs.copy()
            ep_rew += rew
            if done:
                break
        # log information for each episode
        episodic_returns.append(ep_rew)
        # Running average of returns over all episodes so far.
        sedimentary_returns.append(sum(episodic_returns) / (ep + 1))
        # env.open presumably flags whether the door was opened this episode
        # -- TODO confirm against DoorOpenTaskEnv.
        if env.open:
            success_counter += 1
        rospy.loginfo(
            "\n================\nEpisode: {} \nEpisodeLength: {} \nEpisodeTotalRewards: {} \nAveragedTotalReward: {} \nSuccess: {} \nTime: {} \n================\n"
            .format(ep + 1, st + 1, ep_rew, sedimentary_returns[-1],
                    success_counter, time.time() - start_time))
model_saver = ModelSaver(500) # use tensorboard train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32) summary_writer = tf.summary.create_file_writer(model_path) summary_writer.set_as_default() start_time = time.time() for ep in range(num_episodes): obs, info = env.reset() ep_rew = 0 img = obs.copy() agent_p.linear_epsilon_decay(curr_ep=ep) for st in range(num_steps): act = agent_p.epsilon_greedy(img) obs, rew, done, info = env.step(act) nxt_img = obs.copy() ep_rew += rew step_counter += 1 # store transition agent_p.replay_buffer.store(img, act, rew, done, nxt_img) # train one step if ep >= agent_p.warmup_episodes: if not step_counter % train_freq: for _ in range(train_freq): agent_p.train_one_step(train_loss) # finish step, EXTREMELY IMPORTANT!!! img = nxt_img.copy() logging.debug( "\n-\nepisode: {}, step: {} \naction: {} \nreward: {} \ndone: {}"