Example #1

import os
import sys
import time

import rospy
import tensorflow as tf

# DoorOpenTaskEnv and DQNAgent are defined in this project's own modules;
# their import paths are omitted here.
def dqn_test(episode):
    env = DoorOpenTaskEnv(resolution=(64, 64))
    agent = DQNAgent(name='door_open', dim_img=(64, 64, 3), dim_act=5)
    model_path = os.path.join(sys.path[0], 'saved_models', agent.name,
                              'models')
    agent.dqn_active = tf.keras.models.load_model(model_path)

    steps = env.max_episode_steps
    start_time = time.time()
    success_counter = 0
    episodic_returns = []
    sedimentary_returns = []
    ep_rew = 0
    agent.epsilon = 0.0  # disable exploration: evaluate with a purely greedy policy
    for ep in range(episode):
        obs, info = env.reset()
        ep_rew = 0
        img = obs.copy()
        for st in range(steps):
            act = agent.epsilon_greedy(img)
            obs, rew, done, info = env.step(act)
            img = obs.copy()
            ep_rew += rew
            if done:
                break

        # log information for each episode
        episodic_returns.append(ep_rew)
        sedimentary_returns.append(sum(episodic_returns) / (ep + 1))
        if env.open:
            success_counter += 1

        rospy.loginfo(
            "\n================\nEpisode: {} \nEpisodeLength: {} \nEpisodeTotalRewards: {} \nAveragedTotalReward: {} \nSuccess: {} \nTime: {} \n================\n"
            .format(ep + 1, st + 1, ep_rew, sedimentary_returns[-1],
                    success_counter,
                    time.time() - start_time))
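
Because the test loop above sets agent.epsilon to 0.0, action selection reduces to taking the argmax of the Q-values produced by the loaded network. The following is a minimal sketch of such an epsilon-greedy policy; the Q-network interface, input shape, and preprocessing are assumptions for illustration, not the project's actual DQNAgent.epsilon_greedy.

import numpy as np
import tensorflow as tf

def epsilon_greedy(q_net, img, epsilon, dim_act):
    """Random action with probability epsilon, otherwise the greedy (argmax-Q) action.

    Assumes q_net maps a (1, H, W, C) float batch to (1, dim_act) Q-values;
    the project's DQNAgent may preprocess observations differently.
    """
    if np.random.rand() < epsilon:
        return np.random.randint(dim_act)
    obs = tf.expand_dims(tf.convert_to_tensor(img, dtype=tf.float32), axis=0)
    q_values = q_net(obs)
    return int(tf.argmax(q_values, axis=-1)[0])

With epsilon fixed at 0.0 the random branch never fires, so each evaluation episode is deterministic given the loaded weights; during training (Example #2) epsilon is instead annealed with linear_epsilon_decay.
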
Example #2
    model_saver = ModelSaver(500)

    # TensorBoard: track the training loss and write summaries to model_path
    train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
    summary_writer = tf.summary.create_file_writer(model_path)
    summary_writer.set_as_default()

    start_time = time.time()
    for ep in range(num_episodes):
        obs, info = env.reset()
        ep_rew = 0
        img = obs.copy()
        agent_p.linear_epsilon_decay(curr_ep=ep)
        for st in range(num_steps):
            act = agent_p.epsilon_greedy(img)
            obs, rew, done, info = env.step(act)
            nxt_img = obs.copy()
            ep_rew += rew
            step_counter += 1
            # store transition
            agent_p.replay_buffer.store(img, act, rew, done, nxt_img)
            # after warm-up, run train_freq gradient updates every train_freq env steps
            if ep >= agent_p.warmup_episodes:
                if not step_counter % train_freq:
                    for _ in range(train_freq):
                        agent_p.train_one_step(train_loss)
            # finish the step: advance the current observation, otherwise the stored transitions are corrupted
            img = nxt_img.copy()
            logging.debug(
                "\n-\nepisode: {}, step: {} \naction: {} \nreward: {} \ndone: {}"