Example #1
    # Replay-buffer warm-up threshold: training starts only once the buffer
    # holds at least this many transitions.
    batch_size = 100
    # Extend the schedule by the (approximate) number of episodes consumed
    # filling the buffer, so the agent still gets the originally requested
    # number of *training* episodes. Assumes one transition per step, so the
    # warm-up takes ~batch_size / n_steps episodes — TODO confirm against
    # agent.step / replay_buffer.add semantics.
    additional_episodes = math.ceil(batch_size / n_steps)
    n_episodes += additional_episodes

    # Loop over episodes
    # NOTE(review): episode_counter, epsilon, epsilon_step, epsilon_min,
    # epsilon_decay, gamma, do_target, agent, dqn and replay_buffer are all
    # defined outside this fragment — verify their initialisation there.
    while episode_counter < n_episodes:

        agent.reset()
        # Running sum of per-step training losses for this episode.
        # NOTE(review): not consumed anywhere in the visible fragment —
        # presumably reported/averaged further down; confirm it isn't dead.
        loss_avg = 0

        for step_num in range(n_steps):

            # One interaction with the environment under the current
            # epsilon-greedy policy; store the resulting transition for replay.
            transition = agent.step(dqn, epsilon)
            replay_buffer.add(transition)

            # Train only once the buffer can supply a full batch.
            if replay_buffer.get_len() >= batch_size:
                batch_transitions = replay_buffer.sample(batch_size)
                loss = dqn.train_q_network(batch_transitions, gamma, do_target)
                loss_avg += loss

                # Per-training-step epsilon decay, floored at epsilon_min and
                # suppressed during the warm-up episodes added above
                # (episode_counter < additional_episodes).
                if epsilon_decay and epsilon > epsilon_min and (
                        episode_counter - additional_episodes) >= 0:
                    epsilon = epsilon - epsilon_step

            # if show_env: time.sleep(0.1)

        # Earlier per-episode decay schedule, superseded by the per-step decay
        # above; kept for reference.
        # if epsilon_decay and epsilon > epsilon_min:
        #     epsilon = epsilon - epsilon_step

        episode_counter += 1
        print(f'Episode: {episode_counter}')