# --- DQN training loop -----------------------------------------------------
# NOTE(review): this chunk arrived with all line breaks collapsed onto one
# line; the line structure and indentation below are a reconstruction from
# the token stream — confirm against version control before trusting nesting.
#
# The replay buffer must hold at least `batch_size` transitions before any
# training step can run, so the first ceil(batch_size / n_steps) episodes are
# spent purely filling the buffer.  Pad `n_episodes` by that amount so the
# requested number of *training* episodes is still performed.
batch_size = 100
additional_episodes = math.ceil(batch_size / n_steps)
n_episodes += additional_episodes

# Loop over episodes
while episode_counter < n_episodes:
    agent.reset()  # start a fresh episode
    loss_avg = 0   # accumulated loss for this episode (sum, not yet averaged;
                   # presumably divided/logged later — verify against caller)
    for step_num in range(n_steps):
        # One environment step under the current epsilon-greedy policy; the
        # resulting transition is stored for experience replay.
        transition = agent.step(dqn, epsilon)
        replay_buffer.add(transition)
        # Train only once the buffer can supply a full minibatch.
        if replay_buffer.get_len() >= batch_size:
            batch_transitions = replay_buffer.sample(batch_size)
            loss = dqn.train_q_network(batch_transitions, gamma, do_target)
            loss_avg += loss
            # Per-training-step epsilon decay, gated so the decay schedule is
            # not consumed during the initial buffer-filling episodes
            # (i.e. only once episode_counter >= additional_episodes).
            if epsilon_decay and epsilon > epsilon_min and (
                    episode_counter - additional_episodes) >= 0:
                epsilon = epsilon - epsilon_step
        # Optional slow-down for on-screen rendering (disabled):
        # if show_env:
        #     time.sleep(0.1)
    # Earlier per-episode decay schedule, kept disabled for reference:
    # if epsilon_decay and epsilon > epsilon_min:
    #     epsilon = epsilon - epsilon_step
    episode_counter += 1
    print(f'Episode: {episode_counter}')