# Tail of the training loop: store the latest transition, finish the episode,
# then do bookkeeping (epsilon decay, logging, replay, plotting, checkpoint).
#
# NOTE(review): the original line breaks and indentation of this fragment were
# lost, and the loops that define `step` and `e` (and that `break` exits) start
# outside this view. The nesting of the statements after `break` cannot be
# determined from here -- confirm against the original file before reformatting.
#
# Statement by statement:
#   * learner.get_targets(...) is called with a one-element batch holding
#     (0, transition); only its third return value, `errors`, is used.
#   * learner.add_memory(errors[0], transition) stores the transition together
#     with that error value (presumably used as a replay priority -- verify in
#     the learner class).
#   * `state` advances to `new_state`; when `done` is set or `step` reaches
#     max_steps - 1, `step` is appended to `score_list` and the loop is left.
#   * actor.epsilon is multiplied by actor.epsilon_decay while it is still
#     above actor.epsilon_min.
#   * The mean of `score_list` is printed next to max_steps - 1 and appended to
#     `scores` (the mean is computed twice with the same inputs).
#   * learner.replay() is called, then the newest (index, score) point is drawn
#     with plt.scatter and plt.pause(0.05) is called (presumably to refresh the
#     figure -- confirm).
#   * When e % 25 == 0 (this includes e == 0, if `e` starts at zero -- confirm)
#     learner.update_target() is called and the model is saved to
#     './models/inverted_pendulum_v0.2.h5'.
_, _, errors = learner.get_targets([(0, (state, action, reward, new_state, done))]) learner.add_memory(errors[0], (state, action, reward, new_state, done)) state = new_state if done or step == max_steps - 1: score_list.append(step) break # Decay the epsilon if actor.epsilon > actor.epsilon_min: actor.epsilon *= actor.epsilon_decay print("Episode: {}, Score: {}/{}".format( e, sum(score_list) / len(score_list), max_steps - 1)) scores.append(sum(score_list) / len(score_list)) x = range(len(scores)) y = scores learner.replay() plt.scatter(x[-1], y[-1]) plt.pause(0.05) if e % 25 == 0: learner.update_target() learner.save_model('./models/inverted_pendulum_v0.2.h5')