Esempio n. 1
0
                _, _, errors = learner.get_targets([(0, (state, action, reward,
                                                         new_state, done))])
                learner.add_memory(errors[0],
                                   (state, action, reward, new_state, done))

                state = new_state

                if done or step == max_steps - 1:
                    score_list.append(step)
                    break

            # Decay the epsilon
            if actor.epsilon > actor.epsilon_min:
                actor.epsilon *= actor.epsilon_decay

        print("Episode: {}, Score: {}/{}".format(
            e,
            sum(score_list) / len(score_list), max_steps - 1))
        scores.append(sum(score_list) / len(score_list))
        x = range(len(scores))
        y = scores

        learner.replay()
        plt.scatter(x[-1], y[-1])
        plt.pause(0.05)

        if e % 25 == 0:
            learner.update_target()

    learner.save_model('./models/inverted_pendulum_v0.2.h5')