Code Example #1
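            # Simulate the next time step; under the 'mixed' economics setting,
            # w_y1=1 and w_y2=0 appear to weight the reward entirely toward output 1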
            next_state, Reward, Done, Info = env.step(
                control_input,
                t,
                setpoint=[set_point1, set_point2],
                noise=False,
                economics='mixed',
                w_y1=1,
                w_y2=0)

            # RL Feedback
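            # (the update is applied only once t exceeds 150, presumably to skip the start-up transient)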
            if t == rl.eval_feedback and t > 150:
                rl.matrix_update(
                    action_index, Reward, state,
                    [env.y[t, 0] - set_point1, env.y[t, 1] - set_point2], 5)
                tot_reward = tot_reward + Reward

        rlist.append(tot_reward)

        # Autosave Q, T, and NT matrices
        rl.autosave(episode, 100)

        if episode % 10 == 0:
            print("Episode {} | Current Reward {}".format(episode, tot_reward))

    env.plots(timestart=50, timestop=6000)
    # plt.scatter(PID1.u[40:env.y.shape[0]], env.y[40:, 0])
    # plt.show()

    # plt.scatter(PID2.u[40:env.y.shape[0]], env.y[40:, 1])
    # plt.show()
Code Example #2
            # Generate input tuple
            control_input = np.array([[input_1, input_2]])

            # Simulate the next time step under the 'distillate' economics setting
            next_state, Reward, Done, Info = env.step(
                control_input,
                t,
                setpoint=[set_point1, set_point2],
                noise=False,
                economics='distillate')

            # RL Feedback
            if t == rl.eval_feedback:
                rl.matrix_update(
                    action_index, Reward, state,
                    env.y[t, :] - np.array([set_point1, set_point2]), 5)
                tot_reward = tot_reward + Reward

        rlist.append(tot_reward)

        # Autosave Q, T, and NT matrices
        rl.autosave(iteration, 250)

    env.plots(timestart=50, timestop=5950)
    # plt.scatter(PID1.u[40:env.y.shape[0]], env.y[40:, 0])
    # plt.show()

    # plt.scatter(PID2.u[40:env.y.shape[0]], env.y[40:, 1])
    # plt.show()
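
Both excerpts assume a surrounding episode/time-step training loop and the project's own environment and agent objects (`env`, `rl`), which are not shown here. For orientation only, below is a minimal self-contained sketch of the same pattern (a gym-style step call, a scalar reward, a tabular Q-update, and periodic saving) applied to a toy first-order process. The ToySetpointEnv and TabularAgent classes are stand-ins invented for this sketch, not the classes used in the excerpts.

import numpy as np

class ToySetpointEnv:
    """Toy 1-D process: the output drifts toward the applied input."""
    def __init__(self, setpoint=0.5):
        self.setpoint = setpoint
        self.y = 0.0

    def step(self, u):
        self.y += 0.1 * (u - self.y)            # first-order response
        reward = -abs(self.y - self.setpoint)   # penalize tracking error
        return self.y, reward

class TabularAgent:
    """Minimal epsilon-greedy tabular Q-learning agent."""
    def __init__(self, n_states=20, n_actions=5, alpha=0.1, gamma=0.95):
        self.Q = np.zeros((n_states, n_actions))
        self.n_states, self.n_actions = n_states, n_actions
        self.alpha, self.gamma = alpha, gamma

    def discretize(self, y):
        return int(np.clip(y, 0.0, 0.999) * self.n_states)

    def select_action(self, s, eps=0.1):
        if np.random.rand() < eps:
            return np.random.randint(self.n_actions)
        return int(np.argmax(self.Q[s]))

    def update(self, s, a, r, s_next):
        td_error = r + self.gamma * np.max(self.Q[s_next]) - self.Q[s, a]
        self.Q[s, a] += self.alpha * td_error

    def autosave(self, episode, interval, path="q_matrix.npy"):
        if episode % interval == 0:             # save every `interval` episodes
            np.save(path, self.Q)

env = ToySetpointEnv(setpoint=0.5)
agent = TabularAgent()
rlist = []

for episode in range(500):
    env.y, tot_reward = 0.0, 0.0                # reset the process each episode
    for t in range(200):
        s = agent.discretize(env.y)
        a = agent.select_action(s)
        u = a / (agent.n_actions - 1)           # map action index to an input in [0, 1]
        y_next, reward = env.step(u)
        agent.update(s, a, reward, agent.discretize(y_next))
        tot_reward += reward
    rlist.append(tot_reward)
    agent.autosave(episode, 100)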