import numpy as np

# MimoCstr and ModelPredictiveControl are defined elsewhere in the project
def simulation():

    # MPC Evaluation Period
    eval_period = 5

    # Model Initiation
    model = MimoCstr(nsim=50, k0=8.2e10)

    # MPC Initiation
    mpc_init = ModelPredictiveControl(10, model.Nx, model.Nu, 0.1, 0.1, 0.1,
                                      model.xs, model.us)

    # MPC Construction
    mpc_control = mpc_init.get_mpc_controller(model.cstr_ode,
                                              eval_period,
                                              model.x0,
                                              random=False,
                                              verbosity=0)

    # Output Disturbance
    output_disturb = np.zeros(model.Nx)
    x_corrected = np.zeros([model.Nsim + 1, model.Nx])
    """
    Simulation portion
    """

    for t in range(model.Nsim):
        """
        Disturbance
        """

        # if t == 10:
        #     model.disturbance()
        """
        MPC evaluation
        """

        if t % eval_period == 0 and t != 0:
            # Solve the MPC optimization problem
            mpc_init.solve_mpc(model.x, model.u, model.xsp, mpc_control, t)
            # Estimate the output disturbance as the plant's deviation from steady state
            output_disturb = model.xs - model.x[t, :]
        elif t < 5:
            # Hold a fixed startup input until the first MPC evaluation
            model.u[t, :] = [300, 0.1]
        else:
            # Otherwise keep the previous input between MPC evaluations
            model.u[t, :] = model.u[t - 1, :]

        # Calculate the next stages
        model.x[t + 1, :] = model.next_state(model.x[t, :], model.u[t, :])
        x_corrected[t + 1, :] = model.x[t + 1, :] + output_disturb

    print(model.cost_function(x_corrected))

    return model, mpc_init, x_corrected
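
# model.cost_function(x_corrected) above is defined inside the MimoCstr class,
# which is not shown on this page.  A minimal sketch of a quadratic tracking
# cost over the corrected trajectory is given below; the function name and the
# uniform weighting are assumptions, not the project's actual implementation.
def tracking_cost(x_traj, x_sp, q_weight=1.0):
    """Sum of squared deviations of a state trajectory from its setpoint."""
    error = x_traj - x_sp              # broadcast the setpoint over every time step
    return q_weight * np.sum(error ** 2)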
Example 2
import numpy as np

def simulation():

    # Plant Model
    model_plant = MimoCstr(nsim=50)

    # Build Controller Model
    model_control = MimoCstr(nsim=model_plant.Nsim,
                             nx=model_plant.Nx * 2,
                             xs=np.array([0.878, 324.5, 0.659, 0, 0, 0]),
                             x0=np.array([1, 310, 0.659, 0, 0, 0]),
                             control=True)

    # MPC Object Initiation
    control = ModelPredictiveControl(model_control.Nsim,
                                     10,
                                     model_control.Nx,
                                     model_control.Nu,
                                     0.1,
                                     0.1,
                                     0.1,
                                     model_control.xs,
                                     model_control.us,
                                     dist=True)

    # MPC Construction
    mpc_control = control.get_mpc_controller(model_control.cstr_ode,
                                             control.eval_time,
                                             model_control.x0,
                                             random_guess=False)
    """
    Simulation portion
    """

    for t in range(model_plant.Nsim):

        # Solve the MPC optimization problem, obtain current input and predicted state
        model_control.u[t, :], model_control.x[t + 1, :] = control.solve_mpc(
            model_plant.x, model_plant.xsp, mpc_control, t, control.p)

        # Calculate the next states for the plant
        model_plant.x[t + 1, :] = model_plant.next_state(
            model_plant.x[t, :], model_control.u[t, :])

        # Update the P parameters for offset-free control
        control.p = model_plant.x[t + 1, :] - model_control.x[t + 1, 0:3]

    print(model_plant.cost_function())

    return model_plant, model_control, control
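
# In this example the controller model doubles the state dimension
# (nx=model_plant.Nx * 2) so that integrating disturbance states travel
# alongside the physical states, and control.p is refreshed every step with the
# plant/model mismatch.  The sketch below shows what such an augmented
# right-hand side can look like; the real cstr_ode with control=True is not
# shown on this page, so the names here are illustrative only.
def augmented_ode(x_aug, u, plant_ode):
    """Augment a plant ODE with constant (integrating) disturbance states."""
    nx = x_aug.size // 2
    x, d = x_aug[:nx], x_aug[nx:]
    dxdt = plant_ode(x, u) + d          # disturbances enter the physical states additively
    dddt = np.zeros(nx)                 # the disturbances themselves are modeled as constant
    return np.concatenate([dxdt, dddt])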
Example 3
with tf.Session() as sess:
    sess.run(init)

    for i_episode in range(epi):

        # obs = env.reset()
        obs = model.reset()

        for step in range(1, model.Nsim + 1):

            # Evaluate the policy network and map its output index to an input change
            action_val = action.eval(feed_dict={X: obs.reshape(1, num_inputs)})
            action_picked = action_list[action_val[0][0]]
            model.u[step, 0] = model.u[step - 1, 0] + action_picked

            # Advance the plant model one step with the updated input
            model.x[step, :] = model.next_state(model.x[step - 1, :],
                                                model.u[step, :])

            # The new state is the next observation; the reward is based on how
            # close the reactor temperature is to the 324.5 setpoint
            obs = model.x[step, :]
            reward = reward_calc(model.x[step, 1], 324.5)

            # obs, reward, done, _ = env.step(action_val[0][0])

            if step == model.Nsim:

                # avg_steps.append(step)
                # print("Done after steps {}".format(step))

                # Record the final reward of the episode and reset it
                avg_reward.append(reward)
                reward = 0
                break
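
# reward_calc() is not defined in this snippet.  A plausible minimal version
# that rewards keeping the reactor temperature near the 324.5 setpoint is
# sketched below; the tolerance and the penalty shape are assumptions only.
def reward_calc(value, setpoint, tol=0.5):
    """Return 1 inside the tolerance band, otherwise a penalty growing with the error."""
    error = abs(value - setpoint)
    return 1.0 if error <= tol else -error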
Example 4
                # One-hot encode the current state and query the network for its Q-values
                a, allQ = sess.run(
                    [predict, Qout],
                    feed_dict={inputs: np.identity(len(states))[s:s + 1]})

                number = np.random.rand()
                # The exploration threshold is 0.0 here, so this random-action branch never fires
                if number < 0.0:
                    a = [np.random.randint(0, len(actions))]

                model.u[j, 0] = model.u[j - 1, 0] + rl.actions[a[0]]

                rl.feedback_evaluation(j)

            else:
                model.u[j, :] = model.u[j - 1, :]

            model.x[j, :] = model.next_state(model.x[j - 1, :], model.u[j, :])

            if j == rl.eval_feedback:
                s1 = rl.state_detection(model.x[j, 1])
                r = reward_calc(model.x[j, 1], 324.5)
                Q1 = sess.run(
                    Qout,
                    feed_dict={inputs: np.identity(len(states))[s1:s1 + 1]})
                # Q-learning target: r + discount_factor * max_a' Q(s', a')
                maxQ1 = np.max(Q1)
                targetQ = allQ
                targetQ[0, a] = r + rl.discount_factor * maxQ1

                # Train the network toward the updated target Q-values
                _, W1 = sess.run([updateModel, W],
                                 feed_dict={
                                     inputs: np.identity(len(states))[s:s + 1],
                                     nextQ: targetQ
                                 })
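
# The update above uses the standard Q-learning target r + gamma * max_a' Q(s', a').
# A self-contained restatement of that target (illustrative only, assuming numpy
# is imported as np in the surrounding script):
def q_learning_target(reward, q_next, discount_factor):
    """Bellman target used to build targetQ[0, a] above."""
    return reward + discount_factor * np.max(q_next)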