import numpy as np

# MimoCstr and ModelPredictiveControl are assumed to be defined elsewhere in this project.


def simulation():
    # MPC evaluation period
    eval_period = 5

    # Model initiation
    model = MimoCstr(nsim=50, k0=8.2e10)

    # MPC initiation
    mpc_init = ModelPredictiveControl(10, model.Nx, model.Nu, 0.1, 0.1, 0.1,
                                      model.xs, model.us)

    # MPC construction
    mpc_control = mpc_init.get_mpc_controller(model.cstr_ode, eval_period,
                                              model.x0, random=False,
                                              verbosity=0)

    # Output disturbance estimate and disturbance-corrected state trajectory
    output_disturb = np.zeros(model.Nx)
    x_corrected = np.zeros([model.Nsim + 1, model.Nx])

    """
    Simulation portion
    """

    for t in range(model.Nsim):

        """
        Disturbance
        """
        # if t == 10:
        #     model.disturbance()

        """
        MPC evaluation
        """
        if t % eval_period == 0 and t != 0:
            # Solve the MPC optimization problem
            mpc_init.solve_mpc(model.x, model.u, model.xsp, mpc_control, t)
            # Re-estimate the output disturbance from the setpoint offset
            output_disturb = model.xs - model.x[t, :]
        elif t < 5:
            # Hold a fixed start-up input for the first few steps
            model.u[t, :] = [300, 0.1]
        else:
            # Hold the previous input between MPC evaluations
            model.u[t, :] = model.u[t - 1, :]

        # Calculate the next states
        model.x[t + 1, :] = model.next_state(model.x[t, :], model.u[t, :])
        x_corrected[t + 1, :] = model.x[t + 1, :] + output_disturb

    print(model.cost_function(x_corrected))

    return model, mpc_init, x_corrected
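A minimal driver for this routine might look like the following. This is a sketch only: it assumes matplotlib is available, that state index 1 of the CSTR is the reactor temperature, and that model.xs holds the steady-state setpoint; the variable names in the driver are illustrative, not part of the project.

import matplotlib.pyplot as plt

# Hypothetical driver: run the closed-loop simulation and plot the
# disturbance-corrected reactor temperature (state index 1) over time.
model, mpc_init, x_corrected = simulation()

time = range(model.Nsim + 1)
plt.plot(time, x_corrected[:, 1], label="corrected temperature")
plt.axhline(y=model.xs[1], linestyle="--", label="setpoint")
plt.xlabel("time step")
plt.ylabel("reactor temperature (K)")
plt.legend()
plt.show()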
import numpy as np

# MimoCstr and ModelPredictiveControl are assumed to be defined elsewhere in this project.


def simulation():
    # Plant model
    model_plant = MimoCstr(nsim=50)

    # Build the controller model, augmented with disturbance states for offset-free MPC
    model_control = MimoCstr(nsim=model_plant.Nsim, nx=model_plant.Nx * 2,
                             xs=np.array([0.878, 324.5, 0.659, 0, 0, 0]),
                             x0=np.array([1, 310, 0.659, 0, 0, 0]),
                             control=True)

    # MPC object initiation
    control = ModelPredictiveControl(model_control.Nsim, 10, model_control.Nx,
                                     model_control.Nu, 0.1, 0.1, 0.1,
                                     model_control.xs, model_control.us,
                                     dist=True)

    # MPC construction
    mpc_control = control.get_mpc_controller(model_control.cstr_ode,
                                             control.eval_time,
                                             model_control.x0,
                                             random_guess=False)

    """
    Simulation portion
    """

    for t in range(model_plant.Nsim):

        # Solve the MPC optimization problem; obtain the current input and predicted state
        model_control.u[t, :], model_control.x[t + 1, :] = control.solve_mpc(
            model_plant.x, model_plant.xsp, mpc_control, t, control.p)

        # Calculate the next states of the plant
        model_plant.x[t + 1, :] = model_plant.next_state(model_plant.x[t, :],
                                                         model_control.u[t, :])

        # Update the disturbance parameter p for offset-free control:
        # the plant-model mismatch on the three physical states
        control.p = model_plant.x[t + 1, :] - model_control.x[t + 1, 0:3]

    print(model_plant.cost_function())

    return model_plant, model_control, control
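The p update above is the core of the offset-free scheme: the mismatch between the measured plant states and the first three (physical) states of the augmented controller model is fed back as a constant disturbance estimate, which the next MPC solve uses to shift its predictions. A minimal numeric sketch of that arithmetic, with made-up values, is:

import numpy as np

# Illustrative one-step disturbance estimate (values are made up).
x_plant = np.array([0.90, 323.8, 0.66])    # measured plant states
x_model = np.array([0.88, 324.5, 0.659])   # controller-model prediction of the same states

p = x_plant - x_model                      # plant-model mismatch
# Feeding p back to the controller shifts the model's predictions by the
# estimated disturbance, so steady-state offset is removed over time.
x_model_corrected = x_model + p
print(p, x_model_corrected)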
import tensorflow as tf

# init, epi, model, action, X, num_inputs, action_list, avg_reward, and
# reward_calc are defined earlier in this script.

with tf.Session() as sess:
    sess.run(init)

    for i_episode in range(epi):
        # obs = env.reset()
        obs = model.reset()

        for step in range(1, model.Nsim + 1):
            # Evaluate the policy network to pick an input move for the current observation
            action_val = action.eval(feed_dict={X: obs.reshape(1, num_inputs)})
            action_picked = action_list[action_val[0][0]]

            # Apply the picked move and simulate the model one step forward
            model.u[step, 0] = model.u[step - 1, 0] + action_picked
            model.x[step, :] = model.next_state(model.x[step - 1, :], model.u[step, :])
            obs = model.x[step, :]
            reward = reward_calc(model.x[step, 1], 324.5)
            # Gym-style alternative:
            # obs, reward, done, _ = env.step(action_val[0][0])

            # Record the final reward of the episode
            if step == model.Nsim:
                # avg_steps.append(step)
                # print("Done after steps {}".format(step))
                avg_reward.append(reward)
                # reward = 0
                # break
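reward_calc is defined elsewhere in the project. One plausible shape for it, shown here purely as an illustrative assumption, is a tracking reward around the 324.5 K temperature setpoint:

def reward_calc(temperature, setpoint):
    # Illustrative assumption, not the project's actual definition: the reward
    # is the negative absolute tracking error, so it is maximized (at 0) when
    # the reactor temperature sits exactly on the setpoint.
    return -abs(temperature - setpoint)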
# Excerpt from inside the training loop: sess, predict, Qout, inputs, states,
# actions, model, rl, reward_calc, updateModel, W, nextQ, j, and s are all
# defined earlier in this script.

if j % rl.eval_time == 0:  # hypothetical condition; the enclosing `if` is not part of this excerpt
    # Choose an action greedily from the Q-network, with a chance of a random action
    a, allQ = sess.run([predict, Qout],
                       feed_dict={inputs: np.identity(len(states))[s:s + 1]})

    number = np.random.rand()
    if number < 0.0:  # an exploration threshold of 0.0 disables random actions
        a = [np.random.randint(0, len(actions))]

    # Apply the chosen input move and schedule the feedback evaluation
    model.u[j, 0] = model.u[j - 1, 0] + rl.actions[a[0]]
    rl.feedback_evaluation(j)
else:
    # Hold the previous input between action evaluations
    model.u[j, :] = model.u[j - 1, :]

# Simulate the model one step forward
model.x[j, :] = model.next_state(model.x[j - 1, :], model.u[j, :])

if j == rl.eval_feedback:
    # Observe the resulting discrete state and the tracking reward
    s1 = rl.state_detection(model.x[j, 1])
    r = reward_calc(model.x[j, 1], 324.5)

    # Obtain the Q-values of the next state and form the Bellman target
    Q1 = sess.run(Qout,
                  feed_dict={inputs: np.identity(len(states))[s1:s1 + 1]})
    maxQ1 = np.max(Q1)  # the maximum Q-value itself; np.argmax would return its index
    targetQ = allQ
    targetQ[0, a] = r + rl.discount_factor * maxQ1

    # Train the network toward the target Q-values
    _, W1 = sess.run([updateModel, W],
                     feed_dict={inputs: np.identity(len(states))[s:s + 1],
                                nextQ: targetQ})
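The tensors and ops this excerpt feeds and fetches (inputs, W, Qout, predict, nextQ, updateModel) must be constructed earlier in the script. A minimal TF1-style sketch of one plausible construction, under the assumption of a one-hot tabular Q-network over the discretized states and actions, is:

import tensorflow as tf

# Assumed one-hot tabular Q-network (a sketch, not the project's actual graph).
# `states` and `actions` are the discretized state and action lists used above.
inputs = tf.placeholder(shape=[1, len(states)], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([len(states), len(actions)], 0, 0.01))
Qout = tf.matmul(inputs, W)    # Q-values of every action for the one-hot input state
predict = tf.argmax(Qout, 1)   # index of the greedy action

# Loss is the squared difference between the Bellman target and the current Q-values.
nextQ = tf.placeholder(shape=[1, len(actions)], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
updateModel = trainer.minimize(loss)

init = tf.global_variables_initializer()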