def _get_obs(self):
    """
    Normalize the observation and save values for plotting.

    :return: normalized states
    """
    self.T_plot = np.append(self.T_plot, self.x[0][0])
    self.Ebat_plot = np.append(self.Ebat_plot, self.x[1][0])
    self.T_ref_plot = np.append(self.T_ref_plot, self.maxtracking[0])
    # normalize state
    t = minmax_norm(self.x[0][0], self.lbx[0][0], self.ubx[0][0])
    e = minmax_norm(self.x[1][0], self.lbx[1][0], self.ubx[1][0])
    return np.array([[t], [e]])
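# The helpers minmax_norm and minmax_norm_back are used throughout but are not
# part of this excerpt. A minimal sketch, assuming the x = (x - min) / (max - min)
# convention stated in the normalization comment further below:
def minmax_norm(x, x_min, x_max):
    """Scale x from [x_min, x_max] to [0, 1]."""
    return (x - x_min) / (x_max - x_min)


def minmax_norm_back(x, x_min, x_max):
    """Invert minmax_norm: map x from [0, 1] back to [x_min, x_max]."""
    return x * (x_max - x_min) + x_min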
def constrain_action(self, action, env):
    # solve the projection optimization problem
    state = env.x
    state_new = np.copy(state)
    state_new[0, 0] = minmax_norm(state[0, 0], env.lbx[0], env.ubx[0])
    state_new[1, 0] = minmax_norm(state[1, 0], env.lbx[1], env.ubx[1])
    # stack the proposed action and the normalized state into the parameter vector
    param = np.reshape(
        np.concatenate((np.reshape(action, (1, 2))[0],
                        np.reshape(state_new, (1, 2))[0])), (1, 4))
    res = self.solver(lbx=self.lbax, ubx=self.ubax,
                      lbg=self.lbg, ubg=self.ubg, p=param)  # p = mu
    new_action = res['x']
    return new_action
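# How self.solver is constructed is not shown in this excerpt. A minimal
# sketch (an assumption, not the repository's exact formulation): project the
# proposed action onto the feasible set by minimizing its distance to the
# proposal, with the parameter vector mu = [action (2), normalized state (2)].
from casadi import SX, vertcat, nlpsol

a = SX.sym('a', 2)      # decision variable: the projected action
mu = SX.sym('mu', 4)    # parameters: proposed action and normalized state
J = (a[0] - mu[0]) ** 2 + (a[1] - mu[1]) ** 2  # stay close to the proposal
g = vertcat(a)          # placeholder; the real g would encode the learned constraint c(s, a)
prob = {'f': J, 'x': a, 'g': g, 'p': mu}
projection_solver = nlpsol('solver', 'ipopt', prob)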
def get_future_tracking(self):
    """
    Return the normalized future reference trajectory of the temperature.

    :return: list of nb_maxtracking normalized reference values
    """
    ref = [0] * self.nb_maxtracking
    for i in range(self.nb_maxtracking):
        ref[i] = minmax_norm(self.maxtracking[i], self.lbx[0][0], self.ubx[0][0])
    return ref
def get_future_dist(self, nb_disturbance):
    """
    Get nb_disturbance future disturbance values, interleaved as
    [room_temp, sol_rad, int_gains] per time step.

    :param nb_disturbance: total number of disturbance values (3 per step)
    :return: list of normalized disturbance values
    """
    dist = []
    steps = nb_disturbance // 3  # three disturbance signals per time step
    for i in range(steps):
        count = (self.k + i) % self.room_temp.shape[0]  # wrap around the data
        dist.append(minmax_norm(self.room_temp.item(count),
                                self.room_temp_min, self.room_temp_max))
        dist.append(minmax_norm(self.sol_rad.item(count),
                                self.sol_rad_min, self.sol_rad_max))
        dist.append(minmax_norm(self.int_gains.item(count),
                                self.int_gains_min, self.int_gains_max))
    return dist
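# Example of the returned layout for nb_disturbance = 6 (two steps ahead):
# [room_temp_k, sol_rad_k, int_gains_k,
#  room_temp_k+1, sol_rad_k+1, int_gains_k+1]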
num_constraint_e = 0
num_unconstraint_t = 0
num_unconstraint_e = 0
for i in range(num_sim):
    # sample a new random action
    env.reset_states()
    env_original.x = np.copy(env.x)
    state_old = np.copy(env.x)
    action1 = np.random.uniform(0, 1)
    action2 = np.random.uniform(-1, 1)
    action = np.array([action1, action2])
    state_old[0, 0] = minmax_norm(state_old[0, 0], 20, 25)
    state_old[1, 0] = minmax_norm(state_old[1, 0], 0, 200000)
    param = np.reshape(
        np.concatenate((action, np.reshape(state_old, (1, 2))[0])), (1, 4))
    # with the old state and the random input, a new projected input is obtained
    res = solver(lbx=lbax, ubx=ubax, lbg=lbg, ubg=ubg, p=param)  # p = mu
    new_action = res['x']
    # apply the projected input and the raw input to the two systems
    _, _ = env.step(new_action)
    _, _ = env_original.step(action)
    # count the number of constraint violations
    if env_original.x[0, 0] > 25 or env_original.x[0, 0] < 20:
        num_unconstraint_t += 1  # inferred completion: temperature bound violated without projection
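# A short sketch (not in the original excerpt) of how the violation counters
# could be reported once the Monte-Carlo loop has finished:
print("unconstrained T violations: {:.1%}".format(num_unconstraint_t / num_sim))
print("unconstrained E violations: {:.1%}".format(num_unconstraint_e / num_sim))
print("constrained E violations:   {:.1%}".format(num_constraint_e / num_sim))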
def mpc_step(self, x_ref_values, x_init, NN_flag,
             min1=-inf, min2=-inf, mindist1=-inf, mindist2=-inf, mindist3=-inf,
             max1=inf, max2=inf, maxdist1=inf, maxdist2=inf, maxdist3=inf):
    # symbolic variables
    X = SX.sym("X", (self.N + 1) * self.nx, 1)
    U = SX.sym("U", self.N * self.nu, 1)
    lbu = np.array([[-1], [-1]])
    ubu = np.array([[1], [1]])
    if NN_flag == 1:
        # the NN model operates on normalized states, so the bounds are [0, 1]
        lbx = np.array([[0], [0]])
        ubx = np.array([[1], [1]])
    else:
        lbx = np.array([[20], [0]])
        ubx = np.array([[25], [200000 * self.factor]])
    mpc_x = np.zeros((self.S + 1 - self.N, self.nx))
    mpc_x[0, :] = x_init.T
    mpc_u = np.zeros((self.S - self.N, self.nu))
    for step in range(self.S - self.N):
        J = 0
        # system description
        G = []
        lbg = []
        ubg = []
        lb_X = []
        ub_X = []
        lb_U = []
        ub_U = []
        for k in range(self.N):
            x_k = X[k * self.nx:(k + 1) * self.nx, :]
            x_k_next = X[(k + 1) * self.nx:(k + 2) * self.nx, :]
            u_k = U[k * self.nu:(k + 1) * self.nu, :]
            if NN_flag == 1 and self.dist != 0:
                # normalize disturbances
                room_temp = minmax_norm(self.room_temp.item(step + k), mindist1, maxdist1)
                sol_rad = minmax_norm(self.sol_rad.item(step + k), mindist2, maxdist2)
                int_gains = minmax_norm(self.int_gains.item(step + k), mindist3, maxdist3)
                d_k = np.array([room_temp, sol_rad, int_gains])
            else:
                d_k = np.array([[self.room_temp.item(step + k)],
                                [self.sol_rad.item(step + k)],
                                [self.int_gains.item(step + k)]])
            # objective
            x_ref = x_ref_values[:, step + k].T
            J += self.J_function_stage(x_ref, x_k, u_k)
            # equality constraints (system equation)
            x_next = self.system_nominal(x_k, u_k, d_k)
            if k == 0:
                # initial-condition constraint
                G.append(x_k)
                lbg.append(x_init)
                ubg.append(x_init)
            G.append(minus(x_next, x_k_next))
            lbg.append(np.zeros((self.nx, 1)))
            ubg.append(np.zeros((self.nx, 1)))
            # inequality constraints
            lb_X.append(lbx)
            ub_X.append(ubx)
            lb_U.append(lbu)
            ub_U.append(ubu)
        # terminal cost and constraints
        x_k = X[self.N * self.nx:(self.N + 1) * self.nx, :]
        J += self.J_function_terminal(x_ref_values[:, step], x_k)
        lb_X.append(lbx)
        ub_X.append(ubx)
        # solve optimization problem: f - objective, x - variables, g - constraints
        lb = vertcat(vertcat(*lb_X), vertcat(*lb_U))
        ub = vertcat(vertcat(*ub_X), vertcat(*ub_U))
        prob = {'f': J, 'x': vertcat(X, U), 'g': vertcat(*G)}
        opts = {}
        # opts["ipopt.print_level"] = 0
        # opts["print_time"] = 0
        solver = nlpsol('solver', 'ipopt', prob, opts)  # nlpsol ipopt, qpsol qpoases
        res = solver(lbx=lb, ubx=ub, lbg=vertcat(*lbg), ubg=vertcat(*ubg))
        u_opt = res['x'][(self.N + 1) * self.nx:(self.N + 1) * self.nx + self.nu, :]
        d = np.array([[self.room_temp.item(step)],
                      [self.sol_rad.item(step)],
                      [self.int_gains.item(step)]])
        if NN_flag == 1:
            # denormalize before propagating through the real (physical) model
            x_init[0, 0] = minmax_norm_back(x_init[0, 0], min1, max1)
            x_init[1, 0] = minmax_norm_back(x_init[1, 0], min2, max2)
        x_plus = self.system_nominal_real(x_init, u_opt, d)
        if NN_flag == 1:
            # normalize the successor state again for the next NN-based step
            x_plus[0, 0] = minmax_norm(x_plus[0, 0], min1, max1)
            x_plus[1, 0] = minmax_norm(x_plus[1, 0], min2, max2)
        mpc_x[step + 1, :] = x_plus.T
        mpc_u[step, :] = u_opt.T
        x_init = x_plus
    return mpc_x, mpc_u
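# J_function_stage and J_function_terminal are not part of this excerpt. A
# minimal sketch of compatible quadratic tracking costs, assuming the weight
# matrices Q and R passed to the MPC constructor are stored on self (the
# mtimes details are an assumption, not the repository's exact code):
from casadi import mtimes

def J_function_stage(self, x_ref, x_k, u_k):
    # stage cost: penalize reference deviation and control effort
    e = x_k - x_ref  # CasADi treats the 1-D reference as a column vector
    return mtimes([e.T, self.Q, e]) + mtimes([u_k.T, self.R, u_k])

def J_function_terminal(self, x_ref, x_k):
    # terminal cost: reference deviation only
    e = x_k - x_ref
    return mtimes([e.T, self.Q, e])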
            break
        elif state_flag == 2 and env.x[0][0] > 25:
            env.x[0][0] = 25
            print("over")
            break

        # save action, old state and new state
        states_new = np.vstack([states_new, [env.x[state, 0]]])
        states_old = np.vstack([states_old, [old_state[state, 0]]])
        actions_train = np.vstack([actions_train, action])
        old_state = env.x

"""----NORMALIZE DATASET---------------------------------------------------------------------------------------------"""
# max-min normalization of the data: x = (x - min) / (max - min)
states_old_norm = minmax_norm(states_old, states_old.min(), states_old.max())
states_new_norm = minmax_norm(states_new, states_new.min(), states_new.max())

"""----GENERATE NEURAL NETWORK---------------------------------------------------------------------------------------"""
# generate and train a neural network which describes c(s, a)
# initialize the neural network
network = NN(num_in, num_out, num_hidden, activation, activation, activation_out, optimizer)
# load weights if they already exist
if os.path.isfile('constraints_{}_weights.h5f'.format(ENV_NAME)):
    path = 'constraints_{}_weights.h5f'.format(ENV_NAME)
    network.model.load_weights(path)
    print("load weights")
# train the constraint function
inputs = np.concatenate((states_old_norm, actions_train), axis=1)
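# The NN wrapper class is not included in this excerpt. A minimal sketch of a
# compatible implementation, assuming a Keras feed-forward model behind the
# .model attribute used above; the constructor signature follows the call
# site, everything else is an assumption:
from tensorflow import keras

class NN:
    def __init__(self, num_in, num_out, num_hidden,
                 act1, act2, act_out, optimizer):
        self.model = keras.Sequential([
            keras.layers.Dense(num_hidden, activation=act1, input_shape=(num_in,)),
            keras.layers.Dense(num_hidden, activation=act2),
            keras.layers.Dense(num_out, activation=act_out),
        ])
        self.model.compile(optimizer=optimizer, loss='mse')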
else:
    min_state1 = min(states_old[:, 0])
    max_state1 = max(states_old[:, 0])
    min_state2 = min(states_old[:, 1])
    max_state2 = max(states_old[:, 1])
    min_dist1 = min(disturbances[:, 0])
    max_dist1 = max(disturbances[:, 0])
    min_dist2 = min(disturbances[:, 1])
    max_dist2 = max(disturbances[:, 1])
    min_dist3 = min(disturbances[:, 2])
    max_dist3 = max(disturbances[:, 2])
    states_old_norm[:, 0] = minmax_norm(states_old_norm[:, 0], min_state1, max_state1)
    states_new_norm[:, 0] = minmax_norm(states_new_norm[:, 0], min_state1, max_state1)
    states_old_norm[:, 1] = minmax_norm(states_old_norm[:, 1], min_state2, max_state2)
    states_new_norm[:, 1] = minmax_norm(states_new_norm[:, 1], min_state2, max_state2)
    if dist_flag != 0:
        disturbances_norm[:, 0] = minmax_norm(disturbances_norm[:, 0], min_dist1, max_dist1)
        disturbances_norm[:, 1] = minmax_norm(disturbances_norm[:, 1], min_dist2, max_dist2)
        disturbances_norm[:, 2] = minmax_norm(disturbances_norm[:, 2], min_dist3, max_dist3)

"""----Split into test and training data-----------------------------------------------------------------------------"""
# hold out the last 10 % of the samples for testing
[train_states_new, test_states_new] = np.split(states_new_norm, [round(num_episodes * num_samples * 0.9), ])
[train_states_old, test_states_old] = np.split(states_old_norm, [round(num_episodes * num_samples * 0.9), ])
[train_actions, test_actions] = np.split(actions, [round(num_episodes * num_samples * 0.9), ])
ref = np.ones((2, S + N + 1))
ref[0, :] = ref[0, :] * 22.5
# ref[0][45:-1] = ref[0][45:-1] + 4
ref[1, :] = ref[1, :] * 100000
x_init = np.array([[20], [0]])
mpc_model = MPC(S, N, Q, R, dist, dist_flag)
states_model, actions_model = mpc_model.mpc_step(
    ref, x_init, 0, min_state1, min_state2, min_dist1, min_dist2, min_dist3,
    max_state1, max_state2, max_dist1, max_dist2, max_dist3)

# the NN-based MPC runs on normalized references and states
ref = np.ones((2, S + N + 1))
ref[0, :] = minmax_norm(22.5, min_state1, max_state1)
ref[1, :] = minmax_norm(100000, min_state2, max_state2)
x_init = np.array([[minmax_norm(20, min_state1, max_state1)],
                   [minmax_norm(0, min_state2, max_state2)]])
mpc_network = MPC(S, N, Q, R, dist, dist_flag, network)
start = time.time()
states_network, actions_network = mpc_network.mpc_step(
    ref, x_init, 1, min_state1, min_state2, min_dist1, min_dist2, min_dist3,
    max_state1, max_state2, max_dist1, max_dist2, max_dist3)
end = time.time()
print("TIME in minutes:", (end - start) / 60)

plt.figure()
plt.subplot(311)
plt.plot(20 * np.ones((len(states_network),)), '--', color='grey')
plt.plot(25 * np.ones((len(states_network),)), '--', color='grey')