def _get_obs(self):
    """
    Normalize the observation and save values for plotting.

    :return: normalized states
    """
    self.T_plot = np.append(self.T_plot, self.x[0][0])
    self.Ebat_plot = np.append(self.Ebat_plot, self.x[1][0])
    self.T_ref_plot = np.append(self.T_ref_plot, self.maxtracking[0])
    # normalize state
    t = minmax_norm(self.x[0][0], self.lbx[0][0], self.ubx[0][0])
    e = minmax_norm(self.x[1][0], self.lbx[1][0], self.ubx[1][0])
    return np.array([[t], [e]])
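# The helpers minmax_norm and minmax_norm_back are used throughout but are not
# part of this excerpt. A minimal sketch, assuming the x = (x - min) / (max - min)
# convention stated in the normalization comment further below:
def minmax_norm(x, x_min, x_max):
    """Scale x from [x_min, x_max] to [0, 1]."""
    return (x - x_min) / (x_max - x_min)


def minmax_norm_back(x, x_min, x_max):
    """Invert minmax_norm: map x from [0, 1] back to [x_min, x_max]."""
    return x * (x_max - x_min) + x_min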
def constrain_action(self, action, env):
    # solve the projection optimization problem
    state = env.x
    state_new = np.copy(state)
    state_new[0, 0] = minmax_norm(state[0, 0], env.lbx[0], env.ubx[0])
    state_new[1, 0] = minmax_norm(state[1, 0], env.lbx[1], env.ubx[1])
    # stack the proposed action and the normalized state into the parameter vector
    param = np.reshape(
        np.concatenate((np.reshape(action, (1, 2))[0],
                        np.reshape(state_new, (1, 2))[0])), (1, 4))
    res = self.solver(lbx=self.lbax, ubx=self.ubax,
                      lbg=self.lbg, ubg=self.ubg, p=param)  # p = mu
    new_action = res['x']
    return new_action
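# How self.solver is constructed is not shown in this excerpt. A minimal
# sketch (an assumption, not the repository's exact formulation): project the
# proposed action onto the feasible set by minimizing its distance to the
# proposal, with the parameter vector mu = [action (2), normalized state (2)].
from casadi import SX, vertcat, nlpsol

a = SX.sym('a', 2)      # decision variable: the projected action
mu = SX.sym('mu', 4)    # parameters: proposed action and normalized state
J = (a[0] - mu[0]) ** 2 + (a[1] - mu[1]) ** 2  # stay close to the proposal
g = vertcat(a)          # placeholder; the real g would encode the learned constraint c(s, a)
prob = {'f': J, 'x': a, 'g': g, 'p': mu}
projection_solver = nlpsol('solver', 'ipopt', prob)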
def get_future_tracking(self):
    """
    Return the normalized future reference trajectory of the temperature.

    :return: list of nb_maxtracking normalized reference values
    """
    ref = [0] * self.nb_maxtracking
    for i in range(self.nb_maxtracking):
        ref[i] = minmax_norm(self.maxtracking[i], self.lbx[0][0], self.ubx[0][0])
    return ref
def get_future_dist(self, nb_disturbance):
    """
    Get nb_disturbance future disturbance values, interleaved as
    [room_temp, sol_rad, int_gains] per time step.

    :param nb_disturbance: total number of disturbance values (3 per step)
    :return: list of normalized disturbance values
    """
    dist = []
    steps = nb_disturbance // 3  # three disturbance signals per time step
    for i in range(steps):
        count = (self.k + i) % self.room_temp.shape[0]  # wrap around the data
        dist.append(minmax_norm(self.room_temp.item(count),
                                self.room_temp_min, self.room_temp_max))
        dist.append(minmax_norm(self.sol_rad.item(count),
                                self.sol_rad_min, self.sol_rad_max))
        dist.append(minmax_norm(self.int_gains.item(count),
                                self.int_gains_min, self.int_gains_max))
    return dist
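# Example of the returned layout for nb_disturbance = 6 (two steps ahead):
# [room_temp_k, sol_rad_k, int_gains_k,
#  room_temp_k+1, sol_rad_k+1, int_gains_k+1]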
num_constraint_e = 0
num_unconstraint_t = 0
num_unconstraint_e = 0
for i in range(num_sim):
    # sample a new random action
    env.reset_states()
    env_original.x = np.copy(env.x)
    state_old = np.copy(env.x)
    action1 = np.random.uniform(0, 1)
    action2 = np.random.uniform(-1, 1)
    action = np.array([action1, action2])
    state_old[0, 0] = minmax_norm(state_old[0, 0], 20, 25)
    state_old[1, 0] = minmax_norm(state_old[1, 0], 0, 200000)
    param = np.reshape(
        np.concatenate((action, np.reshape(state_old, (1, 2))[0])), (1, 4))
    # with the old state and the random input, a new projected input is obtained
    res = solver(lbx=lbax, ubx=ubax, lbg=lbg, ubg=ubg, p=param)  # p = mu
    new_action = res['x']
    # apply the projected input and the raw input to the two systems
    _, _ = env.step(new_action)
    _, _ = env_original.step(action)
    # count the number of constraint violations
    if env_original.x[0, 0] > 25 or env_original.x[0, 0] < 20:
        num_unconstraint_t += 1  # inferred completion: temperature bound violated without projection
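# A short sketch (not in the original excerpt) of how the violation counters
# could be reported once the Monte-Carlo loop has finished:
print("unconstrained T violations: {:.1%}".format(num_unconstraint_t / num_sim))
print("unconstrained E violations: {:.1%}".format(num_unconstraint_e / num_sim))
print("constrained E violations:   {:.1%}".format(num_constraint_e / num_sim))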
def mpc_step(self, x_ref_values, x_init, NN_flag,
             min1=-inf, min2=-inf, mindist1=-inf, mindist2=-inf, mindist3=-inf,
             max1=inf, max2=inf, maxdist1=inf, maxdist2=inf, maxdist3=inf):
    # symbolic variables
    X = SX.sym("X", (self.N + 1) * self.nx, 1)
    U = SX.sym("U", self.N * self.nu, 1)
    lbu = np.array([[-1], [-1]])
    ubu = np.array([[1], [1]])
    if NN_flag == 1:
        # the NN model operates on normalized states, so the bounds are [0, 1]
        lbx = np.array([[0], [0]])
        ubx = np.array([[1], [1]])
    else:
        lbx = np.array([[20], [0]])
        ubx = np.array([[25], [200000 * self.factor]])
    mpc_x = np.zeros((self.S + 1 - self.N, self.nx))
    mpc_x[0, :] = x_init.T
    mpc_u = np.zeros((self.S - self.N, self.nu))
    for step in range(self.S - self.N):
        J = 0
        # system description
        G = []
        lbg = []
        ubg = []
        lb_X = []
        ub_X = []
        lb_U = []
        ub_U = []
        for k in range(self.N):
            x_k = X[k * self.nx:(k + 1) * self.nx, :]
            x_k_next = X[(k + 1) * self.nx:(k + 2) * self.nx, :]
            u_k = U[k * self.nu:(k + 1) * self.nu, :]
            if NN_flag == 1 and self.dist != 0:
                # normalize disturbances
                room_temp = minmax_norm(self.room_temp.item(step + k), mindist1, maxdist1)
                sol_rad = minmax_norm(self.sol_rad.item(step + k), mindist2, maxdist2)
                int_gains = minmax_norm(self.int_gains.item(step + k), mindist3, maxdist3)
                d_k = np.array([room_temp, sol_rad, int_gains])
            else:
                d_k = np.array([[self.room_temp.item(step + k)],
                                [self.sol_rad.item(step + k)],
                                [self.int_gains.item(step + k)]])
            # objective
            x_ref = x_ref_values[:, step + k].T
            J += self.J_function_stage(x_ref, x_k, u_k)
            # equality constraints (system equation)
            x_next = self.system_nominal(x_k, u_k, d_k)
            if k == 0:
                # initial-condition constraint
                G.append(x_k)
                lbg.append(x_init)
                ubg.append(x_init)
            G.append(minus(x_next, x_k_next))
            lbg.append(np.zeros((self.nx, 1)))
            ubg.append(np.zeros((self.nx, 1)))
            # inequality constraints
            lb_X.append(lbx)
            ub_X.append(ubx)
            lb_U.append(lbu)
            ub_U.append(ubu)
        # terminal cost and constraints
        x_k = X[self.N * self.nx:(self.N + 1) * self.nx, :]
        J += self.J_function_terminal(x_ref_values[:, step], x_k)
        lb_X.append(lbx)
        ub_X.append(ubx)
        # solve optimization problem: f - objective, x - variables, g - constraints
        lb = vertcat(vertcat(*lb_X), vertcat(*lb_U))
        ub = vertcat(vertcat(*ub_X), vertcat(*ub_U))
        prob = {'f': J, 'x': vertcat(X, U), 'g': vertcat(*G)}
        opts = {}
        # opts["ipopt.print_level"] = 0
        # opts["print_time"] = 0
        solver = nlpsol('solver', 'ipopt', prob, opts)  # nlpsol ipopt, qpsol qpoases
        res = solver(lbx=lb, ubx=ub, lbg=vertcat(*lbg), ubg=vertcat(*ubg))
        u_opt = res['x'][(self.N + 1) * self.nx:(self.N + 1) * self.nx + self.nu, :]
        d = np.array([[self.room_temp.item(step)],
                      [self.sol_rad.item(step)],
                      [self.int_gains.item(step)]])
        if NN_flag == 1:
            # denormalize before propagating through the real (physical) model
            x_init[0, 0] = minmax_norm_back(x_init[0, 0], min1, max1)
            x_init[1, 0] = minmax_norm_back(x_init[1, 0], min2, max2)
        x_plus = self.system_nominal_real(x_init, u_opt, d)
        if NN_flag == 1:
            # normalize the successor state again for the next NN-based step
            x_plus[0, 0] = minmax_norm(x_plus[0, 0], min1, max1)
            x_plus[1, 0] = minmax_norm(x_plus[1, 0], min2, max2)
        mpc_x[step + 1, :] = x_plus.T
        mpc_u[step, :] = u_opt.T
        x_init = x_plus
    return mpc_x, mpc_u
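# J_function_stage and J_function_terminal are not part of this excerpt. A
# minimal sketch of compatible quadratic tracking costs, assuming the weight
# matrices Q and R passed to the MPC constructor are stored on self (the
# mtimes details are an assumption, not the repository's exact code):
from casadi import mtimes

def J_function_stage(self, x_ref, x_k, u_k):
    # stage cost: penalize reference deviation and control effort
    e = x_k - x_ref  # CasADi treats the 1-D reference as a column vector
    return mtimes([e.T, self.Q, e]) + mtimes([u_k.T, self.R, u_k])

def J_function_terminal(self, x_ref, x_k):
    # terminal cost: reference deviation only
    e = x_k - x_ref
    return mtimes([e.T, self.Q, e])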
            break
        elif state_flag == 2 and env.x[0][0] > 25:
            env.x[0][0] = 25
            print("over")
            break

        # save action, old state and new state
        states_new = np.vstack([states_new, [env.x[state, 0]]])
        states_old = np.vstack([states_old, [old_state[state, 0]]])
        actions_train = np.vstack([actions_train, action])
        old_state = env.x

"""----NORMALIZE DATASET---------------------------------------------------------------------------------------------"""
# max-min normalization of the data: x = (x - min) / (max - min)
states_old_norm = minmax_norm(states_old, states_old.min(), states_old.max())
states_new_norm = minmax_norm(states_new, states_new.min(), states_new.max())

"""----GENERATE NEURAL NETWORK---------------------------------------------------------------------------------------"""
# generate and train a neural network which describes c(s, a)
# initialize the neural network
network = NN(num_in, num_out, num_hidden, activation, activation, activation_out, optimizer)
# load weights if they already exist
if os.path.isfile('constraints_{}_weights.h5f'.format(ENV_NAME)):
    path = 'constraints_{}_weights.h5f'.format(ENV_NAME)
    network.model.load_weights(path)
    print("load weights")
# train the constraint function
inputs = np.concatenate((states_old_norm, actions_train), axis=1)
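# The NN wrapper class is not included in this excerpt. A minimal sketch of a
# compatible implementation, assuming a Keras feed-forward model behind the
# .model attribute used above; the constructor signature follows the call
# site, everything else is an assumption:
from tensorflow import keras

class NN:
    def __init__(self, num_in, num_out, num_hidden,
                 act1, act2, act_out, optimizer):
        self.model = keras.Sequential([
            keras.layers.Dense(num_hidden, activation=act1, input_shape=(num_in,)),
            keras.layers.Dense(num_hidden, activation=act2),
            keras.layers.Dense(num_out, activation=act_out),
        ])
        self.model.compile(optimizer=optimizer, loss='mse')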
else:
    min_state1 = min(states_old[:, 0])
    max_state1 = max(states_old[:, 0])
    min_state2 = min(states_old[:, 1])
    max_state2 = max(states_old[:, 1])
    min_dist1 = min(disturbances[:, 0])
    max_dist1 = max(disturbances[:, 0])
    min_dist2 = min(disturbances[:, 1])
    max_dist2 = max(disturbances[:, 1])
    min_dist3 = min(disturbances[:, 2])
    max_dist3 = max(disturbances[:, 2])
    states_old_norm[:, 0] = minmax_norm(states_old_norm[:, 0], min_state1, max_state1)
    states_new_norm[:, 0] = minmax_norm(states_new_norm[:, 0], min_state1, max_state1)
    states_old_norm[:, 1] = minmax_norm(states_old_norm[:, 1], min_state2, max_state2)
    states_new_norm[:, 1] = minmax_norm(states_new_norm[:, 1], min_state2, max_state2)
    if dist_flag != 0:
        disturbances_norm[:, 0] = minmax_norm(disturbances_norm[:, 0], min_dist1, max_dist1)
        disturbances_norm[:, 1] = minmax_norm(disturbances_norm[:, 1], min_dist2, max_dist2)
        disturbances_norm[:, 2] = minmax_norm(disturbances_norm[:, 2], min_dist3, max_dist3)

"""----Split into test and training data-----------------------------------------------------------------------------"""
# hold out the last 10 % of the samples for testing
[train_states_new, test_states_new] = np.split(states_new_norm, [round(num_episodes * num_samples * 0.9), ])
[train_states_old, test_states_old] = np.split(states_old_norm, [round(num_episodes * num_samples * 0.9), ])
[train_actions, test_actions] = np.split(actions, [round(num_episodes * num_samples * 0.9), ])
ref = np.ones((2, S + N + 1))
ref[0, :] = ref[0, :] * 22.5
# ref[0][45:-1] = ref[0][45:-1] + 4
ref[1, :] = ref[1, :] * 100000
x_init = np.array([[20], [0]])
mpc_model = MPC(S, N, Q, R, dist, dist_flag)
states_model, actions_model = mpc_model.mpc_step(
    ref, x_init, 0, min_state1, min_state2, min_dist1, min_dist2, min_dist3,
    max_state1, max_state2, max_dist1, max_dist2, max_dist3)

# the NN-based MPC runs on normalized references and states
ref = np.ones((2, S + N + 1))
ref[0, :] = minmax_norm(22.5, min_state1, max_state1)
ref[1, :] = minmax_norm(100000, min_state2, max_state2)
x_init = np.array([[minmax_norm(20, min_state1, max_state1)],
                   [minmax_norm(0, min_state2, max_state2)]])
mpc_network = MPC(S, N, Q, R, dist, dist_flag, network)
start = time.time()
states_network, actions_network = mpc_network.mpc_step(
    ref, x_init, 1, min_state1, min_state2, min_dist1, min_dist2, min_dist3,
    max_state1, max_state2, max_dist1, max_dist2, max_dist3)
end = time.time()
print("TIME in minutes:", (end - start) / 60)

plt.figure()
plt.subplot(311)
plt.plot(20 * np.ones((len(states_network),)), '--', color='grey')
plt.plot(25 * np.ones((len(states_network),)), '--', color='grey')