Ejemplo n.º 1
0
# Repeat the whole tabular Q-learning training 8 times. After each run, the
# per-epoch cumulative values in `rew` are sampled at the indices in S and
# stored as one entry of R.
for k in range(8):
    # One (state x action) table per time step plus one extra trailing table.
    # The extra table is only ever read (as Q[i + 1] on the last step), never
    # written, so it stays all-zero.
    Q = [np.zeros((len(X), len(U))) for _ in range(len(t_time) + 1)]
    epsilon = 0.5
    for epoch in range(8001):
        state_track = []
        x_k, r_cum = 0, 0
        for i, t in enumerate(t_time):
            x_index = np.where(round(x_k) == X)[0][0]
            # Epsilon-greedy selection: with probability epsilon pick a random
            # action, otherwise the action with the smallest Q value.
            if np.random.random() <= epsilon:
                # Original note: kW of charging power drawn.
                u_k = np.random.choice(U)
            else:
                u_k = U[np.argmin(Q[i][x_index, :]).item()]
            u_index = np.where(u_k == U)[0][0]
            x_k1, r_k = fleet.environment(
                t, x_k, u_k * n_ev * charger, [min_e[i], max_e[i]]
            )
            r_cum += r_k
            # Temporal-difference update towards r_k + gamma * min Q(next).
            a = Q[i][x_index, u_index]
            x_k = x_k1.item()
            x1_index = np.where(round(x_k) == X)[0][0]
            b = np.min(Q[i + 1][x1_index, :])
            Q[i][x_index, u_index] = a + alpha * (r_k + gamma * b - a)
            state_track.append(x_k1)
        rew.append(r_cum)
        # Shrink the exploration rate every 700 epochs (epoch 0 included).
        if epoch % 700 == 0:
            epsilon *= 0.6
    R.append(np.asarray(rew).take(np.int32(S)))
    rew = []

# Stack the runs row-wise and take the element-wise maximum across runs.
R = np.vstack(R)
R_max = np.max(R, axis=0)
Ejemplo n.º 2
0
# Unpack the ten values returned by qLearn.export_output().
(S, R_min, R_mean, R_max, min_e, max_e,
 t_time, price, mip_opt, value) = qLearn.export_output()
hours = len(t_time)
days = 150
# State grid: the integers 0 .. round(max(max_e)[0]) inclusive.
X = np.arange(0, int(round(max(max_e)[0])) + 1, 1)
# Action grid: 11 evenly spaced values from 0 to 1; each is multiplied by
# n_ev * charger before being handed to the environment.
U = np.linspace(0, 1, 11)
epsilon = 1
gamma = 1
mul = 0.96
Q = []  # Q table

# Build the Q table by backward induction: walk the time steps from last to
# first, so that the table of the following step is always Q[-1].
for hour in reversed(t_time):
    q = np.zeros((len(X), len(U)))
    for num_s, x_t in enumerate(X):
        for num_a, u_t in enumerate(U):
            # NOTE(review): the `hour - 6` offset presumes t_time starts at
            # hour 6 so that it lines up with min_e / max_e -- confirm.
            x_t1, r = fleet.environment(hour, x_t, u_t * n_ev * charger,
                                        [min_e[hour - 6], max_e[hour - 6]])
            q[num_s, num_a] = r
            # Add the discounted best (minimum) value of the successor state
            # whenever a later step exists. Testing `Q` itself instead of a
            # hard-coded final hour keeps this working for any time horizon;
            # the first iteration (last time step) has no successor table.
            if Q:
                q[num_s, num_a] += gamma * np.min(
                    Q[-1][np.where(np.round(x_t1) == X)[0][0], :])
    Q.append(q)

Q.reverse()  # Tables were appended last-step-first; restore time order.

# Empty lists initialised ahead of the training loop that follows.
x_T = []
u_T = []
T = []
q_T = []

# Training
for i in range(1000):