# Tabular Q-learning over a discrete state grid X and action set U.
# Runs 8 independent training runs; each run trains for 8001 epsilon-greedy
# episodes over the horizon t_time and records the cumulative cost per episode.
# NOTE(review): `rew`, `R`, `S`, `alpha`, `gamma`, `X`, `U`, `t_time`, `min_e`,
# `max_e`, `n_ev`, `charger` and `fleet` are defined elsewhere in the file —
# this chunk assumes they are already in scope.
for k in range(8):  # 8 independent training runs
    # One Q-table per timestep, plus one extra all-zeros table that serves as
    # the terminal-value table for the last update (Q[i + 1] below).
    Q = [np.zeros((len(X), len(U))) for i in range(len(t_time) + 1)]
    epsilon = 0.5  # exploration probability, decayed during training (see below)
    for epoch in range(8001):
        state_track = []  # visited next-states for this episode (collected, not read here)
        x_k = 0  # start each episode from state 0
        r_cum = 0  # cumulative reward/cost over the episode
        for i, t in enumerate(t_time):
            # Map the (rounded) continuous state onto its index in the grid X.
            x_index = np.where(round(x_k) == X)[0][0]
            # Epsilon-greedy action selection. argmin (not argmax) — the agent
            # minimizes, so r_k is treated as a cost.
            if np.random.random() > epsilon:
                u_k = U[np.argmin(Q[i][x_index, :]).item()]
            else:
                u_k = np.random.choice(U)  # kW of charging power drawn
            u_index = np.where(u_k == U)[0][0]
            # Step the environment: action is scaled from the [0, 1] fraction
            # u_k to fleet power via n_ev * charger; [min_e[i], max_e[i]] are
            # presumably the energy bounds for this timestep — confirm against
            # fleet.environment's signature.
            x_k1, r_k = fleet.environment(t, x_k, u_k * n_ev * charger, [min_e[i], max_e[i]])
            r_cum += r_k
            # Standard Q-learning (TD) update:
            #   Q[s, a] <- Q[s, a] + alpha * (r + gamma * min_a' Q'[s', a'] - Q[s, a])
            a = Q[i][x_index][u_index]
            b = np.min(Q[i + 1][np.where(round(x_k1.item()) == X)[0][0], :])
            Q[i][x_index][u_index] = a + alpha * (r_k + gamma * b - a)
            x_k = x_k1.item()  # advance to the next state
            state_track.append(x_k1)
        # print("Cost =", r_cum)
        rew.append(r_cum)  # episode cost; `rew` is initialized outside this chunk
        # Decay exploration every 700 epochs. NOTE(review): this also fires at
        # epoch 0, so the first decay happens immediately (0.5 -> 0.3).
        if epoch % 700 == 0:
            epsilon = epsilon * 0.6
    # Keep only the episodes selected by the sample indices S for this run.
    R.append(np.asarray(rew).take(np.int32(S)))
    rew = []  # reset the per-run episode-cost log
# Stack per-run results into a (runs x samples) matrix and take the
# elementwise maximum across runs.
R = np.vstack(R)
R_max = np.amax(R, axis=0)
# Experiment setup: pull reference results and problem data from the qLearn
# module, discretize the state/action spaces, then build the exact Q-table by
# backward induction (dynamic programming) over the horizon.
S, R_min, R_mean, R_max, min_e, max_e, t_time, price, mip_opt, value = qLearn.export_output()
hours = len(t_time)
days = 150
# State grid: integer energy levels 0 .. max energy (inclusive).
X = np.arange(0, int(round(max(max_e)[0])) + 1, 1)
# Action set: 11 evenly spaced charging-power fractions in [0, 1].
U = np.linspace(0, 1, 11)
epsilon = 1
gamma = 1  # undiscounted
mul = 0.96
Q = []  # Q table
# Build the Q-table backwards in time: iterate hours last-to-first so that
# Q[-1] always holds the table of the hour *after* the current one.
for hour in reversed(t_time):  # Build Q table
    q = np.zeros((len(X), len(U)))
    for num_s, x_t in enumerate(X):  # every discrete state
        for num_a, u_t in enumerate(U):  # every action
            # One-step lookahead through the environment model.
            # NOTE(review): `hour - 6` indexes min_e/max_e, which presumably
            # means t_time starts at hour 6 — confirm against t_time's origin.
            x_t1, r = fleet.environment(hour, x_t, u_t * n_ev * charger, [min_e[hour - 6], max_e[hour - 6]])
            q[num_s, num_a] = r
            # Add the best (minimum) continuation value from the next hour's
            # table, except at the terminal hour. NOTE(review): 18 is assumed
            # to be the last entry of t_time — verify.
            if hour != 18:
                q[num_s, num_a] += gamma * np.min(
                    Q[len(Q) - 1][np.where(np.round(x_t1) == X)[0][0], :])
    Q.append(q)
Q.reverse()  # Sort Q in the right order
# Trajectory logs for the training rollouts below.
x_T = []
u_T = []
T = []
q_T = []
# Training
# NOTE(review): the body of this loop continues beyond this chunk of the file.
for i in range(1000):