def test_dqn(EV_list_DQN_REF, CS_list_DQN_REF, graph, env, agent):
    """Evaluate a trained agent on every EV in ``EV_list_DQN_REF``.

    For each EV the environment is reset with ``env.test_reset`` and the
    agent's actions are applied through ``env.step`` until the environment
    reports ``done``. Afterwards the EV is moved hop by hop to
    ``pev.destination``.

    Args:
        EV_list_DQN_REF: Iterable of EV objects; each is evaluated once.
        CS_list_DQN_REF: Charging-station list forwarded to
            ``env.test_reset``.
        graph: Road graph forwarded to the environment and to the ``rt``
            routing helpers.
        env: Environment exposing ``test_reset``, ``step`` and ``sim_time``.
        agent: Agent exposing ``get_action`` and a writable ``epsilon``.

    Returns:
        None. Progress is printed, and the EV objects / ``env.sim_time``
        are updated in place.

    Side effects:
        ``agent.epsilon`` is set to 0 and is NOT restored afterwards.
    """
    # NOTE(review): presumably epsilon=0 disables exploration in
    # agent.get_action -- confirm against the agent implementation.
    agent.epsilon = 0

    for e, pev in enumerate(EV_list_DQN_REF):
        state, source, destination = env.test_reset(pev, graph, CS_list_DQN_REF)
        print("\nEpi:", e, agent.epsilon)
        print(source, '->', destination)
        print('sim time:', env.sim_time)

        # Nodes reported by the environment during this episode.
        visited = [source]
        done = False
        while not done:
            action = agent.get_action(state)
            # The reward is not needed during evaluation.
            state, next_node, _reward, done = env.step(action)
            visited.append(next_node)

        # The loop above only exits once the episode is done, so the
        # summary is printed exactly once per EV.
        print('sim time:', env.sim_time)
        print('Distance:', pev.totaldrivingdistance)
        print('Driving time:', pev.totaldrivingtime)
        print(pev.charged, pev.curr_location, pev.init_SOC, pev.curr_SOC)
        print(visited)

        _drive_to_destination(pev, graph, env)


def _drive_to_destination(pev, graph, env):
    """Move ``pev`` one hop at a time until it reaches ``pev.destination``.

    The route is recomputed with ``rt.a_star_search`` before every hop;
    each hop is appended to ``pev.path`` and ``env.sim_time`` is replaced
    by the value returned from ``rt.update_ev``.
    """
    while pev.curr_location != pev.destination:
        came_from, _cost_so_far = rt.a_star_search(
            graph, pev.curr_location, pev.destination)
        route = rt.reconstruct_path(
            came_from, pev.curr_location, pev.destination)
        # route[0] is the current node, so route[1] is the next hop.
        pev.next_location = route[1]
        pev.path.append(pev.next_location)
        env.sim_time, _elapsed = rt.update_ev(
            pev, graph, pev.curr_location, pev.next_location, env.sim_time)
        pev.curr_location = pev.next_location


# The name matches pytest's ``test_*`` discovery pattern; mark it so pytest
# does not try to collect this evaluation routine as a test case.
test_dqn.__test__ = False
def step(self, action, done=None):
    """Apply ``action`` (an index into ``self.path_info``) for one step.

    Depending on the selected candidate, the EV either moves one hop along
    ``front_path`` or, when it already stands on the selected charging
    station (``cs.id``), charges there and ends the episode.

    Args:
        action: Index into ``self.path_info``; each entry unpacks to
            ``(cs, pev_SOC, front_path, total_d_time, waiting_time,
            charging_time)``.
        done: Ignored. Kept optional so both ``step(action)`` and the older
            ``step(action, done)`` call forms work.

    Returns:
        Tuple ``(next_state, next_node, reward, done)``. On every terminal
        outcome ``next_state`` is a zero array of shape
        ``(1, self.state_size)`` and ``next_node`` is ``-1``.

    Raises:
        RuntimeError: The selected candidate has no next hop and the EV is
            not located at its charging station.
    """
    (cs, pev_SOC, front_path, _total_d_time,
     waiting_time, charging_time) = self.path_info[action]
    self.target = cs

    # Predicted SOC for this candidate is exhausted: fail the episode.
    if pev_SOC <= 0.0:
        print('error soc', pev_SOC)
        return np.zeros((1, self.state_size)), -1, -5, 1

    if len(front_path) > 1:
        # front_path[0] is the current node; take one hop towards the station.
        next_node = front_path[1]
        self.sim_time, elapsed = rt.update_ev(
            self.pev, self.graph, self.pev.curr_location, next_node,
            self.sim_time)
        if self.sim_time == 0 and elapsed == 0:
            print('time idx error')
            return np.zeros((1, self.state_size)), -1, -5, 1

        self.pev.curr_location = next_node
        self.path_info = rt.sim_main_first_time_check(
            self.pev, self.CS_list, self.graph, self.action_size)

        # State layout: [location, SOC] followed by three time features
        # per candidate in the refreshed path_info.
        next_state = [self.pev.curr_location, self.pev.curr_SOC]
        for _cs, _soc, _front, d_time, wait, charge in self.path_info:
            next_state += [d_time, wait, charge]
        next_state = np.reshape(next_state, [1, self.state_size])
        return next_state, next_node, -1 * elapsed, 0

    if self.pev.curr_location == cs.id:
        pev = self.pev
        pev.before_charging_SOC = pev.curr_SOC
        # Attribute name 'cscharingenergy' (sic) is kept as-is: it is part
        # of the EV object's existing interface.
        pev.cscharingenergy = (
            pev.maxBCAPA * pev.req_SOC - pev.curr_SOC * pev.maxBCAPA)
        # NOTE(review): price is indexed by int(sim_time / 5) -- presumably
        # 5-unit time slots; confirm against the CS price table.
        pev.cschargingcost = (
            pev.cscharingenergy * cs.price[int(self.sim_time / 5)])
        pev.curr_SOC = pev.req_SOC
        pev.cschargingtime = charging_time
        pev.cschargingwaitingtime = waiting_time
        pev.charged = 1
        pev.cs = cs
        # NOTE(review): the * 60 suggests an hours -> minutes conversion;
        # confirm the units of charging_time / waiting_time.
        self.sim_time += charging_time * 60
        self.sim_time += waiting_time * 60
        pev.csdrivingtime = pev.totaldrivingtime
        pev.csdistance = pev.totaldrivingdistance
        pev.cssoc = pev.curr_SOC

        done = 1
        reward = -1 * (waiting_time + charging_time)
        # Runtime debug output; the Korean text roughly means
        # "it's a charging station".
        print(done, '충전소야')
        return np.zeros((1, self.state_size)), -1, reward, done

    # Previously this printed "???", blocked on input() and returned None,
    # which made the caller fail on tuple unpacking. Fail loudly instead.
    raise RuntimeError(
        "step(): action {!r} has no next hop (front_path={!r}) but the EV is "
        "at {!r}, not at charging station {!r}".format(
            action, front_path, self.pev.curr_location, cs.id))
def step(self, action, done=None):
    """Apply ``action`` (an index into ``self.path_info``) for one step.

    The EV's current node is appended to ``self.pev.path`` first. Then the
    EV either moves one hop along ``front_path`` or, when it already stands
    on the selected charging station (``cs.id``), charges there and ends
    the episode.

    Args:
        action: Index into ``self.path_info``; each entry unpacks to
            ``(cs, pev_SOC, front_path, rear_path, front_path_distance,
            rear_path_distance, front_d_time, rear_d_time,
            rear_consump_energy, waiting_time, charging_time)``.
        done: Ignored. Kept optional so both ``step(action)`` and the older
            ``step(action, done)`` call forms work.

    Returns:
        Tuple ``(next_state, next_node, reward, done)``. On every terminal
        outcome ``next_state`` is a zero array of shape
        ``(1, self.state_size)`` and ``next_node`` is ``-1``.

    Raises:
        RuntimeError: The selected candidate has no next hop and the EV is
            not located at its charging station.
    """
    (cs, pev_SOC, front_path, rear_path, _front_dist, _rear_dist,
     _front_d_time, _rear_d_time, _rear_energy,
     _waiting_time, _charging_time) = self.path_info[action]

    self.pev.path.append(self.pev.curr_location)

    if len(front_path) > 1:
        # front_path[0] is the current node; take one hop towards the station.
        next_node = front_path[1]
        self.sim_time, elapsed = rt.update_ev(
            self.pev, self.graph, self.pev.curr_location, next_node,
            self.sim_time)
        # Both failure checks run after the EV has been advanced, and both
        # end the episode with the same penalty.
        if self.sim_time == 0 and elapsed == 0:
            return np.zeros((1, self.state_size)), -1, -10, 1
        if pev_SOC <= 0.0:
            return np.zeros((1, self.state_size)), -1, -10, 1

        self.pev.curr_location = next_node
        self.path_info = rt.get_feature_state(
            self.sim_time, self.pev, self.CS_list, self.graph,
            self.action_size)

        # State layout: [source, destination, SOC] followed by the front
        # and rear path distances of every refreshed candidate.
        next_state = [
            self.pev.source, self.pev.destination, self.pev.curr_SOC
        ]
        for (_cs, _soc, _front, _rear, front_dist, rear_dist,
             _fdt, _rdt, _energy, _wait, _charge) in self.path_info:
            next_state += [front_dist, rear_dist]
        next_state = np.reshape(next_state, [1, self.state_size])
        # Constant per-hop reward of -1.
        return next_state, next_node, -1, 0

    if self.pev.curr_location == cs.id:
        pev = self.pev
        pev.before_charging_SOC = pev.curr_SOC
        # Attribute name 'cscharingenergy' (sic) is kept as-is: it is part
        # of the EV object's existing interface.
        pev.cscharingenergy = (
            pev.maxBCAPA * pev.req_SOC - pev.curr_SOC * pev.maxBCAPA)
        # NOTE(review): price / waittime are indexed by int(sim_time / 5)
        # -- presumably 5-unit time slots; confirm against the CS tables.
        pev.cschargingcost = (
            pev.cscharingenergy * cs.price[int(self.sim_time / 5)])
        pev.curr_SOC = pev.req_SOC
        pev.cschargingtime = (
            pev.cscharingenergy / (cs.chargingpower * pev.charging_effi))
        # Waiting time is looked up at arrival, before sim_time advances.
        pev.cschargingwaitingtime = cs.waittime[int(self.sim_time / 5)]
        pev.charged = 1
        pev.cs = cs
        pev.csid = cs.id
        # NOTE(review): the * 60 suggests an hours -> minutes conversion;
        # confirm the units of the charging / waiting times.
        self.sim_time += pev.cschargingtime * 60
        self.sim_time += pev.cschargingwaitingtime * 60
        pev.fdist = pev.totaldrivingdistance
        pev.csdrivingtime = pev.totaldrivingtime
        pev.csdistance = pev.totaldrivingdistance
        pev.cssoc = pev.curr_SOC

        # Terminal reward: minus the number of nodes in rear_path.
        return np.zeros((1, self.state_size)), -1, -1 * len(rear_path), 1

    # Previously this printed "???", blocked on input() and returned None,
    # which made the caller fail on tuple unpacking. Fail loudly instead.
    raise RuntimeError(
        "step(): action {!r} has no next hop (front_path={!r}) but the EV is "
        "at {!r}, not at charging station {!r}".format(
            action, front_path, self.pev.curr_location, cs.id))
scores.append(score) episodes.append(e) steps.append(step) pev = env.pev while pev.curr_location != pev.destination: came_from, cost_so_far = rt.a_star_search(graph, pev.curr_location, pev.destination) path = rt.reconstruct_path(came_from, pev.curr_location, pev.destination) path_distance = graph.get_path_distance(path) # print("evcango: {} path dist: {}".format(evcango, path_distance)) pev.next_location = path[1] pev.path.append(pev.next_location) env.sim_time, time = rt.update_ev(pev, graph, pev.curr_location, pev.next_location, env.sim_time) pev.curr_location = pev.next_location EV_list_TA = copy.deepcopy(EV_list) CS_list_TA = copy.deepcopy(CS_list) # ta.one_time_check_timeweight(EV_list_TA, CS_list_TA, graph, 313) ta.one_time_check_distweight(EV_list_TA, CS_list_TA, graph, 313) EV_list_TEA = copy.deepcopy(EV_list) CS_list_TEA = copy.deepcopy(CS_list) # ta.every_time_check_timeweight(EV_list_TEA, CS_list_TEA, graph, 313) ta.every_time_check_distweight(EV_list_TEA, CS_list_TEA, graph, 313) ta.sim_result_text(resultdir, EV_list_DQN=EV_list_DQN, EV_list_TA=EV_list_TA,