def init_mdp_old(self):
    #get all network nodes
    node_count = len(traci.junction.getIDList())
    mdp = []
    for node in range(node_count):
        no = self.network.getNode(str(node + 1))
        #for each node find all outgoing edges
        list_edges = no.getOutgoing()
        states = []
        for edge in list_edges:
            #add the actions (edges) to the state (node): [edge-id, q-value, reward (travel time on link)]
            states.append([str(edge.getID()), -50*random.random(), -50*random.random()])
        mdp.append(states)
    return mdp
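# Illustrative note (not in the original module): init_mdp_old returns a list of
# lists indexed by node position, where each action is [edge-id, q-value, reward],
# e.g. mdp[0] == [['1to2', -12.3, -40.1], ...] for a hypothetical edge '1to2'.
# The dict-based init_mdp below supersedes it.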
def init_mdp(self):
    #get all network nodes, skipping SUMO's internal junctions (ids containing ':')
    junction_list = traci.junction.getIDList()
    useful = [i for i in junction_list if ':' not in i]
    mdp = dict((x, {}) for x in useful)
    for node in useful:
        no = self.network.getNode(node)
        #for each node find all outgoing edges
        list_edges = no.getOutgoing()
        states = {}
        for edge in list_edges:
            #skip internal edges, then add the actions (edges) to the state (node):
            #edge-id -> [q-value, reward (travel time on link)]
            if ':' in edge.getID():
                continue
            states[edge.getID()] = [10*random.random(), 10*random.random()]
        mdp[node] = states
    return mdp
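# Illustrative sketch (not part of the original module): the structure returned
# by init_mdp, assuming hypothetical junctions '1' and '2' joined by edges
# '1to2' and '2to1':
#
#   {'1': {'1to2': [q_value, reward]},
#    '2': {'2to1': [q_value, reward]}}
#
# where q_value and reward both start as random values in [0, 10).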
def process_vehicle(self, driver):
    drv = self.drivers[int(driver)]
    id_current_edge = traci.lane.getEdgeID(traci.vehicle.getLaneID(drv.id))
    #driver needs a new route
    if (self.isEdge(id_current_edge)
            and drv.current_link == id_current_edge
            and drv.current_link != drv.destination
            and drv.isUpdate):
        next_node = self.get_destination_node(id_current_edge)
        id_next_node = next_node.getID()
        #choose the next action
        if random.random() < self.qlearning.epislon:
            #exploration: pick one action at random
            action_key = random.choice(drv.mdp[id_next_node].keys())
        else:
            #exploitation: pick the action that minimizes the travel time (the reward)
            action_key = self.return_best_action(driver, id_next_node)
        #update q-table
        #current q_value
        current_q_value = drv.mdp[id_next_node][action_key][0]
        #action reward (negated stored travel time on the link)
        reward = -1*drv.mdp[id_next_node][action_key][1]
        #node reached by the chosen action
        future_node = self.get_destination_node(action_key)
        id_future_node = future_node.getID()
        #q_value of the best action from that node
        best_action = drv.mdp[id_future_node][self.return_best_action(driver, id_future_node)][0]
        #new q_value
        q_value = ((1-self.qlearning.alpha)*current_q_value
                   + self.qlearning.alpha*(abs(reward) + self.qlearning.gamma*best_action))
        #update the q_value
        drv.mdp[id_next_node][action_key][0] = q_value
        #rebuild the route (Dijkstra only) for the chosen action
        new_route = self.return_route(drv.current_link, action_key)
        #insert the new route in the vehicle
        traci.vehicle.setRoute(drv.id, new_route)
        #update the driver's steps
        drv.steps += 1
        drv.isUpdate = False
    #update the travel time on the last action
    elif self.isEdge(id_current_edge) and drv.current_link != id_current_edge:
        origin_node = self.get_origin_node(drv.current_link).getID()
        for action in drv.mdp[origin_node]:
            #store the total travel time on the link just left
            if str(action) == str(drv.current_link):
                link_tt = drv.get_travel_time_on_link(self.get_time())
                if link_tt < 0:
                    print 'error: negative travel time on link'
                drv.mdp[origin_node][action][1] = link_tt
                break
        #update the driver's properties
        drv.current_link = id_current_edge
        drv.link_tt = self.get_time()
        drv.isUpdate = True
    #the car is arriving at its destination
    elif (self.isEdge(id_current_edge)
            and drv.current_link == id_current_edge
            and drv.current_link == drv.destination):
        last_node = self.get_origin_node(drv.current_link)
        next_node = self.get_destination_node(drv.current_link).getID()
        for action in drv.mdp[last_node.getID()].keys():
            if action == str(drv.current_link):
                #in the future we need to change this to the total travel time * -1
                #total_tt = -drv.get_total_travel_time(self.get_time())
                drv.mdp[last_node.getID()][action][0] = -drv.get_total_travel_time(self.get_time())
                break
        #driver reached his goal
        #print '\t\tsteps ', drv.steps
        drv.isArrived = True
        self.running_drivers.remove(drv.id)
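# Hedged sketch (not part of the original class): the Q-learning update applied
# in process_vehicle, isolated as a plain function for clarity. The names alpha
# and gamma mirror self.qlearning.alpha / self.qlearning.gamma; travel_time
# stands for the positive link cost that process_vehicle recovers via abs(reward).
def q_update(current_q, travel_time, best_next_q, alpha, gamma):
    #Q(s, a) <- (1 - alpha)*Q(s, a) + alpha*(travel_time + gamma*max_a' Q(s', a'))
    return (1 - alpha)*current_q + alpha*(travel_time + gamma*best_next_q)

# Example: with alpha = 0.5, gamma = 0.8, a current q-value of 10.0, a link
# travel time of 30.0 and a best next q-value of 20.0, the update yields
# 0.5*10.0 + 0.5*(30.0 + 0.8*20.0) = 28.0.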