def __init__(self, config): random.seed(os.getpid()) self.vrp_problem = VRProblem(config['problem']) self.get_ortool_value() self.config = config self.random_demand = self.config['demand_sample'] self.num_nodes = self.vrp_problem.num_nodes self.num_trucks = self.vrp_problem.num_trucks self.pox_x = self.vrp_problem.node_pos_x self.pox_y = self.vrp_problem.node_pos_y self.vehicle_capacity = self.vrp_problem.vehicle_capacity self.action_space = gym.spaces.Discrete(3) self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None) self.depot = self.vrp_problem.depots[0] - 1 self.demand = [d for d in self.vrp_problem.node_demand] self.max_demand = np.max(self.demand) self.min_demand = self.max_demand for i in range(self.num_nodes): if i != self.depot and self.demand[i] < self.min_demand: self.min_demand = self.demand[i] self.total_cost = np.sum(self.cost_matrix) self.max_cost = np.max(self.cost_matrix) self.median_cost = np.median(self.cost_matrix) self.timestep = 0 self.episode_len = config['episode_len'] self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.cur_vehicle_index = 0 self.vehicle_remaining_capacity = 1.0 self.all_vehicle_remaining_capacity = np.ones(self.num_trucks) self.vehicle_pos = np.array([self.depot] * self.num_trucks) self.cur_vehicle_pos = self.depot self.node_remaining_demand = np.array( [d / self.vehicle_capacity for d in self.demand]) self.node_in_sequence = self.determine_node_sequence() self.cur_node_index = 1 self.cur_node = self.node_in_sequence[self.cur_node_index] self.eval_mode = False MAVRPSiteEnv.global_env_episilon = 0.9999 * MAVRPSiteEnv.global_env_episilon + 0.0001 * 0.05 self.env_episilon = MAVRPSiteEnv.global_env_episilon self.state = None self.reset() self.state_dim = self.state.shape[0] self.observation_space = Box(low=0.0, high=self.num_trucks, shape=(self.state_dim, ), dtype=np.float64) self.latest_layer = self.num_nodes - 1 self.layer_cnt = 1
def __init__(self, config): self.vrp_problem = VRProblem(config['problem']) self.config = config self.num_nodes = self.vrp_problem.num_nodes self.num_trucks = self.vrp_problem.num_trucks self.pox_x = self.vrp_problem.node_pos_x self.pox_y = self.vrp_problem.node_pos_y self.vehicle_capacity = self.vrp_problem.vehicle_capacity self.action_space = gym.spaces.Discrete(self.num_trucks) self.cost_matrix = self.vrp_problem.get_nodes_coordinates() self.demand = [d for d in self.vrp_problem.node_demand] self.max_demand = np.max(self.demand) self.min_demand = np.min(self.demand) self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix) self.total_cost = np.sum(self.cost_matrix) self.max_cost = np.max(self.cost_matrix) self.is_constraint_imposed = False self.constraint_automaton = CVPRConstraints(cvrp_constraints[0]) self.vehicle_remaining_capacity = np.ones(self.num_trucks) self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.cur_node_encoded = np.zeros(self.num_nodes) self.node_remaining_demand = np.zeros(self.num_nodes) self.vehicle_position = np.zeros((self.num_trucks, self.num_nodes)) self.constraint_status = np.zeros( self.constraint_automaton.num_constraint_states) self.depot = self.vrp_problem.depots[0] - 1 self.cur_node = self.depot self.cur_node_index = 0 self.step_reward = 0 self.timestep = 0 self.episode_len = self.num_nodes * 100 self.cur_vehicle_id = 0 self.node_in_sequence = [] angle_list = [] depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot]) for i in range(self.num_nodes): if i == self.depot: continue p = (self.pox_x[i], self.pox_y[i]) angle = compute_angle(depot_pos, p) angle_list.append((angle, i)) angle_list = sorted(angle_list, key=lambda x: x[0]) self.node_in_sequence = [a[1] for a in angle_list] self.state = None self.reset() self.state_dim = self.state.shape[0] self.observation_space = Box(low=0.0, high=self.num_trucks, shape=(self.state_dim, ), dtype=np.float64)
def __init__(self, config): self.vrp_problem = VRProblem(config['problem']) self.get_ortool_value() self.config = config self.num_nodes = self.vrp_problem.num_nodes self.num_trucks = self.vrp_problem.num_trucks self.pox_x = self.vrp_problem.node_pos_x self.pox_y = self.vrp_problem.node_pos_y self.vehicle_capacity = self.vrp_problem.vehicle_capacity self.action_space = gym.spaces.Discrete(2) self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None) self.demand = [d for d in self.vrp_problem.node_demand] self.max_demand = np.max(self.demand) self.min_demand = np.min(self.demand) self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix) self.total_cost = np.sum(self.cost_matrix) self.max_cost = np.max(self.cost_matrix) self.min_cost = np.min(self.cost_matrix) self.depot = self.vrp_problem.depots[0] - 1 self.timestep = 0 self.episode_len = config['episode_len'] self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.node_remaining_demand = np.array( [d / self.vehicle_capacity for d in self.demand]) self.node_in_sequence = self.determine_node_sequence() # self.node_in_sequence = [self.depot] + self.vrp_problem.node_in_sequence_ortool # self.node_in_sequence = [0, 5, 10, 15, 14, 17, 26, 6, 23, 4, 11, 18, 9, 22, 25, 30, 16, 7, 13, 2, 3, 29, 20, 8, 28, 24, 27, 12, 1, 21, 31, 19] # self.node_in_sequence = list(range(self.num_nodes)) self.cur_node_index = 1 self.cur_node = self.node_in_sequence[self.cur_node_index] self.vehicle_agents = [ VehicleAgent(i, self) for i in range(self.num_trucks) ] self.global_agent = GlobalAgent(self) self.state = None self.reset() self.state_dim = self.state.shape[0] self.observation_space = Box(low=0.0, high=self.num_trucks, shape=(self.state_dim, ), dtype=np.float64)
class CVRPEnv(gym.Env): def __init__(self, config): self.vrp_problem = VRProblem(config['problem']) self.config = config self.num_nodes = self.vrp_problem.num_nodes self.num_trucks = self.vrp_problem.num_trucks self.pox_x = self.vrp_problem.node_pos_x self.pox_y = self.vrp_problem.node_pos_y self.vehicle_capacity = self.vrp_problem.vehicle_capacity self.action_space = gym.spaces.Discrete(self.num_trucks) self.cost_matrix = self.vrp_problem.get_nodes_coordinates() self.demand = [d for d in self.vrp_problem.node_demand] self.max_demand = np.max(self.demand) self.min_demand = np.min(self.demand) self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix) self.total_cost = np.sum(self.cost_matrix) self.max_cost = np.max(self.cost_matrix) self.is_constraint_imposed = False self.constraint_automaton = CVPRConstraints(cvrp_constraints[0]) self.vehicle_remaining_capacity = np.ones(self.num_trucks) self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.cur_node_encoded = np.zeros(self.num_nodes) self.node_remaining_demand = np.zeros(self.num_nodes) self.vehicle_position = np.zeros((self.num_trucks, self.num_nodes)) self.constraint_status = np.zeros( self.constraint_automaton.num_constraint_states) self.depot = self.vrp_problem.depots[0] - 1 self.cur_node = self.depot self.cur_node_index = 0 self.step_reward = 0 self.timestep = 0 self.episode_len = self.num_nodes * 100 self.cur_vehicle_id = 0 self.node_in_sequence = [] angle_list = [] depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot]) for i in range(self.num_nodes): if i == self.depot: continue p = (self.pox_x[i], self.pox_y[i]) angle = compute_angle(depot_pos, p) angle_list.append((angle, i)) angle_list = sorted(angle_list, key=lambda x: x[0]) self.node_in_sequence = [a[1] for a in angle_list] self.state = None self.reset() self.state_dim = self.state.shape[0] self.observation_space = Box(low=0.0, high=self.num_trucks, shape=(self.state_dim, ), dtype=np.float64) def get_ortool_value(self): return self.vrp_problem.get_ortool_opt_val() def reset(self): self.cur_node_index = 0 # random.shuffle(self.node_in_sequence) self.demand = [d for d in self.vrp_problem.node_demand] self.cur_node = self.node_in_sequence[self.cur_node_index] self.vehicle_remaining_capacity = np.ones(self.num_trucks) self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.cur_node_encoded = np.zeros(self.num_nodes) # self.node_to_vehicle[:, self.cur_node] = 1.0 self.cur_node_encoded[self.cur_node] = 1.0 # self.node_remaining_demand = np.array([d/self.vehicle_capacity for d in self.demand]) self.node_remaining_demand = np.ones(self.num_nodes) self.node_remaining_demand[self.depot] = 0.0 self.vehicle_position_encoded = np.zeros(self.num_nodes) self.vehicle_position_encoded[self.depot] = self.num_trucks self.vehicle_position = np.zeros(self.num_trucks) self.vehicle_position[:] = self.depot self.state = self.state_calculator() self.step_reward = 0 self.timestep = 0 if self.is_constraint_imposed: self.constraint_automaton.reset() self.constraint_status[ self.constraint_automaton.automata_state] = 1 return np.array(self.state) def path_cost(self, node_to_vehicle): node_on_path = [] for i in range(self.num_nodes): if node_to_vehicle[i] == 1.0 and i != self.depot: node_on_path.append(i) visited_node = [self.depot] cur_node = self.depot total_cost = 0.0 for _ in range(len(node_on_path)): cost = [] for node in node_on_path: if node not in visited_node: cost.append((node, self.cost_matrix[cur_node][node])) cost = sorted(cost, key=(lambda x: x[1])) cur_node = cost[0][0] total_cost += cost[0][1] visited_node.append(cur_node) return total_cost, visited_node def validity_check(self): for i in range(self.num_trucks): if np.dot(self.node_to_vehicle[i, :], self.demand) > self.vehicle_capacity: return False return True def state_calculator(self): return np.concatenate( (self.vehicle_remaining_capacity, self.vehicle_position_encoded, self.node_remaining_demand, self.constraint_status)) # return np.concatenate((self.vehicle_remaining_capacity, # self.node_to_vehicle.flatten(), # self.vehicle_position.flatten(), # self.cur_node_encoded, # self.node_remaining_demand)) def step(self, action): self.step_reward = 0.0 dones = False self.node_to_vehicle[action, self.cur_node] = 1.0 cur_vehicle_pos = int(self.vehicle_position[action]) if self.vehicle_remaining_capacity[ action] + 1.0 / self.vehicle_capacity > self.demand[ self.cur_node] / self.vehicle_capacity: self.vehicle_remaining_capacity[action] = max( 0.0, self.vehicle_remaining_capacity[action] - self.demand[self.cur_node] / self.vehicle_capacity) self.step_reward = -self.cost_matrix[cur_vehicle_pos][ self.cur_node] / self.max_cost else: self.vehicle_remaining_capacity[action] = 0.0 self.step_reward = -1.0 - self.cost_matrix[cur_vehicle_pos][ self.cur_node] / self.max_cost self.vehicle_position_encoded[self.cur_node] -= 1.0 self.node_remaining_demand[self.cur_node] = 0.0 self.cur_node_encoded[self.cur_node] = 0.0 self.cur_node_index += 1 dones = (dones | (self.cur_node_index >= len(self.node_in_sequence))) if not dones: self.cur_node = self.node_in_sequence[self.cur_node_index] else: self.cur_node = self.depot self.vehicle_position_encoded[self.cur_node] += 1.0 self.cur_node_encoded[self.cur_node] = 1.0 self.vehicle_position[action] = self.cur_node if self.is_constraint_imposed: self.constraint_automaton.step(self.node_to_vehicle) if not self.constraint_automaton.is_accepted: self.step_reward -= 1.0 self.constraint_status[:] = 0 self.constraint_status[ self.constraint_automaton.automata_state] = 1 self.state = self.state_calculator() return np.array(self.state), self.step_reward, dones, {}
class CVRPSiteEnv(gym.Env): def __init__(self, config): self.vrp_problem = VRProblem(config['problem']) self.config = config self.num_nodes = self.vrp_problem.num_nodes self.num_trucks = self.vrp_problem.num_trucks self.pox_x = self.vrp_problem.node_pos_x self.pox_y = self.vrp_problem.node_pos_y self.vehicle_capacity = self.vrp_problem.vehicle_capacity self.action_space = gym.spaces.Discrete(self.num_nodes) self.cost_matrix = self.vrp_problem.get_nodes_coordinates() self.demand = [d for d in self.vrp_problem.node_demand] self.max_demand = np.max(self.demand) self.min_demand = np.min(self.demand) self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix) self.total_cost = np.sum(self.cost_matrix) self.max_cost = np.max(self.cost_matrix) self.min_cost = np.min(self.cost_matrix) self.is_constraint_imposed = False self.constraint_automaton = CVPRConstraints( cvrp_constraints[config['constraint_id']]) self.depot = self.vrp_problem.depots[0] - 1 self.cur_node = self.depot self.cur_node_index = 0 self.step_reward = 0 self.timestep = 0 self.episode_len = self.num_nodes * 20 self.cur_vehicle_id = 1 self.node_in_sequence = [] self.vehicle_total_remaining_capacity = np.array( [self.num_trucks - 1.0]) self.cur_vehicle_remaining_capacity = np.array([1.0]) self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.cur_node_encoded = np.zeros(self.num_nodes) self.node_remaining_demand = np.zeros(self.num_nodes) self.cur_vehicle_position = np.zeros(self.num_nodes) self.cur_vehicle_position[self.depot] = 1.0 self.constraint_status = np.zeros( self.constraint_automaton.num_constraint_states) angle_list = [] depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot]) for i in range(self.num_nodes): if i == self.depot: continue p = (self.pox_x[i], self.pox_y[i]) angle = compute_angle(depot_pos, p) angle_list.append((angle, i)) angle_list = sorted(angle_list, key=lambda x: x[0]) self.node_in_sequence = [self.depot] + [a[1] for a in angle_list] # self.node_in_sequence = [i for i in range(self.num_nodes)] self.state = None self.action_mask = None self.reset() self.state_dim = self.state.shape[0] self.observation_space = Box(low=0.0, high=self.num_trucks + 1, shape=(self.state_dim, ), dtype=np.float64) def state_calculator(self): self.state = np.concatenate( (self.cur_vehicle_remaining_capacity, self.cur_vehicle_position, self.constraint_status, self.node_remaining_demand)) return self.state def action_mask_calculator(self): action_mask = np.zeros(self.num_nodes) for i in range(self.num_nodes): next_node = self.node_in_sequence[i] if next_node == self.cur_node: continue elif self.cur_vehicle_remaining_capacity[ 0] + 0.01 / self.vehicle_capacity < self.demand[ next_node] / self.vehicle_capacity: continue elif (next_node != self.depot) & (self.node_remaining_demand[next_node] == 0): continue else: action_mask[i] = 1 self.action_mask = action_mask return self.action_mask def reset(self): self.cur_vehicle_id = 1 self.demand = [d for d in self.vrp_problem.node_demand] self.cur_node = self.node_in_sequence[self.depot] self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.vehicle_total_remaining_capacity = np.array( [self.num_trucks - 1.0]) self.cur_vehicle_remaining_capacity = np.array([1.0]) self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.node_remaining_demand = np.ones(self.num_nodes) self.cur_vehicle_position = np.zeros(self.num_nodes) self.cur_vehicle_position[self.depot] = 1.0 self.state = self.state_calculator() self.action_mask = self.action_mask_calculator() self.step_reward = 0 self.timestep = 0 if self.is_constraint_imposed: self.constraint_automaton.reset() self.constraint_status[:] = 0 self.constraint_status[ self.constraint_automaton.automata_state] = 1 return self.state, {'action_mask': self.action_mask} def step(self, action): self.step_reward = 0.0 dones = False next_node = self.node_in_sequence[action] cur_remaining_capacity = self.cur_vehicle_remaining_capacity[0] self.timestep += 1 self.cur_vehicle_remaining_capacity[0] = max( 0.0, cur_remaining_capacity - self.demand[next_node] / self.vehicle_capacity) self.step_reward += ( 1.0 - self.cost_matrix[self.cur_node][next_node] / self.max_cost) self.node_to_vehicle[:, next_node] = 0.0 visit_order = np.sum([ 1 if x > 0 else 0 for x in self.node_to_vehicle[self.cur_vehicle_id - 1, :] ]) self.node_to_vehicle[self.cur_vehicle_id - 1, next_node] = visit_order + 1.0 self.node_remaining_demand[next_node] = 0.0 if next_node == self.depot: if self.vehicle_total_remaining_capacity[0] > 0.0: self.cur_vehicle_remaining_capacity[0] = 1.0 self.vehicle_total_remaining_capacity[0] -= 1.0 else: self.cur_vehicle_remaining_capacity[0] = 0.0 self.cur_vehicle_id += 1 dones = ((self.cur_vehicle_id > self.num_trucks) | (self.timestep >= self.episode_len)) if self.is_constraint_imposed and self.constraint_automaton.is_accepted: self.constraint_automaton.step({ 'state': self.state, 'demand': self.node_remaining_demand, 'max_cost': self.max_cost, 'cost': self.cost_matrix[self.cur_node][next_node] }) if not self.constraint_automaton.is_accepted: self.step_reward = -(self.max_cost - self.min_cost) * 100 / self.max_cost dones = True self.constraint_status[:] = 0 self.constraint_status[ self.constraint_automaton.automata_state] = 1 self.cur_node = next_node self.cur_vehicle_position[:] = 0.0 self.cur_vehicle_position[self.cur_node] = 1.0 self.state = self.state_calculator() self.action_mask = self.action_mask_calculator() return self.state, self.step_reward, dones, { 'action_mask': self.action_mask, 'last_action': action } def get_ortool_value(self): return self.vrp_problem.get_ortool_opt_val() def path_cost(self, node_to_vehicle): node_on_path = [] for i in range(self.num_nodes): if node_to_vehicle[i] > 0.0 and i != self.depot: node_on_path.append((node_to_vehicle[i], i)) node_on_path = sorted(node_on_path, key=lambda x: x[0]) node_on_path = [x[1] for x in node_on_path] visited_node = [self.depot] + node_on_path pre_node = self.depot total_cost = 0.0 for cur_node in node_on_path: total_cost += self.cost_matrix[pre_node][cur_node] pre_node = cur_node return total_cost, visited_node def validity_check(self): for i in range(self.num_trucks): if np.dot(self.node_to_vehicle[i, :], self.demand) > self.vehicle_capacity: return False return True
def main(vrp_problem): data, manager, routing, solution = vrp_problem.ortool_solve() print_solution(data, manager, routing, solution) return ortool_solution_to_network(vrp_problem, data, manager, routing, solution) import argparse parser = argparse.ArgumentParser() parser.add_argument("--problem", type=str, default="A-n32-k5") if __name__ == '__main__': args = parser.parse_args() problem_name = args.problem vrp_problem = VRProblem(problem_name) plt.figure(figsize=(30, 30)) plt.axis("on") G, pos, route_edges, route_distance = main(vrp_problem) labels = {} for node in G.nodes(): labels[node] = node nx.draw_networkx_nodes(G, pos, node_size=1000) nx.draw_networkx_labels(G, pos, labels, font_size=30, font_color="black") cmap = matplotlib.cm.get_cmap('Spectral') for vehicle_id in route_edges.keys(): nx.draw_networkx_edges(G, pos, width=2, arrows=True, arrowsize=100,
class MAVRPSiteEnv(MultiAgentEnv): global_env_episilon = 0.8 def __init__(self, config): random.seed(os.getpid()) self.vrp_problem = VRProblem(config['problem']) self.get_ortool_value() self.config = config self.random_demand = self.config['demand_sample'] self.num_nodes = self.vrp_problem.num_nodes self.num_trucks = self.vrp_problem.num_trucks self.pox_x = self.vrp_problem.node_pos_x self.pox_y = self.vrp_problem.node_pos_y self.vehicle_capacity = self.vrp_problem.vehicle_capacity self.action_space = gym.spaces.Discrete(3) self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None) self.depot = self.vrp_problem.depots[0] - 1 self.demand = [d for d in self.vrp_problem.node_demand] self.max_demand = np.max(self.demand) self.min_demand = self.max_demand for i in range(self.num_nodes): if i != self.depot and self.demand[i] < self.min_demand: self.min_demand = self.demand[i] self.total_cost = np.sum(self.cost_matrix) self.max_cost = np.max(self.cost_matrix) self.median_cost = np.median(self.cost_matrix) self.timestep = 0 self.episode_len = config['episode_len'] self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.cur_vehicle_index = 0 self.vehicle_remaining_capacity = 1.0 self.all_vehicle_remaining_capacity = np.ones(self.num_trucks) self.vehicle_pos = np.array([self.depot] * self.num_trucks) self.cur_vehicle_pos = self.depot self.node_remaining_demand = np.array( [d / self.vehicle_capacity for d in self.demand]) self.node_in_sequence = self.determine_node_sequence() self.cur_node_index = 1 self.cur_node = self.node_in_sequence[self.cur_node_index] self.eval_mode = False MAVRPSiteEnv.global_env_episilon = 0.9999 * MAVRPSiteEnv.global_env_episilon + 0.0001 * 0.05 self.env_episilon = MAVRPSiteEnv.global_env_episilon self.state = None self.reset() self.state_dim = self.state.shape[0] self.observation_space = Box(low=0.0, high=self.num_trucks, shape=(self.state_dim, ), dtype=np.float64) self.latest_layer = self.num_nodes - 1 self.layer_cnt = 1 def state_calculator(self): vehicle_pos_and_cap = np.zeros(self.num_nodes) for i in range(self.num_trucks): vehicle_pos_and_cap[ self.vehicle_pos[i]] = self.all_vehicle_remaining_capacity[i] cur_node_pos_enc = np.zeros(self.num_nodes) cur_node_pos_enc[self.cur_node] = 1.0 cur_vehicle_pos_enc = np.zeros(self.num_nodes) cur_vehicle_pos_enc[self.cur_vehicle_pos] = 1.0 # node_cost_enc = np.zeros(self.num_nodes) # for i in range(self.num_nodes): # if ((i == self.depot) # or (self.node_remaining_demand[i] > 0 # and self.vehicle_remaining_capacity >= self.node_remaining_demand[i])): # node_cost_enc[i] = 1.0 - (self.max_cost - self.cost_matrix[self.cur_vehicle_pos][i]) / self.max_cost # make sure node_remaining_demand is the end of the state self.state = np.concatenate( (vehicle_pos_and_cap, cur_node_pos_enc, cur_vehicle_pos_enc, self.node_remaining_demand)) return self.state def sample_demand(self): remaining_demand_total = int(self.num_trucks * self.vehicle_capacity * random.randint(90, 95) / 100) random_demand = [self.min_demand] * self.num_nodes random_demand[self.depot] = 0.0 remaining_demand_total -= np.sum(random_demand) i = 0 while remaining_demand_total > 0: if i != self.depot and random_demand[i] < self.max_demand: if self.max_demand - self.min_demand < remaining_demand_total: rd = random.randint( 0, self.max_demand - max(random_demand[i], self.min_demand)) else: rd = remaining_demand_total random_demand[i] += rd remaining_demand_total -= rd i = (i + 1) % self.num_nodes self.demand = random_demand self.vrp_problem.set_demand(self.demand) def determine_node_sequence(self): angle_list = [] depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot]) for i in range(self.num_nodes): if i == self.depot: continue p = (self.pox_x[i], self.pox_y[i]) angle = compute_angle(depot_pos, p) angle_list.append((angle, i)) angle_list = sorted(angle_list, key=lambda x: x[0]) print(angle_list) cur_node_idx = 0 node_angle_dict = dict({x[1]: x[0] for x in angle_list}) new_node_in_sequence = [] while cur_node_idx < len(angle_list): _node_to_vehicle = np.zeros(self.num_nodes) _cur_demand = 0.0 _first_node_idx = cur_node_idx while cur_node_idx < len(angle_list): _cur_node = angle_list[cur_node_idx][1] if _cur_demand + self.demand[ _cur_node] <= self.vehicle_capacity: _cur_demand += self.demand[_cur_node] _node_to_vehicle[_cur_node] = 1 cur_node_idx += 1 else: break _, _path = self.path_cost(_node_to_vehicle) vehicle_path = _path[1:] if node_angle_dict[vehicle_path[0]] > node_angle_dict[ vehicle_path[-1]]: vehicle_path.reverse() new_node_in_sequence.extend(vehicle_path) return [self.depot] + new_node_in_sequence def set_mode(self, mode): self.eval_mode = mode def reset(self): if self.random_demand: self.sample_demand() self.latest_layer = self.num_nodes - 1 self.layer_cnt = 1 self.cur_vehicle_index = 0 MAVRPSiteEnv.global_env_episilon = 0.999 * MAVRPSiteEnv.global_env_episilon + 0.001 * 0.05 self.env_episilon = MAVRPSiteEnv.global_env_episilon self.eval_mode = False self.cur_node_index = 1 self.cur_node = self.node_in_sequence[self.cur_node_index] self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.step_reward = 0.0 self.timestep = 0 self.vehicle_remaining_capacity = 1.0 self.all_vehicle_remaining_capacity = np.ones(self.num_trucks) self.cur_vehicle_pos = self.depot self.node_remaining_demand = np.array( [d / self.vehicle_capacity for d in self.demand]) self.state = self.state_calculator() states = {0: self.state} layer = np.sum(self.node_remaining_demand > 0.0) infos = {0: {'layer': layer}} return states, infos def step(self, actions): action = actions[0] self.timestep += 1 self.step_reward = 0.0 layer = np.sum(self.node_remaining_demand > 0.0) if action in [1, 0]: if action == 1 and self.vehicle_remaining_capacity >= self.node_remaining_demand[ self.cur_node]: # valid move self.vehicle_remaining_capacity -= self.node_remaining_demand[ self.cur_node] self.all_vehicle_remaining_capacity[ self.cur_vehicle_index] -= self.node_remaining_demand[ self.cur_node] self.step_reward = (self.max_cost - self.cost_matrix[ self.cur_vehicle_pos][self.cur_node]) / self.max_cost self.cur_vehicle_pos = self.cur_node self.vehicle_pos[self.cur_vehicle_index] = self.cur_node self.node_remaining_demand[self.cur_node] = 0.0 self.node_to_vehicle[self.cur_vehicle_index, self.cur_node] = 1.0 if np.sum(self.node_remaining_demand) > 0.0: unvisited_node_idx = [] if self.node_remaining_demand[self.cur_node] > 0: unvisited_node_idx.append(self.cur_node_index) cur_idx = (self.cur_node_index + 1) % self.num_nodes while cur_idx != self.cur_node_index: if self.node_remaining_demand[ self.node_in_sequence[cur_idx]] > 0.0: unvisited_node_idx.append(cur_idx) cur_idx = (cur_idx + 1) % self.num_nodes if not self.eval_mode and random.random() <= self.env_episilon: self.cur_node_index = random.choice(unvisited_node_idx) else: self.cur_node_index = unvisited_node_idx[0] self.cur_node = self.node_in_sequence[self.cur_node_index] elif action == 2: # change a vehicle self.cur_vehicle_index = (self.cur_vehicle_index + 1) % self.num_trucks self.vehicle_remaining_capacity = self.all_vehicle_remaining_capacity[ self.cur_vehicle_index] self.cur_vehicle_pos = self.vehicle_pos[self.cur_vehicle_index] if np.sum(self.node_remaining_demand > 0.0) == self.latest_layer: self.layer_cnt += 1 else: self.layer_cnt = 1 self.latest_layer = np.sum(self.node_remaining_demand > 0.0) done = ((np.sum(self.node_remaining_demand) <= 0.0) | (self.timestep >= self.episode_len) | (self.layer_cnt > layer * self.num_trucks)) # if done: # self.step_reward -= np.sum(self.node_remaining_demand) # if np.sum(self.node_remaining_demand) <= 0.0: # for i in range(self.num_trucks): # self.step_reward += (self.max_cost - self.cost_matrix[self.vehicle_pos[i]][self.depot]) / self.max_cost self.state = self.state_calculator() states = {0: self.state} rewards = {0: self.step_reward} dones = {'__all__': done} infos = {0: {'layer': layer}} return states, rewards, dones, infos def get_ortool_value(self): return self.vrp_problem.get_ortool_opt_val() def path_cost(self, node_to_vehicle): node_mask = np.zeros(self.num_nodes) for i in range(self.num_nodes): if node_to_vehicle[i] > 0.0: node_mask[i] = 1 node_mask[self.depot] = 1 routing_distances, routing_paths = self.vrp_problem.get_ortool_opt_val_with_node_mask( node_mask) return routing_distances[0], routing_paths[0]
class MAVRPSiteEnv(MultiAgentEnv): def __init__(self, config): self.vrp_problem = VRProblem(config['problem']) self.get_ortool_value() self.config = config self.num_nodes = self.vrp_problem.num_nodes self.num_trucks = self.vrp_problem.num_trucks self.pox_x = self.vrp_problem.node_pos_x self.pox_y = self.vrp_problem.node_pos_y self.vehicle_capacity = self.vrp_problem.vehicle_capacity self.action_space = gym.spaces.Discrete(2) self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None) self.demand = [d for d in self.vrp_problem.node_demand] self.max_demand = np.max(self.demand) self.min_demand = np.min(self.demand) self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix) self.total_cost = np.sum(self.cost_matrix) self.max_cost = np.max(self.cost_matrix) self.min_cost = np.min(self.cost_matrix) self.depot = self.vrp_problem.depots[0] - 1 self.timestep = 0 self.episode_len = config['episode_len'] self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) self.node_remaining_demand = np.array( [d / self.vehicle_capacity for d in self.demand]) self.node_in_sequence = self.determine_node_sequence() # self.node_in_sequence = [self.depot] + self.vrp_problem.node_in_sequence_ortool # self.node_in_sequence = [0, 5, 10, 15, 14, 17, 26, 6, 23, 4, 11, 18, 9, 22, 25, 30, 16, 7, 13, 2, 3, 29, 20, 8, 28, 24, 27, 12, 1, 21, 31, 19] # self.node_in_sequence = list(range(self.num_nodes)) self.cur_node_index = 1 self.cur_node = self.node_in_sequence[self.cur_node_index] self.vehicle_agents = [ VehicleAgent(i, self) for i in range(self.num_trucks) ] self.global_agent = GlobalAgent(self) self.state = None self.reset() self.state_dim = self.state.shape[0] self.observation_space = Box(low=0.0, high=self.num_trucks, shape=(self.state_dim, ), dtype=np.float64) def determine_node_sequence(self): angle_list = [] depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot]) for i in range(self.num_nodes): if i == self.depot: continue p = (self.pox_x[i], self.pox_y[i]) angle = compute_angle(depot_pos, p) angle_list.append((angle, i)) angle_list = sorted(angle_list, key=lambda x: x[0]) print(angle_list) new_node_in_sequence = [] cur_node_idx = 0 while cur_node_idx < len(angle_list): _node_to_vehicle = np.zeros(self.num_nodes) _cur_demand = 0.0 _first_node_idx = cur_node_idx while cur_node_idx < len( angle_list) and abs(angle_list[cur_node_idx][0] - angle_list[_first_node_idx][0]) <= max( 30, 720 // self.num_trucks): _cur_node = angle_list[cur_node_idx][1] if _cur_demand + self.demand[ _cur_node] <= self.vehicle_capacity: _cur_demand += self.demand[_cur_node] _node_to_vehicle[_cur_node] = 1 cur_node_idx += 1 else: break _cost, _path = self.path_cost(_node_to_vehicle) new_node_in_sequence.extend(_path[1:]) return [self.depot] + new_node_in_sequence def state_calculator(self, vehicle_id): cur_node_encoded = np.zeros(self.num_nodes) cur_node_encoded[self.cur_node] = 1.0 vehicle_remaining_capacity = np.zeros(self.num_nodes) cur_vehicle_pos_encoded = np.zeros(self.num_nodes) if vehicle_id >= 0: for vehicle in self.vehicle_agents: vehicle_remaining_capacity[ vehicle.cur_node] += vehicle.remaining_capacity cur_vehicle_remaining_capacity = np.array( [self.vehicle_agents[vehicle_id].remaining_capacity]) cur_vehicle_pos_encoded[ self.vehicle_agents[vehicle_id].cur_node] = 1.0 _node_remaining_demand = np.array([ (d if d <= cur_vehicle_remaining_capacity[0] else 0.0) for d in self.node_remaining_demand ]) else: cur_vehicle_remaining_capacity = np.array([0.0]) _node_remaining_demand = np.array( [d for d in self.node_remaining_demand]) # cur_vehicle_encoded = np.zeros(self.num_trucks) # cur_vehicle_encoded[vehicle_id] = 1.0 self.state = np.concatenate( (cur_vehicle_pos_encoded, cur_vehicle_remaining_capacity, vehicle_remaining_capacity, cur_node_encoded, _node_remaining_demand)) return self.state def reset_node_sequence(self): pass # new_node_in_sequence = list(range(1, self.num_nodes)) # random.shuffle(new_node_in_sequence) # self.node_in_sequence = [self.depot] + new_node_in_sequence # new_node_in_sequence = [] # truck_list = list(range(self.num_trucks)) # for i in truck_list: # _node_to_vehicle = self.node_to_vehicle[i, :] # _cost, _path = self.path_cost(_node_to_vehicle) # new_node_in_sequence.extend(_path[1:]) # self.node_in_sequence = new_node_in_sequence # print(self.node_in_sequence) def reset(self): self.cur_node_index = 1 self.cur_node = self.node_in_sequence[self.cur_node_index] self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes)) [v.reset() for v in self.vehicle_agents] self.node_remaining_demand = np.array( [d / self.vehicle_capacity for d in self.demand]) self.step_reward = 0.0 self.timestep = 0 states = dict() self.global_agent.reset() states[-1] = self.state_calculator(-1) for vehicle_id in range(self.num_trucks): states[vehicle_id] = self.state_calculator(vehicle_id) return states, {} def step(self, actions): self.timestep += 1 self.step_reward = 0.0 states, rewards, dones, infos = dict(), dict(), dict(), dict() dones['__all__'] = ((np.sum(self.node_remaining_demand) <= 0.0) | (self.timestep >= self.episode_len)) if not dones['__all__']: while True: self.cur_node_index = (self.cur_node_index + 1) % self.num_nodes self.cur_node = self.node_in_sequence[self.cur_node_index] if self.cur_node != self.depot and self.node_remaining_demand[ self.cur_node] > 0: break states[-1] = self.state_calculator(-1) for vehicle_id in range(self.num_trucks): states[vehicle_id] = self.state_calculator(vehicle_id) rewards[vehicle_id] = self.vehicle_agents[vehicle_id].step_reward if np.sum(self.node_remaining_demand) <= 0.0: rewards[vehicle_id] += ( 1.0 - self.cost_matrix[self.vehicle_agents[vehicle_id].cur_node][ self.depot] / self.max_cost) total_agent_reward = np.sum([r for r in rewards.values()]) rewards[ -1] = total_agent_reward # total_agent_reward + self.global_agent.step_reward # if global_action == 1 and prev_node != self.cur_node: # rewards[-1] += (1.0 - self.cost_matrix[prev_node][self.cur_node] / self.max_cost) for vehicle_id in range(self.num_trucks): rewards[vehicle_id] = total_agent_reward return states, rewards, dones, infos def get_ortool_value(self): return self.vrp_problem.get_ortool_opt_val() def path_cost(self, node_to_vehicle): node_mask = np.zeros(self.num_nodes) for i in range(self.num_nodes): if node_to_vehicle[i] > 0.0: node_mask[i] = 1 node_mask[self.depot] = 1 routing_distances, routing_paths = self.vrp_problem.get_ortool_opt_val_with_node_mask( node_mask) return routing_distances[0], routing_paths[0]