Example #1
0
    def __init__(self, config):
        random.seed(os.getpid())
        self.vrp_problem = VRProblem(config['problem'])
        self.get_ortool_value()
        self.config = config
        self.random_demand = self.config['demand_sample']
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(3)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None)
        self.depot = self.vrp_problem.depots[0] - 1

        self.demand = [d for d in self.vrp_problem.node_demand]

        self.max_demand = np.max(self.demand)
        self.min_demand = self.max_demand
        for i in range(self.num_nodes):
            if i != self.depot and self.demand[i] < self.min_demand:
                self.min_demand = self.demand[i]

        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)
        self.median_cost = np.median(self.cost_matrix)

        self.timestep = 0
        self.episode_len = config['episode_len']

        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_vehicle_index = 0

        self.vehicle_remaining_capacity = 1.0
        self.all_vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.vehicle_pos = np.array([self.depot] * self.num_trucks)
        self.cur_vehicle_pos = self.depot
        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])

        self.node_in_sequence = self.determine_node_sequence()

        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]

        self.eval_mode = False
        MAVRPSiteEnv.global_env_episilon = 0.9999 * MAVRPSiteEnv.global_env_episilon + 0.0001 * 0.05
        self.env_episilon = MAVRPSiteEnv.global_env_episilon

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

        self.latest_layer = self.num_nodes - 1
        self.layer_cnt = 1
Example #2
0
    def __init__(self, config):
        self.vrp_problem = VRProblem(config['problem'])
        self.config = config
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(self.num_trucks)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates()
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.max_demand = np.max(self.demand)
        self.min_demand = np.min(self.demand)
        self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix)
        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)

        self.is_constraint_imposed = False
        self.constraint_automaton = CVPRConstraints(cvrp_constraints[0])

        self.vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_node_encoded = np.zeros(self.num_nodes)
        self.node_remaining_demand = np.zeros(self.num_nodes)
        self.vehicle_position = np.zeros((self.num_trucks, self.num_nodes))
        self.constraint_status = np.zeros(
            self.constraint_automaton.num_constraint_states)

        self.depot = self.vrp_problem.depots[0] - 1
        self.cur_node = self.depot
        self.cur_node_index = 0
        self.step_reward = 0
        self.timestep = 0
        self.episode_len = self.num_nodes * 100
        self.cur_vehicle_id = 0
        self.node_in_sequence = []

        angle_list = []
        depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot])
        for i in range(self.num_nodes):
            if i == self.depot:
                continue
            p = (self.pox_x[i], self.pox_y[i])
            angle = compute_angle(depot_pos, p)
            angle_list.append((angle, i))
        angle_list = sorted(angle_list, key=lambda x: x[0])
        self.node_in_sequence = [a[1] for a in angle_list]

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)
Example #3
0
    def __init__(self, config):
        self.vrp_problem = VRProblem(config['problem'])
        self.get_ortool_value()
        self.config = config
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(2)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None)
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.max_demand = np.max(self.demand)
        self.min_demand = np.min(self.demand)
        self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix)
        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)
        self.min_cost = np.min(self.cost_matrix)

        self.depot = self.vrp_problem.depots[0] - 1
        self.timestep = 0
        self.episode_len = config['episode_len']

        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))

        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])

        self.node_in_sequence = self.determine_node_sequence()
        # self.node_in_sequence = [self.depot] + self.vrp_problem.node_in_sequence_ortool
        # self.node_in_sequence = [0, 5, 10, 15, 14, 17, 26, 6, 23, 4, 11, 18, 9, 22, 25, 30, 16, 7, 13, 2, 3, 29, 20, 8, 28, 24, 27, 12, 1, 21, 31, 19]

        # self.node_in_sequence = list(range(self.num_nodes))

        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]

        self.vehicle_agents = [
            VehicleAgent(i, self) for i in range(self.num_trucks)
        ]
        self.global_agent = GlobalAgent(self)

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)
Example #4
0
class CVRPEnv(gym.Env):
    def __init__(self, config):
        self.vrp_problem = VRProblem(config['problem'])
        self.config = config
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(self.num_trucks)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates()
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.max_demand = np.max(self.demand)
        self.min_demand = np.min(self.demand)
        self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix)
        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)

        self.is_constraint_imposed = False
        self.constraint_automaton = CVPRConstraints(cvrp_constraints[0])

        self.vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_node_encoded = np.zeros(self.num_nodes)
        self.node_remaining_demand = np.zeros(self.num_nodes)
        self.vehicle_position = np.zeros((self.num_trucks, self.num_nodes))
        self.constraint_status = np.zeros(
            self.constraint_automaton.num_constraint_states)

        self.depot = self.vrp_problem.depots[0] - 1
        self.cur_node = self.depot
        self.cur_node_index = 0
        self.step_reward = 0
        self.timestep = 0
        self.episode_len = self.num_nodes * 100
        self.cur_vehicle_id = 0
        self.node_in_sequence = []

        angle_list = []
        depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot])
        for i in range(self.num_nodes):
            if i == self.depot:
                continue
            p = (self.pox_x[i], self.pox_y[i])
            angle = compute_angle(depot_pos, p)
            angle_list.append((angle, i))
        angle_list = sorted(angle_list, key=lambda x: x[0])
        self.node_in_sequence = [a[1] for a in angle_list]

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

    def get_ortool_value(self):
        return self.vrp_problem.get_ortool_opt_val()

    def reset(self):
        self.cur_node_index = 0
        # random.shuffle(self.node_in_sequence)
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.cur_node = self.node_in_sequence[self.cur_node_index]
        self.vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_node_encoded = np.zeros(self.num_nodes)

        # self.node_to_vehicle[:, self.cur_node] = 1.0
        self.cur_node_encoded[self.cur_node] = 1.0
        # self.node_remaining_demand = np.array([d/self.vehicle_capacity for d in self.demand])
        self.node_remaining_demand = np.ones(self.num_nodes)
        self.node_remaining_demand[self.depot] = 0.0
        self.vehicle_position_encoded = np.zeros(self.num_nodes)
        self.vehicle_position_encoded[self.depot] = self.num_trucks
        self.vehicle_position = np.zeros(self.num_trucks)
        self.vehicle_position[:] = self.depot

        self.state = self.state_calculator()

        self.step_reward = 0
        self.timestep = 0

        if self.is_constraint_imposed:
            self.constraint_automaton.reset()
            self.constraint_status[
                self.constraint_automaton.automata_state] = 1
        return np.array(self.state)

    def path_cost(self, node_to_vehicle):
        node_on_path = []
        for i in range(self.num_nodes):
            if node_to_vehicle[i] == 1.0 and i != self.depot:
                node_on_path.append(i)
        visited_node = [self.depot]
        cur_node = self.depot
        total_cost = 0.0
        for _ in range(len(node_on_path)):
            cost = []
            for node in node_on_path:
                if node not in visited_node:
                    cost.append((node, self.cost_matrix[cur_node][node]))
            cost = sorted(cost, key=(lambda x: x[1]))
            cur_node = cost[0][0]
            total_cost += cost[0][1]
            visited_node.append(cur_node)
        return total_cost, visited_node

    def validity_check(self):
        for i in range(self.num_trucks):
            if np.dot(self.node_to_vehicle[i, :],
                      self.demand) > self.vehicle_capacity:
                return False
        return True

    def state_calculator(self):
        return np.concatenate(
            (self.vehicle_remaining_capacity, self.vehicle_position_encoded,
             self.node_remaining_demand, self.constraint_status))
        # return np.concatenate((self.vehicle_remaining_capacity,
        #                        self.node_to_vehicle.flatten(),
        #                        self.vehicle_position.flatten(),
        #                        self.cur_node_encoded,
        #                        self.node_remaining_demand))

    def step(self, action):
        self.step_reward = 0.0
        dones = False
        self.node_to_vehicle[action, self.cur_node] = 1.0
        cur_vehicle_pos = int(self.vehicle_position[action])

        if self.vehicle_remaining_capacity[
                action] + 1.0 / self.vehicle_capacity > self.demand[
                    self.cur_node] / self.vehicle_capacity:
            self.vehicle_remaining_capacity[action] = max(
                0.0, self.vehicle_remaining_capacity[action] -
                self.demand[self.cur_node] / self.vehicle_capacity)
            self.step_reward = -self.cost_matrix[cur_vehicle_pos][
                self.cur_node] / self.max_cost
        else:
            self.vehicle_remaining_capacity[action] = 0.0
            self.step_reward = -1.0 - self.cost_matrix[cur_vehicle_pos][
                self.cur_node] / self.max_cost

        self.vehicle_position_encoded[self.cur_node] -= 1.0
        self.node_remaining_demand[self.cur_node] = 0.0
        self.cur_node_encoded[self.cur_node] = 0.0
        self.cur_node_index += 1
        dones = (dones | (self.cur_node_index >= len(self.node_in_sequence)))
        if not dones:
            self.cur_node = self.node_in_sequence[self.cur_node_index]
        else:
            self.cur_node = self.depot

        self.vehicle_position_encoded[self.cur_node] += 1.0
        self.cur_node_encoded[self.cur_node] = 1.0
        self.vehicle_position[action] = self.cur_node

        if self.is_constraint_imposed:
            self.constraint_automaton.step(self.node_to_vehicle)
            if not self.constraint_automaton.is_accepted:
                self.step_reward -= 1.0
            self.constraint_status[:] = 0
            self.constraint_status[
                self.constraint_automaton.automata_state] = 1

        self.state = self.state_calculator()
        return np.array(self.state), self.step_reward, dones, {}
Example #5
0
class CVRPSiteEnv(gym.Env):
    def __init__(self, config):
        self.vrp_problem = VRProblem(config['problem'])
        self.config = config
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(self.num_nodes)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates()
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.max_demand = np.max(self.demand)
        self.min_demand = np.min(self.demand)
        self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix)
        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)
        self.min_cost = np.min(self.cost_matrix)

        self.is_constraint_imposed = False
        self.constraint_automaton = CVPRConstraints(
            cvrp_constraints[config['constraint_id']])

        self.depot = self.vrp_problem.depots[0] - 1
        self.cur_node = self.depot
        self.cur_node_index = 0
        self.step_reward = 0
        self.timestep = 0
        self.episode_len = self.num_nodes * 20
        self.cur_vehicle_id = 1
        self.node_in_sequence = []

        self.vehicle_total_remaining_capacity = np.array(
            [self.num_trucks - 1.0])
        self.cur_vehicle_remaining_capacity = np.array([1.0])
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_node_encoded = np.zeros(self.num_nodes)
        self.node_remaining_demand = np.zeros(self.num_nodes)
        self.cur_vehicle_position = np.zeros(self.num_nodes)
        self.cur_vehicle_position[self.depot] = 1.0
        self.constraint_status = np.zeros(
            self.constraint_automaton.num_constraint_states)

        angle_list = []
        depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot])
        for i in range(self.num_nodes):
            if i == self.depot:
                continue
            p = (self.pox_x[i], self.pox_y[i])
            angle = compute_angle(depot_pos, p)
            angle_list.append((angle, i))
        angle_list = sorted(angle_list, key=lambda x: x[0])
        self.node_in_sequence = [self.depot] + [a[1] for a in angle_list]
        # self.node_in_sequence = [i for i in range(self.num_nodes)]

        self.state = None
        self.action_mask = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks + 1,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

    def state_calculator(self):
        self.state = np.concatenate(
            (self.cur_vehicle_remaining_capacity, self.cur_vehicle_position,
             self.constraint_status, self.node_remaining_demand))
        return self.state

    def action_mask_calculator(self):
        action_mask = np.zeros(self.num_nodes)
        for i in range(self.num_nodes):
            next_node = self.node_in_sequence[i]
            if next_node == self.cur_node:
                continue
            elif self.cur_vehicle_remaining_capacity[
                    0] + 0.01 / self.vehicle_capacity < self.demand[
                        next_node] / self.vehicle_capacity:
                continue
            elif (next_node !=
                  self.depot) & (self.node_remaining_demand[next_node] == 0):
                continue
            else:
                action_mask[i] = 1
        self.action_mask = action_mask
        return self.action_mask

    def reset(self):
        self.cur_vehicle_id = 1
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.cur_node = self.node_in_sequence[self.depot]
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.vehicle_total_remaining_capacity = np.array(
            [self.num_trucks - 1.0])
        self.cur_vehicle_remaining_capacity = np.array([1.0])
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.node_remaining_demand = np.ones(self.num_nodes)
        self.cur_vehicle_position = np.zeros(self.num_nodes)
        self.cur_vehicle_position[self.depot] = 1.0

        self.state = self.state_calculator()
        self.action_mask = self.action_mask_calculator()

        self.step_reward = 0
        self.timestep = 0

        if self.is_constraint_imposed:
            self.constraint_automaton.reset()
            self.constraint_status[:] = 0
            self.constraint_status[
                self.constraint_automaton.automata_state] = 1
        return self.state, {'action_mask': self.action_mask}

    def step(self, action):
        self.step_reward = 0.0
        dones = False
        next_node = self.node_in_sequence[action]
        cur_remaining_capacity = self.cur_vehicle_remaining_capacity[0]
        self.timestep += 1

        self.cur_vehicle_remaining_capacity[0] = max(
            0.0, cur_remaining_capacity -
            self.demand[next_node] / self.vehicle_capacity)
        self.step_reward += (
            1.0 - self.cost_matrix[self.cur_node][next_node] / self.max_cost)

        self.node_to_vehicle[:, next_node] = 0.0
        visit_order = np.sum([
            1 if x > 0 else 0
            for x in self.node_to_vehicle[self.cur_vehicle_id - 1, :]
        ])
        self.node_to_vehicle[self.cur_vehicle_id - 1,
                             next_node] = visit_order + 1.0

        self.node_remaining_demand[next_node] = 0.0

        if next_node == self.depot:
            if self.vehicle_total_remaining_capacity[0] > 0.0:
                self.cur_vehicle_remaining_capacity[0] = 1.0
                self.vehicle_total_remaining_capacity[0] -= 1.0
            else:
                self.cur_vehicle_remaining_capacity[0] = 0.0
            self.cur_vehicle_id += 1

        dones = ((self.cur_vehicle_id > self.num_trucks) |
                 (self.timestep >= self.episode_len))

        if self.is_constraint_imposed and self.constraint_automaton.is_accepted:
            self.constraint_automaton.step({
                'state':
                self.state,
                'demand':
                self.node_remaining_demand,
                'max_cost':
                self.max_cost,
                'cost':
                self.cost_matrix[self.cur_node][next_node]
            })
            if not self.constraint_automaton.is_accepted:
                self.step_reward = -(self.max_cost -
                                     self.min_cost) * 100 / self.max_cost
                dones = True
            self.constraint_status[:] = 0
            self.constraint_status[
                self.constraint_automaton.automata_state] = 1

        self.cur_node = next_node
        self.cur_vehicle_position[:] = 0.0
        self.cur_vehicle_position[self.cur_node] = 1.0

        self.state = self.state_calculator()
        self.action_mask = self.action_mask_calculator()
        return self.state, self.step_reward, dones, {
            'action_mask': self.action_mask,
            'last_action': action
        }

    def get_ortool_value(self):
        return self.vrp_problem.get_ortool_opt_val()

    def path_cost(self, node_to_vehicle):
        node_on_path = []
        for i in range(self.num_nodes):
            if node_to_vehicle[i] > 0.0 and i != self.depot:
                node_on_path.append((node_to_vehicle[i], i))
        node_on_path = sorted(node_on_path, key=lambda x: x[0])
        node_on_path = [x[1] for x in node_on_path]
        visited_node = [self.depot] + node_on_path
        pre_node = self.depot
        total_cost = 0.0
        for cur_node in node_on_path:
            total_cost += self.cost_matrix[pre_node][cur_node]
            pre_node = cur_node
        return total_cost, visited_node

    def validity_check(self):
        for i in range(self.num_trucks):
            if np.dot(self.node_to_vehicle[i, :],
                      self.demand) > self.vehicle_capacity:
                return False
        return True
Example #6
0
def main(vrp_problem):
    data, manager, routing, solution = vrp_problem.ortool_solve()
    print_solution(data, manager, routing, solution)
    return ortool_solution_to_network(vrp_problem, data, manager, routing,
                                      solution)


import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--problem", type=str, default="A-n32-k5")

if __name__ == '__main__':
    args = parser.parse_args()
    problem_name = args.problem
    vrp_problem = VRProblem(problem_name)
    plt.figure(figsize=(30, 30))
    plt.axis("on")
    G, pos, route_edges, route_distance = main(vrp_problem)
    labels = {}
    for node in G.nodes():
        labels[node] = node
    nx.draw_networkx_nodes(G, pos, node_size=1000)
    nx.draw_networkx_labels(G, pos, labels, font_size=30, font_color="black")
    cmap = matplotlib.cm.get_cmap('Spectral')
    for vehicle_id in route_edges.keys():
        nx.draw_networkx_edges(G,
                               pos,
                               width=2,
                               arrows=True,
                               arrowsize=100,
Example #7
0
class MAVRPSiteEnv(MultiAgentEnv):
    global_env_episilon = 0.8

    def __init__(self, config):
        random.seed(os.getpid())
        self.vrp_problem = VRProblem(config['problem'])
        self.get_ortool_value()
        self.config = config
        self.random_demand = self.config['demand_sample']
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(3)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None)
        self.depot = self.vrp_problem.depots[0] - 1

        self.demand = [d for d in self.vrp_problem.node_demand]

        self.max_demand = np.max(self.demand)
        self.min_demand = self.max_demand
        for i in range(self.num_nodes):
            if i != self.depot and self.demand[i] < self.min_demand:
                self.min_demand = self.demand[i]

        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)
        self.median_cost = np.median(self.cost_matrix)

        self.timestep = 0
        self.episode_len = config['episode_len']

        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_vehicle_index = 0

        self.vehicle_remaining_capacity = 1.0
        self.all_vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.vehicle_pos = np.array([self.depot] * self.num_trucks)
        self.cur_vehicle_pos = self.depot
        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])

        self.node_in_sequence = self.determine_node_sequence()

        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]

        self.eval_mode = False
        MAVRPSiteEnv.global_env_episilon = 0.9999 * MAVRPSiteEnv.global_env_episilon + 0.0001 * 0.05
        self.env_episilon = MAVRPSiteEnv.global_env_episilon

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

        self.latest_layer = self.num_nodes - 1
        self.layer_cnt = 1

    def state_calculator(self):
        vehicle_pos_and_cap = np.zeros(self.num_nodes)
        for i in range(self.num_trucks):
            vehicle_pos_and_cap[
                self.vehicle_pos[i]] = self.all_vehicle_remaining_capacity[i]

        cur_node_pos_enc = np.zeros(self.num_nodes)
        cur_node_pos_enc[self.cur_node] = 1.0
        cur_vehicle_pos_enc = np.zeros(self.num_nodes)
        cur_vehicle_pos_enc[self.cur_vehicle_pos] = 1.0

        # node_cost_enc = np.zeros(self.num_nodes)
        # for i in range(self.num_nodes):
        #     if ((i == self.depot)
        #          or (self.node_remaining_demand[i] > 0
        #           and self.vehicle_remaining_capacity >= self.node_remaining_demand[i])):
        #         node_cost_enc[i] = 1.0 - (self.max_cost - self.cost_matrix[self.cur_vehicle_pos][i]) / self.max_cost

        # make sure node_remaining_demand is the end of the state
        self.state = np.concatenate(
            (vehicle_pos_and_cap, cur_node_pos_enc, cur_vehicle_pos_enc,
             self.node_remaining_demand))
        return self.state

    def sample_demand(self):
        remaining_demand_total = int(self.num_trucks * self.vehicle_capacity *
                                     random.randint(90, 95) / 100)
        random_demand = [self.min_demand] * self.num_nodes
        random_demand[self.depot] = 0.0
        remaining_demand_total -= np.sum(random_demand)
        i = 0
        while remaining_demand_total > 0:
            if i != self.depot and random_demand[i] < self.max_demand:
                if self.max_demand - self.min_demand < remaining_demand_total:
                    rd = random.randint(
                        0, self.max_demand -
                        max(random_demand[i], self.min_demand))
                else:
                    rd = remaining_demand_total
                random_demand[i] += rd
                remaining_demand_total -= rd
            i = (i + 1) % self.num_nodes
        self.demand = random_demand
        self.vrp_problem.set_demand(self.demand)

    def determine_node_sequence(self):
        angle_list = []
        depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot])
        for i in range(self.num_nodes):
            if i == self.depot:
                continue
            p = (self.pox_x[i], self.pox_y[i])
            angle = compute_angle(depot_pos, p)
            angle_list.append((angle, i))
        angle_list = sorted(angle_list, key=lambda x: x[0])
        print(angle_list)
        cur_node_idx = 0
        node_angle_dict = dict({x[1]: x[0] for x in angle_list})
        new_node_in_sequence = []
        while cur_node_idx < len(angle_list):
            _node_to_vehicle = np.zeros(self.num_nodes)
            _cur_demand = 0.0
            _first_node_idx = cur_node_idx
            while cur_node_idx < len(angle_list):
                _cur_node = angle_list[cur_node_idx][1]
                if _cur_demand + self.demand[
                        _cur_node] <= self.vehicle_capacity:
                    _cur_demand += self.demand[_cur_node]
                    _node_to_vehicle[_cur_node] = 1
                    cur_node_idx += 1
                else:
                    break
            _, _path = self.path_cost(_node_to_vehicle)
            vehicle_path = _path[1:]
            if node_angle_dict[vehicle_path[0]] > node_angle_dict[
                    vehicle_path[-1]]:
                vehicle_path.reverse()
            new_node_in_sequence.extend(vehicle_path)
        return [self.depot] + new_node_in_sequence

    def set_mode(self, mode):
        self.eval_mode = mode

    def reset(self):
        if self.random_demand:
            self.sample_demand()
        self.latest_layer = self.num_nodes - 1
        self.layer_cnt = 1
        self.cur_vehicle_index = 0
        MAVRPSiteEnv.global_env_episilon = 0.999 * MAVRPSiteEnv.global_env_episilon + 0.001 * 0.05
        self.env_episilon = MAVRPSiteEnv.global_env_episilon
        self.eval_mode = False
        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.step_reward = 0.0
        self.timestep = 0
        self.vehicle_remaining_capacity = 1.0
        self.all_vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.cur_vehicle_pos = self.depot
        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])
        self.state = self.state_calculator()
        states = {0: self.state}
        layer = np.sum(self.node_remaining_demand > 0.0)
        infos = {0: {'layer': layer}}
        return states, infos

    def step(self, actions):
        action = actions[0]
        self.timestep += 1
        self.step_reward = 0.0
        layer = np.sum(self.node_remaining_demand > 0.0)
        if action in [1, 0]:
            if action == 1 and self.vehicle_remaining_capacity >= self.node_remaining_demand[
                    self.cur_node]:  # valid move
                self.vehicle_remaining_capacity -= self.node_remaining_demand[
                    self.cur_node]
                self.all_vehicle_remaining_capacity[
                    self.cur_vehicle_index] -= self.node_remaining_demand[
                        self.cur_node]
                self.step_reward = (self.max_cost - self.cost_matrix[
                    self.cur_vehicle_pos][self.cur_node]) / self.max_cost
                self.cur_vehicle_pos = self.cur_node
                self.vehicle_pos[self.cur_vehicle_index] = self.cur_node
                self.node_remaining_demand[self.cur_node] = 0.0
                self.node_to_vehicle[self.cur_vehicle_index,
                                     self.cur_node] = 1.0

            if np.sum(self.node_remaining_demand) > 0.0:
                unvisited_node_idx = []
                if self.node_remaining_demand[self.cur_node] > 0:
                    unvisited_node_idx.append(self.cur_node_index)
                cur_idx = (self.cur_node_index + 1) % self.num_nodes
                while cur_idx != self.cur_node_index:
                    if self.node_remaining_demand[
                            self.node_in_sequence[cur_idx]] > 0.0:
                        unvisited_node_idx.append(cur_idx)
                    cur_idx = (cur_idx + 1) % self.num_nodes

                if not self.eval_mode and random.random() <= self.env_episilon:
                    self.cur_node_index = random.choice(unvisited_node_idx)
                else:
                    self.cur_node_index = unvisited_node_idx[0]
                self.cur_node = self.node_in_sequence[self.cur_node_index]
        elif action == 2:  # change a vehicle
            self.cur_vehicle_index = (self.cur_vehicle_index +
                                      1) % self.num_trucks
            self.vehicle_remaining_capacity = self.all_vehicle_remaining_capacity[
                self.cur_vehicle_index]
            self.cur_vehicle_pos = self.vehicle_pos[self.cur_vehicle_index]

        if np.sum(self.node_remaining_demand > 0.0) == self.latest_layer:
            self.layer_cnt += 1
        else:
            self.layer_cnt = 1
            self.latest_layer = np.sum(self.node_remaining_demand > 0.0)

        done = ((np.sum(self.node_remaining_demand) <= 0.0) |
                (self.timestep >= self.episode_len) |
                (self.layer_cnt > layer * self.num_trucks))
        # if done:
        #     self.step_reward -= np.sum(self.node_remaining_demand)
        #     if np.sum(self.node_remaining_demand) <= 0.0:
        #         for i in range(self.num_trucks):
        #             self.step_reward += (self.max_cost - self.cost_matrix[self.vehicle_pos[i]][self.depot]) / self.max_cost

        self.state = self.state_calculator()
        states = {0: self.state}
        rewards = {0: self.step_reward}
        dones = {'__all__': done}
        infos = {0: {'layer': layer}}
        return states, rewards, dones, infos

    def get_ortool_value(self):
        return self.vrp_problem.get_ortool_opt_val()

    def path_cost(self, node_to_vehicle):
        node_mask = np.zeros(self.num_nodes)
        for i in range(self.num_nodes):
            if node_to_vehicle[i] > 0.0:
                node_mask[i] = 1
        node_mask[self.depot] = 1
        routing_distances, routing_paths = self.vrp_problem.get_ortool_opt_val_with_node_mask(
            node_mask)
        return routing_distances[0], routing_paths[0]
Example #8
0
class MAVRPSiteEnv(MultiAgentEnv):
    def __init__(self, config):
        self.vrp_problem = VRProblem(config['problem'])
        self.get_ortool_value()
        self.config = config
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(2)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None)
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.max_demand = np.max(self.demand)
        self.min_demand = np.min(self.demand)
        self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix)
        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)
        self.min_cost = np.min(self.cost_matrix)

        self.depot = self.vrp_problem.depots[0] - 1
        self.timestep = 0
        self.episode_len = config['episode_len']

        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))

        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])

        self.node_in_sequence = self.determine_node_sequence()
        # self.node_in_sequence = [self.depot] + self.vrp_problem.node_in_sequence_ortool
        # self.node_in_sequence = [0, 5, 10, 15, 14, 17, 26, 6, 23, 4, 11, 18, 9, 22, 25, 30, 16, 7, 13, 2, 3, 29, 20, 8, 28, 24, 27, 12, 1, 21, 31, 19]

        # self.node_in_sequence = list(range(self.num_nodes))

        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]

        self.vehicle_agents = [
            VehicleAgent(i, self) for i in range(self.num_trucks)
        ]
        self.global_agent = GlobalAgent(self)

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

    def determine_node_sequence(self):
        angle_list = []
        depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot])
        for i in range(self.num_nodes):
            if i == self.depot:
                continue
            p = (self.pox_x[i], self.pox_y[i])
            angle = compute_angle(depot_pos, p)
            angle_list.append((angle, i))
        angle_list = sorted(angle_list, key=lambda x: x[0])
        print(angle_list)
        new_node_in_sequence = []
        cur_node_idx = 0
        while cur_node_idx < len(angle_list):
            _node_to_vehicle = np.zeros(self.num_nodes)
            _cur_demand = 0.0
            _first_node_idx = cur_node_idx
            while cur_node_idx < len(
                    angle_list) and abs(angle_list[cur_node_idx][0] -
                                        angle_list[_first_node_idx][0]) <= max(
                                            30, 720 // self.num_trucks):
                _cur_node = angle_list[cur_node_idx][1]
                if _cur_demand + self.demand[
                        _cur_node] <= self.vehicle_capacity:
                    _cur_demand += self.demand[_cur_node]
                    _node_to_vehicle[_cur_node] = 1
                    cur_node_idx += 1
                else:
                    break
            _cost, _path = self.path_cost(_node_to_vehicle)
            new_node_in_sequence.extend(_path[1:])
        return [self.depot] + new_node_in_sequence

    def state_calculator(self, vehicle_id):
        cur_node_encoded = np.zeros(self.num_nodes)
        cur_node_encoded[self.cur_node] = 1.0
        vehicle_remaining_capacity = np.zeros(self.num_nodes)
        cur_vehicle_pos_encoded = np.zeros(self.num_nodes)

        if vehicle_id >= 0:
            for vehicle in self.vehicle_agents:
                vehicle_remaining_capacity[
                    vehicle.cur_node] += vehicle.remaining_capacity
            cur_vehicle_remaining_capacity = np.array(
                [self.vehicle_agents[vehicle_id].remaining_capacity])
            cur_vehicle_pos_encoded[
                self.vehicle_agents[vehicle_id].cur_node] = 1.0
            _node_remaining_demand = np.array([
                (d if d <= cur_vehicle_remaining_capacity[0] else 0.0)
                for d in self.node_remaining_demand
            ])
        else:
            cur_vehicle_remaining_capacity = np.array([0.0])
            _node_remaining_demand = np.array(
                [d for d in self.node_remaining_demand])

        # cur_vehicle_encoded = np.zeros(self.num_trucks)
        # cur_vehicle_encoded[vehicle_id] = 1.0

        self.state = np.concatenate(
            (cur_vehicle_pos_encoded, cur_vehicle_remaining_capacity,
             vehicle_remaining_capacity, cur_node_encoded,
             _node_remaining_demand))
        return self.state

    def reset_node_sequence(self):
        pass
        # new_node_in_sequence = list(range(1, self.num_nodes))
        # random.shuffle(new_node_in_sequence)
        # self.node_in_sequence = [self.depot] + new_node_in_sequence
        # new_node_in_sequence = []
        # truck_list = list(range(self.num_trucks))
        # for i in truck_list:
        #     _node_to_vehicle = self.node_to_vehicle[i, :]
        #     _cost, _path = self.path_cost(_node_to_vehicle)
        #     new_node_in_sequence.extend(_path[1:])
        # self.node_in_sequence = new_node_in_sequence
        # print(self.node_in_sequence)

    def reset(self):
        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        [v.reset() for v in self.vehicle_agents]
        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])
        self.step_reward = 0.0
        self.timestep = 0
        states = dict()
        self.global_agent.reset()
        states[-1] = self.state_calculator(-1)
        for vehicle_id in range(self.num_trucks):
            states[vehicle_id] = self.state_calculator(vehicle_id)
        return states, {}

    def step(self, actions):
        self.timestep += 1
        self.step_reward = 0.0
        states, rewards, dones, infos = dict(), dict(), dict(), dict()

        dones['__all__'] = ((np.sum(self.node_remaining_demand) <= 0.0) |
                            (self.timestep >= self.episode_len))

        if not dones['__all__']:
            while True:
                self.cur_node_index = (self.cur_node_index +
                                       1) % self.num_nodes
                self.cur_node = self.node_in_sequence[self.cur_node_index]
                if self.cur_node != self.depot and self.node_remaining_demand[
                        self.cur_node] > 0:
                    break

        states[-1] = self.state_calculator(-1)
        for vehicle_id in range(self.num_trucks):
            states[vehicle_id] = self.state_calculator(vehicle_id)
            rewards[vehicle_id] = self.vehicle_agents[vehicle_id].step_reward
            if np.sum(self.node_remaining_demand) <= 0.0:
                rewards[vehicle_id] += (
                    1.0 -
                    self.cost_matrix[self.vehicle_agents[vehicle_id].cur_node][
                        self.depot] / self.max_cost)

        total_agent_reward = np.sum([r for r in rewards.values()])

        rewards[
            -1] = total_agent_reward  # total_agent_reward + self.global_agent.step_reward
        # if global_action == 1 and prev_node != self.cur_node:
        #     rewards[-1] += (1.0 - self.cost_matrix[prev_node][self.cur_node] / self.max_cost)
        for vehicle_id in range(self.num_trucks):
            rewards[vehicle_id] = total_agent_reward
        return states, rewards, dones, infos

    def get_ortool_value(self):
        return self.vrp_problem.get_ortool_opt_val()

    def path_cost(self, node_to_vehicle):
        node_mask = np.zeros(self.num_nodes)
        for i in range(self.num_nodes):
            if node_to_vehicle[i] > 0.0:
                node_mask[i] = 1
        node_mask[self.depot] = 1
        routing_distances, routing_paths = self.vrp_problem.get_ortool_opt_val_with_node_mask(
            node_mask)
        return routing_distances[0], routing_paths[0]