Python VRProblem Examples

Programming Language: Python

Namespace/Package Name: utils.utils

Class/Type: VRProblem

Examples at hotexamples.com: 8

Python VRProblem - 8 examples found. These are the top rated real world Python examples of utils.utils.VRProblem extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

VRProblem(4)

get_nodes_coordinates(4)

get_ortool_opt_val(4)

get_ortool_opt_val_with_node_mask(2)

set_demand(1)

Example #1

Show file

File: ma_dp_cvrp_env.py Project: songCNMS/vrp

    def __init__(self, config):
        random.seed(os.getpid())
        self.vrp_problem = VRProblem(config['problem'])
        self.get_ortool_value()
        self.config = config
        self.random_demand = self.config['demand_sample']
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(3)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None)
        self.depot = self.vrp_problem.depots[0] - 1

        self.demand = [d for d in self.vrp_problem.node_demand]

        self.max_demand = np.max(self.demand)
        self.min_demand = self.max_demand
        for i in range(self.num_nodes):
            if i != self.depot and self.demand[i] < self.min_demand:
                self.min_demand = self.demand[i]

        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)
        self.median_cost = np.median(self.cost_matrix)

        self.timestep = 0
        self.episode_len = config['episode_len']

        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_vehicle_index = 0

        self.vehicle_remaining_capacity = 1.0
        self.all_vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.vehicle_pos = np.array([self.depot] * self.num_trucks)
        self.cur_vehicle_pos = self.depot
        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])

        self.node_in_sequence = self.determine_node_sequence()

        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]

        self.eval_mode = False
        MAVRPSiteEnv.global_env_episilon = 0.9999 * MAVRPSiteEnv.global_env_episilon + 0.0001 * 0.05
        self.env_episilon = MAVRPSiteEnv.global_env_episilon

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

        self.latest_layer = self.num_nodes - 1
        self.layer_cnt = 1

Example #2

Show file

File: cvrp_env.py Project: songCNMS/vrp

    def __init__(self, config):
        self.vrp_problem = VRProblem(config['problem'])
        self.config = config
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(self.num_trucks)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates()
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.max_demand = np.max(self.demand)
        self.min_demand = np.min(self.demand)
        self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix)
        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)

        self.is_constraint_imposed = False
        self.constraint_automaton = CVPRConstraints(cvrp_constraints[0])

        self.vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_node_encoded = np.zeros(self.num_nodes)
        self.node_remaining_demand = np.zeros(self.num_nodes)
        self.vehicle_position = np.zeros((self.num_trucks, self.num_nodes))
        self.constraint_status = np.zeros(
            self.constraint_automaton.num_constraint_states)

        self.depot = self.vrp_problem.depots[0] - 1
        self.cur_node = self.depot
        self.cur_node_index = 0
        self.step_reward = 0
        self.timestep = 0
        self.episode_len = self.num_nodes * 100
        self.cur_vehicle_id = 0
        self.node_in_sequence = []

        angle_list = []
        depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot])
        for i in range(self.num_nodes):
            if i == self.depot:
                continue
            p = (self.pox_x[i], self.pox_y[i])
            angle = compute_angle(depot_pos, p)
            angle_list.append((angle, i))
        angle_list = sorted(angle_list, key=lambda x: x[0])
        self.node_in_sequence = [a[1] for a in angle_list]

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

Example #3

Show file

File: ma_cvrp_env.py Project: songCNMS/vrp

    def __init__(self, config):
        self.vrp_problem = VRProblem(config['problem'])
        self.get_ortool_value()
        self.config = config
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(2)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None)
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.max_demand = np.max(self.demand)
        self.min_demand = np.min(self.demand)
        self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix)
        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)
        self.min_cost = np.min(self.cost_matrix)

        self.depot = self.vrp_problem.depots[0] - 1
        self.timestep = 0
        self.episode_len = config['episode_len']

        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))

        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])

        self.node_in_sequence = self.determine_node_sequence()
        # self.node_in_sequence = [self.depot] + self.vrp_problem.node_in_sequence_ortool
        # self.node_in_sequence = [0, 5, 10, 15, 14, 17, 26, 6, 23, 4, 11, 18, 9, 22, 25, 30, 16, 7, 13, 2, 3, 29, 20, 8, 28, 24, 27, 12, 1, 21, 31, 19]

        # self.node_in_sequence = list(range(self.num_nodes))

        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]

        self.vehicle_agents = [
            VehicleAgent(i, self) for i in range(self.num_trucks)
        ]
        self.global_agent = GlobalAgent(self)

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

Example #4

Show file

File: cvrp_env.py Project: songCNMS/vrp

class CVRPEnv(gym.Env):
    def __init__(self, config):
        self.vrp_problem = VRProblem(config['problem'])
        self.config = config
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(self.num_trucks)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates()
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.max_demand = np.max(self.demand)
        self.min_demand = np.min(self.demand)
        self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix)
        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)

        self.is_constraint_imposed = False
        self.constraint_automaton = CVPRConstraints(cvrp_constraints[0])

        self.vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_node_encoded = np.zeros(self.num_nodes)
        self.node_remaining_demand = np.zeros(self.num_nodes)
        self.vehicle_position = np.zeros((self.num_trucks, self.num_nodes))
        self.constraint_status = np.zeros(
            self.constraint_automaton.num_constraint_states)

        self.depot = self.vrp_problem.depots[0] - 1
        self.cur_node = self.depot
        self.cur_node_index = 0
        self.step_reward = 0
        self.timestep = 0
        self.episode_len = self.num_nodes * 100
        self.cur_vehicle_id = 0
        self.node_in_sequence = []

        angle_list = []
        depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot])
        for i in range(self.num_nodes):
            if i == self.depot:
                continue
            p = (self.pox_x[i], self.pox_y[i])
            angle = compute_angle(depot_pos, p)
            angle_list.append((angle, i))
        angle_list = sorted(angle_list, key=lambda x: x[0])
        self.node_in_sequence = [a[1] for a in angle_list]

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

    def get_ortool_value(self):
        return self.vrp_problem.get_ortool_opt_val()

    def reset(self):
        self.cur_node_index = 0
        # random.shuffle(self.node_in_sequence)
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.cur_node = self.node_in_sequence[self.cur_node_index]
        self.vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_node_encoded = np.zeros(self.num_nodes)

        # self.node_to_vehicle[:, self.cur_node] = 1.0
        self.cur_node_encoded[self.cur_node] = 1.0
        # self.node_remaining_demand = np.array([d/self.vehicle_capacity for d in self.demand])
        self.node_remaining_demand = np.ones(self.num_nodes)
        self.node_remaining_demand[self.depot] = 0.0
        self.vehicle_position_encoded = np.zeros(self.num_nodes)
        self.vehicle_position_encoded[self.depot] = self.num_trucks
        self.vehicle_position = np.zeros(self.num_trucks)
        self.vehicle_position[:] = self.depot

        self.state = self.state_calculator()

        self.step_reward = 0
        self.timestep = 0

        if self.is_constraint_imposed:
            self.constraint_automaton.reset()
            self.constraint_status[
                self.constraint_automaton.automata_state] = 1
        return np.array(self.state)

    def path_cost(self, node_to_vehicle):
        node_on_path = []
        for i in range(self.num_nodes):
            if node_to_vehicle[i] == 1.0 and i != self.depot:
                node_on_path.append(i)
        visited_node = [self.depot]
        cur_node = self.depot
        total_cost = 0.0
        for _ in range(len(node_on_path)):
            cost = []
            for node in node_on_path:
                if node not in visited_node:
                    cost.append((node, self.cost_matrix[cur_node][node]))
            cost = sorted(cost, key=(lambda x: x[1]))
            cur_node = cost[0][0]
            total_cost += cost[0][1]
            visited_node.append(cur_node)
        return total_cost, visited_node

    def validity_check(self):
        for i in range(self.num_trucks):
            if np.dot(self.node_to_vehicle[i, :],
                      self.demand) > self.vehicle_capacity:
                return False
        return True

    def state_calculator(self):
        return np.concatenate(
            (self.vehicle_remaining_capacity, self.vehicle_position_encoded,
             self.node_remaining_demand, self.constraint_status))
        # return np.concatenate((self.vehicle_remaining_capacity,
        #                        self.node_to_vehicle.flatten(),
        #                        self.vehicle_position.flatten(),
        #                        self.cur_node_encoded,
        #                        self.node_remaining_demand))

    def step(self, action):
        self.step_reward = 0.0
        dones = False
        self.node_to_vehicle[action, self.cur_node] = 1.0
        cur_vehicle_pos = int(self.vehicle_position[action])

        if self.vehicle_remaining_capacity[
                action] + 1.0 / self.vehicle_capacity > self.demand[
                    self.cur_node] / self.vehicle_capacity:
            self.vehicle_remaining_capacity[action] = max(
                0.0, self.vehicle_remaining_capacity[action] -
                self.demand[self.cur_node] / self.vehicle_capacity)
            self.step_reward = -self.cost_matrix[cur_vehicle_pos][
                self.cur_node] / self.max_cost
        else:
            self.vehicle_remaining_capacity[action] = 0.0
            self.step_reward = -1.0 - self.cost_matrix[cur_vehicle_pos][
                self.cur_node] / self.max_cost

        self.vehicle_position_encoded[self.cur_node] -= 1.0
        self.node_remaining_demand[self.cur_node] = 0.0
        self.cur_node_encoded[self.cur_node] = 0.0
        self.cur_node_index += 1
        dones = (dones | (self.cur_node_index >= len(self.node_in_sequence)))
        if not dones:
            self.cur_node = self.node_in_sequence[self.cur_node_index]
        else:
            self.cur_node = self.depot

        self.vehicle_position_encoded[self.cur_node] += 1.0
        self.cur_node_encoded[self.cur_node] = 1.0
        self.vehicle_position[action] = self.cur_node

        if self.is_constraint_imposed:
            self.constraint_automaton.step(self.node_to_vehicle)
            if not self.constraint_automaton.is_accepted:
                self.step_reward -= 1.0
            self.constraint_status[:] = 0
            self.constraint_status[
                self.constraint_automaton.automata_state] = 1

        self.state = self.state_calculator()
        return np.array(self.state), self.step_reward, dones, {}

Example #5

Show file

File: cvrp_env.py Project: songCNMS/vrp

class CVRPSiteEnv(gym.Env):
    def __init__(self, config):
        self.vrp_problem = VRProblem(config['problem'])
        self.config = config
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(self.num_nodes)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates()
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.max_demand = np.max(self.demand)
        self.min_demand = np.min(self.demand)
        self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix)
        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)
        self.min_cost = np.min(self.cost_matrix)

        self.is_constraint_imposed = False
        self.constraint_automaton = CVPRConstraints(
            cvrp_constraints[config['constraint_id']])

        self.depot = self.vrp_problem.depots[0] - 1
        self.cur_node = self.depot
        self.cur_node_index = 0
        self.step_reward = 0
        self.timestep = 0
        self.episode_len = self.num_nodes * 20
        self.cur_vehicle_id = 1
        self.node_in_sequence = []

        self.vehicle_total_remaining_capacity = np.array(
            [self.num_trucks - 1.0])
        self.cur_vehicle_remaining_capacity = np.array([1.0])
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_node_encoded = np.zeros(self.num_nodes)
        self.node_remaining_demand = np.zeros(self.num_nodes)
        self.cur_vehicle_position = np.zeros(self.num_nodes)
        self.cur_vehicle_position[self.depot] = 1.0
        self.constraint_status = np.zeros(
            self.constraint_automaton.num_constraint_states)

        angle_list = []
        depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot])
        for i in range(self.num_nodes):
            if i == self.depot:
                continue
            p = (self.pox_x[i], self.pox_y[i])
            angle = compute_angle(depot_pos, p)
            angle_list.append((angle, i))
        angle_list = sorted(angle_list, key=lambda x: x[0])
        self.node_in_sequence = [self.depot] + [a[1] for a in angle_list]
        # self.node_in_sequence = [i for i in range(self.num_nodes)]

        self.state = None
        self.action_mask = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks + 1,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

    def state_calculator(self):
        self.state = np.concatenate(
            (self.cur_vehicle_remaining_capacity, self.cur_vehicle_position,
             self.constraint_status, self.node_remaining_demand))
        return self.state

    def action_mask_calculator(self):
        action_mask = np.zeros(self.num_nodes)
        for i in range(self.num_nodes):
            next_node = self.node_in_sequence[i]
            if next_node == self.cur_node:
                continue
            elif self.cur_vehicle_remaining_capacity[
                    0] + 0.01 / self.vehicle_capacity < self.demand[
                        next_node] / self.vehicle_capacity:
                continue
            elif (next_node !=
                  self.depot) & (self.node_remaining_demand[next_node] == 0):
                continue
            else:
                action_mask[i] = 1
        self.action_mask = action_mask
        return self.action_mask

    def reset(self):
        self.cur_vehicle_id = 1
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.cur_node = self.node_in_sequence[self.depot]
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.vehicle_total_remaining_capacity = np.array(
            [self.num_trucks - 1.0])
        self.cur_vehicle_remaining_capacity = np.array([1.0])
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.node_remaining_demand = np.ones(self.num_nodes)
        self.cur_vehicle_position = np.zeros(self.num_nodes)
        self.cur_vehicle_position[self.depot] = 1.0

        self.state = self.state_calculator()
        self.action_mask = self.action_mask_calculator()

        self.step_reward = 0
        self.timestep = 0

        if self.is_constraint_imposed:
            self.constraint_automaton.reset()
            self.constraint_status[:] = 0
            self.constraint_status[
                self.constraint_automaton.automata_state] = 1
        return self.state, {'action_mask': self.action_mask}

    def step(self, action):
        self.step_reward = 0.0
        dones = False
        next_node = self.node_in_sequence[action]
        cur_remaining_capacity = self.cur_vehicle_remaining_capacity[0]
        self.timestep += 1

        self.cur_vehicle_remaining_capacity[0] = max(
            0.0, cur_remaining_capacity -
            self.demand[next_node] / self.vehicle_capacity)
        self.step_reward += (
            1.0 - self.cost_matrix[self.cur_node][next_node] / self.max_cost)

        self.node_to_vehicle[:, next_node] = 0.0
        visit_order = np.sum([
            1 if x > 0 else 0
            for x in self.node_to_vehicle[self.cur_vehicle_id - 1, :]
        ])
        self.node_to_vehicle[self.cur_vehicle_id - 1,
                             next_node] = visit_order + 1.0

        self.node_remaining_demand[next_node] = 0.0

        if next_node == self.depot:
            if self.vehicle_total_remaining_capacity[0] > 0.0:
                self.cur_vehicle_remaining_capacity[0] = 1.0
                self.vehicle_total_remaining_capacity[0] -= 1.0
            else:
                self.cur_vehicle_remaining_capacity[0] = 0.0
            self.cur_vehicle_id += 1

        dones = ((self.cur_vehicle_id > self.num_trucks) |
                 (self.timestep >= self.episode_len))

        if self.is_constraint_imposed and self.constraint_automaton.is_accepted:
            self.constraint_automaton.step({
                'state':
                self.state,
                'demand':
                self.node_remaining_demand,
                'max_cost':
                self.max_cost,
                'cost':
                self.cost_matrix[self.cur_node][next_node]
            })
            if not self.constraint_automaton.is_accepted:
                self.step_reward = -(self.max_cost -
                                     self.min_cost) * 100 / self.max_cost
                dones = True
            self.constraint_status[:] = 0
            self.constraint_status[
                self.constraint_automaton.automata_state] = 1

        self.cur_node = next_node
        self.cur_vehicle_position[:] = 0.0
        self.cur_vehicle_position[self.cur_node] = 1.0

        self.state = self.state_calculator()
        self.action_mask = self.action_mask_calculator()
        return self.state, self.step_reward, dones, {
            'action_mask': self.action_mask,
            'last_action': action
        }

    def get_ortool_value(self):
        return self.vrp_problem.get_ortool_opt_val()

    def path_cost(self, node_to_vehicle):
        node_on_path = []
        for i in range(self.num_nodes):
            if node_to_vehicle[i] > 0.0 and i != self.depot:
                node_on_path.append((node_to_vehicle[i], i))
        node_on_path = sorted(node_on_path, key=lambda x: x[0])
        node_on_path = [x[1] for x in node_on_path]
        visited_node = [self.depot] + node_on_path
        pre_node = self.depot
        total_cost = 0.0
        for cur_node in node_on_path:
            total_cost += self.cost_matrix[pre_node][cur_node]
            pre_node = cur_node
        return total_cost, visited_node

    def validity_check(self):
        for i in range(self.num_trucks):
            if np.dot(self.node_to_vehicle[i, :],
                      self.demand) > self.vehicle_capacity:
                return False
        return True

Example #6

Show file

def main(vrp_problem):
    data, manager, routing, solution = vrp_problem.ortool_solve()
    print_solution(data, manager, routing, solution)
    return ortool_solution_to_network(vrp_problem, data, manager, routing,
                                      solution)


import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--problem", type=str, default="A-n32-k5")

if __name__ == '__main__':
    args = parser.parse_args()
    problem_name = args.problem
    vrp_problem = VRProblem(problem_name)
    plt.figure(figsize=(30, 30))
    plt.axis("on")
    G, pos, route_edges, route_distance = main(vrp_problem)
    labels = {}
    for node in G.nodes():
        labels[node] = node
    nx.draw_networkx_nodes(G, pos, node_size=1000)
    nx.draw_networkx_labels(G, pos, labels, font_size=30, font_color="black")
    cmap = matplotlib.cm.get_cmap('Spectral')
    for vehicle_id in route_edges.keys():
        nx.draw_networkx_edges(G,
                               pos,
                               width=2,
                               arrows=True,
                               arrowsize=100,

Example #7

Show file

File: ma_dp_cvrp_env.py Project: songCNMS/vrp

class MAVRPSiteEnv(MultiAgentEnv):
    global_env_episilon = 0.8

    def __init__(self, config):
        random.seed(os.getpid())
        self.vrp_problem = VRProblem(config['problem'])
        self.get_ortool_value()
        self.config = config
        self.random_demand = self.config['demand_sample']
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(3)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None)
        self.depot = self.vrp_problem.depots[0] - 1

        self.demand = [d for d in self.vrp_problem.node_demand]

        self.max_demand = np.max(self.demand)
        self.min_demand = self.max_demand
        for i in range(self.num_nodes):
            if i != self.depot and self.demand[i] < self.min_demand:
                self.min_demand = self.demand[i]

        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)
        self.median_cost = np.median(self.cost_matrix)

        self.timestep = 0
        self.episode_len = config['episode_len']

        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.cur_vehicle_index = 0

        self.vehicle_remaining_capacity = 1.0
        self.all_vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.vehicle_pos = np.array([self.depot] * self.num_trucks)
        self.cur_vehicle_pos = self.depot
        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])

        self.node_in_sequence = self.determine_node_sequence()

        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]

        self.eval_mode = False
        MAVRPSiteEnv.global_env_episilon = 0.9999 * MAVRPSiteEnv.global_env_episilon + 0.0001 * 0.05
        self.env_episilon = MAVRPSiteEnv.global_env_episilon

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

        self.latest_layer = self.num_nodes - 1
        self.layer_cnt = 1

    def state_calculator(self):
        vehicle_pos_and_cap = np.zeros(self.num_nodes)
        for i in range(self.num_trucks):
            vehicle_pos_and_cap[
                self.vehicle_pos[i]] = self.all_vehicle_remaining_capacity[i]

        cur_node_pos_enc = np.zeros(self.num_nodes)
        cur_node_pos_enc[self.cur_node] = 1.0
        cur_vehicle_pos_enc = np.zeros(self.num_nodes)
        cur_vehicle_pos_enc[self.cur_vehicle_pos] = 1.0

        # node_cost_enc = np.zeros(self.num_nodes)
        # for i in range(self.num_nodes):
        #     if ((i == self.depot)
        #          or (self.node_remaining_demand[i] > 0
        #           and self.vehicle_remaining_capacity >= self.node_remaining_demand[i])):
        #         node_cost_enc[i] = 1.0 - (self.max_cost - self.cost_matrix[self.cur_vehicle_pos][i]) / self.max_cost

        # make sure node_remaining_demand is the end of the state
        self.state = np.concatenate(
            (vehicle_pos_and_cap, cur_node_pos_enc, cur_vehicle_pos_enc,
             self.node_remaining_demand))
        return self.state

    def sample_demand(self):
        remaining_demand_total = int(self.num_trucks * self.vehicle_capacity *
                                     random.randint(90, 95) / 100)
        random_demand = [self.min_demand] * self.num_nodes
        random_demand[self.depot] = 0.0
        remaining_demand_total -= np.sum(random_demand)
        i = 0
        while remaining_demand_total > 0:
            if i != self.depot and random_demand[i] < self.max_demand:
                if self.max_demand - self.min_demand < remaining_demand_total:
                    rd = random.randint(
                        0, self.max_demand -
                        max(random_demand[i], self.min_demand))
                else:
                    rd = remaining_demand_total
                random_demand[i] += rd
                remaining_demand_total -= rd
            i = (i + 1) % self.num_nodes
        self.demand = random_demand
        self.vrp_problem.set_demand(self.demand)

    def determine_node_sequence(self):
        angle_list = []
        depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot])
        for i in range(self.num_nodes):
            if i == self.depot:
                continue
            p = (self.pox_x[i], self.pox_y[i])
            angle = compute_angle(depot_pos, p)
            angle_list.append((angle, i))
        angle_list = sorted(angle_list, key=lambda x: x[0])
        print(angle_list)
        cur_node_idx = 0
        node_angle_dict = dict({x[1]: x[0] for x in angle_list})
        new_node_in_sequence = []
        while cur_node_idx < len(angle_list):
            _node_to_vehicle = np.zeros(self.num_nodes)
            _cur_demand = 0.0
            _first_node_idx = cur_node_idx
            while cur_node_idx < len(angle_list):
                _cur_node = angle_list[cur_node_idx][1]
                if _cur_demand + self.demand[
                        _cur_node] <= self.vehicle_capacity:
                    _cur_demand += self.demand[_cur_node]
                    _node_to_vehicle[_cur_node] = 1
                    cur_node_idx += 1
                else:
                    break
            _, _path = self.path_cost(_node_to_vehicle)
            vehicle_path = _path[1:]
            if node_angle_dict[vehicle_path[0]] > node_angle_dict[
                    vehicle_path[-1]]:
                vehicle_path.reverse()
            new_node_in_sequence.extend(vehicle_path)
        return [self.depot] + new_node_in_sequence

    def set_mode(self, mode):
        self.eval_mode = mode

    def reset(self):
        if self.random_demand:
            self.sample_demand()
        self.latest_layer = self.num_nodes - 1
        self.layer_cnt = 1
        self.cur_vehicle_index = 0
        MAVRPSiteEnv.global_env_episilon = 0.999 * MAVRPSiteEnv.global_env_episilon + 0.001 * 0.05
        self.env_episilon = MAVRPSiteEnv.global_env_episilon
        self.eval_mode = False
        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        self.step_reward = 0.0
        self.timestep = 0
        self.vehicle_remaining_capacity = 1.0
        self.all_vehicle_remaining_capacity = np.ones(self.num_trucks)
        self.cur_vehicle_pos = self.depot
        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])
        self.state = self.state_calculator()
        states = {0: self.state}
        layer = np.sum(self.node_remaining_demand > 0.0)
        infos = {0: {'layer': layer}}
        return states, infos

    def step(self, actions):
        action = actions[0]
        self.timestep += 1
        self.step_reward = 0.0
        layer = np.sum(self.node_remaining_demand > 0.0)
        if action in [1, 0]:
            if action == 1 and self.vehicle_remaining_capacity >= self.node_remaining_demand[
                    self.cur_node]:  # valid move
                self.vehicle_remaining_capacity -= self.node_remaining_demand[
                    self.cur_node]
                self.all_vehicle_remaining_capacity[
                    self.cur_vehicle_index] -= self.node_remaining_demand[
                        self.cur_node]
                self.step_reward = (self.max_cost - self.cost_matrix[
                    self.cur_vehicle_pos][self.cur_node]) / self.max_cost
                self.cur_vehicle_pos = self.cur_node
                self.vehicle_pos[self.cur_vehicle_index] = self.cur_node
                self.node_remaining_demand[self.cur_node] = 0.0
                self.node_to_vehicle[self.cur_vehicle_index,
                                     self.cur_node] = 1.0

            if np.sum(self.node_remaining_demand) > 0.0:
                unvisited_node_idx = []
                if self.node_remaining_demand[self.cur_node] > 0:
                    unvisited_node_idx.append(self.cur_node_index)
                cur_idx = (self.cur_node_index + 1) % self.num_nodes
                while cur_idx != self.cur_node_index:
                    if self.node_remaining_demand[
                            self.node_in_sequence[cur_idx]] > 0.0:
                        unvisited_node_idx.append(cur_idx)
                    cur_idx = (cur_idx + 1) % self.num_nodes

                if not self.eval_mode and random.random() <= self.env_episilon:
                    self.cur_node_index = random.choice(unvisited_node_idx)
                else:
                    self.cur_node_index = unvisited_node_idx[0]
                self.cur_node = self.node_in_sequence[self.cur_node_index]
        elif action == 2:  # change a vehicle
            self.cur_vehicle_index = (self.cur_vehicle_index +
                                      1) % self.num_trucks
            self.vehicle_remaining_capacity = self.all_vehicle_remaining_capacity[
                self.cur_vehicle_index]
            self.cur_vehicle_pos = self.vehicle_pos[self.cur_vehicle_index]

        if np.sum(self.node_remaining_demand > 0.0) == self.latest_layer:
            self.layer_cnt += 1
        else:
            self.layer_cnt = 1
            self.latest_layer = np.sum(self.node_remaining_demand > 0.0)

        done = ((np.sum(self.node_remaining_demand) <= 0.0) |
                (self.timestep >= self.episode_len) |
                (self.layer_cnt > layer * self.num_trucks))
        # if done:
        #     self.step_reward -= np.sum(self.node_remaining_demand)
        #     if np.sum(self.node_remaining_demand) <= 0.0:
        #         for i in range(self.num_trucks):
        #             self.step_reward += (self.max_cost - self.cost_matrix[self.vehicle_pos[i]][self.depot]) / self.max_cost

        self.state = self.state_calculator()
        states = {0: self.state}
        rewards = {0: self.step_reward}
        dones = {'__all__': done}
        infos = {0: {'layer': layer}}
        return states, rewards, dones, infos

    def get_ortool_value(self):
        return self.vrp_problem.get_ortool_opt_val()

    def path_cost(self, node_to_vehicle):
        node_mask = np.zeros(self.num_nodes)
        for i in range(self.num_nodes):
            if node_to_vehicle[i] > 0.0:
                node_mask[i] = 1
        node_mask[self.depot] = 1
        routing_distances, routing_paths = self.vrp_problem.get_ortool_opt_val_with_node_mask(
            node_mask)
        return routing_distances[0], routing_paths[0]

Example #8

Show file

File: ma_cvrp_env.py Project: songCNMS/vrp

class MAVRPSiteEnv(MultiAgentEnv):
    def __init__(self, config):
        self.vrp_problem = VRProblem(config['problem'])
        self.get_ortool_value()
        self.config = config
        self.num_nodes = self.vrp_problem.num_nodes
        self.num_trucks = self.vrp_problem.num_trucks
        self.pox_x = self.vrp_problem.node_pos_x
        self.pox_y = self.vrp_problem.node_pos_y
        self.vehicle_capacity = self.vrp_problem.vehicle_capacity
        self.action_space = gym.spaces.Discrete(2)
        self.cost_matrix = self.vrp_problem.get_nodes_coordinates(None)
        self.demand = [d for d in self.vrp_problem.node_demand]
        self.max_demand = np.max(self.demand)
        self.min_demand = np.min(self.demand)
        self.max_cost_per_demand = self.min_demand / np.max(self.cost_matrix)
        self.total_cost = np.sum(self.cost_matrix)
        self.max_cost = np.max(self.cost_matrix)
        self.min_cost = np.min(self.cost_matrix)

        self.depot = self.vrp_problem.depots[0] - 1
        self.timestep = 0
        self.episode_len = config['episode_len']

        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))

        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])

        self.node_in_sequence = self.determine_node_sequence()
        # self.node_in_sequence = [self.depot] + self.vrp_problem.node_in_sequence_ortool
        # self.node_in_sequence = [0, 5, 10, 15, 14, 17, 26, 6, 23, 4, 11, 18, 9, 22, 25, 30, 16, 7, 13, 2, 3, 29, 20, 8, 28, 24, 27, 12, 1, 21, 31, 19]

        # self.node_in_sequence = list(range(self.num_nodes))

        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]

        self.vehicle_agents = [
            VehicleAgent(i, self) for i in range(self.num_trucks)
        ]
        self.global_agent = GlobalAgent(self)

        self.state = None
        self.reset()
        self.state_dim = self.state.shape[0]
        self.observation_space = Box(low=0.0,
                                     high=self.num_trucks,
                                     shape=(self.state_dim, ),
                                     dtype=np.float64)

    def determine_node_sequence(self):
        angle_list = []
        depot_pos = (self.pox_x[self.depot], self.pox_y[self.depot])
        for i in range(self.num_nodes):
            if i == self.depot:
                continue
            p = (self.pox_x[i], self.pox_y[i])
            angle = compute_angle(depot_pos, p)
            angle_list.append((angle, i))
        angle_list = sorted(angle_list, key=lambda x: x[0])
        print(angle_list)
        new_node_in_sequence = []
        cur_node_idx = 0
        while cur_node_idx < len(angle_list):
            _node_to_vehicle = np.zeros(self.num_nodes)
            _cur_demand = 0.0
            _first_node_idx = cur_node_idx
            while cur_node_idx < len(
                    angle_list) and abs(angle_list[cur_node_idx][0] -
                                        angle_list[_first_node_idx][0]) <= max(
                                            30, 720 // self.num_trucks):
                _cur_node = angle_list[cur_node_idx][1]
                if _cur_demand + self.demand[
                        _cur_node] <= self.vehicle_capacity:
                    _cur_demand += self.demand[_cur_node]
                    _node_to_vehicle[_cur_node] = 1
                    cur_node_idx += 1
                else:
                    break
            _cost, _path = self.path_cost(_node_to_vehicle)
            new_node_in_sequence.extend(_path[1:])
        return [self.depot] + new_node_in_sequence

    def state_calculator(self, vehicle_id):
        cur_node_encoded = np.zeros(self.num_nodes)
        cur_node_encoded[self.cur_node] = 1.0
        vehicle_remaining_capacity = np.zeros(self.num_nodes)
        cur_vehicle_pos_encoded = np.zeros(self.num_nodes)

        if vehicle_id >= 0:
            for vehicle in self.vehicle_agents:
                vehicle_remaining_capacity[
                    vehicle.cur_node] += vehicle.remaining_capacity
            cur_vehicle_remaining_capacity = np.array(
                [self.vehicle_agents[vehicle_id].remaining_capacity])
            cur_vehicle_pos_encoded[
                self.vehicle_agents[vehicle_id].cur_node] = 1.0
            _node_remaining_demand = np.array([
                (d if d <= cur_vehicle_remaining_capacity[0] else 0.0)
                for d in self.node_remaining_demand
            ])
        else:
            cur_vehicle_remaining_capacity = np.array([0.0])
            _node_remaining_demand = np.array(
                [d for d in self.node_remaining_demand])

        # cur_vehicle_encoded = np.zeros(self.num_trucks)
        # cur_vehicle_encoded[vehicle_id] = 1.0

        self.state = np.concatenate(
            (cur_vehicle_pos_encoded, cur_vehicle_remaining_capacity,
             vehicle_remaining_capacity, cur_node_encoded,
             _node_remaining_demand))
        return self.state

    def reset_node_sequence(self):
        pass
        # new_node_in_sequence = list(range(1, self.num_nodes))
        # random.shuffle(new_node_in_sequence)
        # self.node_in_sequence = [self.depot] + new_node_in_sequence
        # new_node_in_sequence = []
        # truck_list = list(range(self.num_trucks))
        # for i in truck_list:
        #     _node_to_vehicle = self.node_to_vehicle[i, :]
        #     _cost, _path = self.path_cost(_node_to_vehicle)
        #     new_node_in_sequence.extend(_path[1:])
        # self.node_in_sequence = new_node_in_sequence
        # print(self.node_in_sequence)

    def reset(self):
        self.cur_node_index = 1
        self.cur_node = self.node_in_sequence[self.cur_node_index]
        self.node_to_vehicle = np.zeros((self.num_trucks, self.num_nodes))
        [v.reset() for v in self.vehicle_agents]
        self.node_remaining_demand = np.array(
            [d / self.vehicle_capacity for d in self.demand])
        self.step_reward = 0.0
        self.timestep = 0
        states = dict()
        self.global_agent.reset()
        states[-1] = self.state_calculator(-1)
        for vehicle_id in range(self.num_trucks):
            states[vehicle_id] = self.state_calculator(vehicle_id)
        return states, {}

    def step(self, actions):
        self.timestep += 1
        self.step_reward = 0.0
        states, rewards, dones, infos = dict(), dict(), dict(), dict()

        dones['__all__'] = ((np.sum(self.node_remaining_demand) <= 0.0) |
                            (self.timestep >= self.episode_len))

        if not dones['__all__']:
            while True:
                self.cur_node_index = (self.cur_node_index +
                                       1) % self.num_nodes
                self.cur_node = self.node_in_sequence[self.cur_node_index]
                if self.cur_node != self.depot and self.node_remaining_demand[
                        self.cur_node] > 0:
                    break

        states[-1] = self.state_calculator(-1)
        for vehicle_id in range(self.num_trucks):
            states[vehicle_id] = self.state_calculator(vehicle_id)
            rewards[vehicle_id] = self.vehicle_agents[vehicle_id].step_reward
            if np.sum(self.node_remaining_demand) <= 0.0:
                rewards[vehicle_id] += (
                    1.0 -
                    self.cost_matrix[self.vehicle_agents[vehicle_id].cur_node][
                        self.depot] / self.max_cost)

        total_agent_reward = np.sum([r for r in rewards.values()])

        rewards[
            -1] = total_agent_reward  # total_agent_reward + self.global_agent.step_reward
        # if global_action == 1 and prev_node != self.cur_node:
        #     rewards[-1] += (1.0 - self.cost_matrix[prev_node][self.cur_node] / self.max_cost)
        for vehicle_id in range(self.num_trucks):
            rewards[vehicle_id] = total_agent_reward
        return states, rewards, dones, infos

    def get_ortool_value(self):
        return self.vrp_problem.get_ortool_opt_val()

    def path_cost(self, node_to_vehicle):
        node_mask = np.zeros(self.num_nodes)
        for i in range(self.num_nodes):
            if node_to_vehicle[i] > 0.0:
                node_mask[i] = 1
        node_mask[self.depot] = 1
        routing_distances, routing_paths = self.vrp_problem.get_ortool_opt_val_with_node_mask(
            node_mask)
        return routing_distances[0], routing_paths[0]