class TaxiRebalance(gym.Env, ABC): def __init__(self, config): self._config = config self.curr_time = 0 self.graph = Graph() self.graph.import_graph(graph_file) self.sim = Simulator(self.graph) self.max_vehicle = self._config['max_vehicle'] self.reb_interval = self._config['reb_interval'] self.max_travel_t = self._config['max_travel_time'] self.max_lookback_steps = int( np.ceil(self.max_travel_t / self.reb_interval)) self.max_passenger = self._config['max_passenger'] self._num_nodes = len(self._config['nodes_list']) self._nodes = tuple(self._config['nodes_list']) self._num_neighbors = self._config['near_neighbor'] self._neighbor_map = self._get_neighbors() self._dispatch_rate = self._config['dispatch_rate'] self.action_space = MultiDiscrete([(self._num_neighbors + 1) * 5] * self._num_nodes) self.observation_space = Tuple((Box(0, self.max_passenger, shape=(self._num_nodes, ), dtype=np.int64), Box(0, self.max_vehicle, shape=(self._num_nodes, ), dtype=np.int64))) self._is_running = False self._done = False self._start_time = time.time() self._alpha = self._config['alpha'] self._beta = self._config['beta'] self._step = 0 self._total_vehicle = None self._travel_time = None self._pre_action = None self._episode = 0 self._worker_id = str(hash(time.time())) self._save_res_every_ep = int(self._config['save_res_every_ep']) self._vehicle_speed = self._config['veh_speed'] def _get_neighbors(self): k = self._config['near_neighbor'] if k + 1 > len(self._nodes): k = len(self._nodes) - 1 self._num_neighbors = k neighbor_map = dict() for node in self._nodes: dist_lst = [(dest, self.graph.graph_top[node]['nei'][dest]['dist']) for dest in self.graph.graph_top[node]['nei']] dist_lst.sort(key=lambda x: x[1]) neighbor_map[node] = tuple( self._nodes.index(x[0]) for x in dist_lst[:k + 1]) return neighbor_map def _preprocess_action(self, action): assert isinstance(action, np.ndarray) if np.isnan(action).sum() > 0: print(self._step) action = self.action_space.sample() action_mat = np.zeros((self._num_nodes, self._num_nodes)) for nd_idx, chosen_action in enumerate(action): chosen_neighbor = chosen_action // 5 ac_idx = chosen_action % 5 nb_idx = self._neighbor_map[self._nodes[nd_idx]][chosen_neighbor] dispatch_rate = (ac_idx + 1) / 5 action_mat[nd_idx, nb_idx] = dispatch_rate if nb_idx != nd_idx: action_mat[nd_idx, nd_idx] = 1 - dispatch_rate else: action_mat[nd_idx, nd_idx] = 1 sim_action = dict() for nd_idx, node in enumerate(self._nodes): sim_action[node] = action_mat[nd_idx, :] return sim_action, action_mat def reset(self): if self._done: self._episode += 1 self._done = False # print(f'Episode: {self._episode} done!') if self._is_running: self.sim.finishing_touch(self._start_time) if self._episode % self._save_res_every_ep == 0: self.sim.save_result(RESULTS, self._worker_id, unique_name=False) if self._config['plot_queue_len']: self.sim.plot_pass_queue_len(mode='taxi', suffix=self._worker_id) self.sim.plot_pass_wait_time(mode='taxi', suffix=self._worker_id) self._is_running = False self.curr_time = 0 self._step = 0 self.graph = Graph() self.graph.import_graph(graph_file) self.sim = Simulator(self.graph) self.sim.import_arrival_rate(unit=(1, 'sec')) self.sim.import_vehicle_attribute(file_name=vehicle_file) self.sim.set_running_time(start_time=self._config['start_time'], time_horizon=self._config['time_horizon'], unit='hour') self.sim.routing.set_routing_method('simplex') self.sim.initialize(seed=0) self._total_vehicle = self.sim.vehicle_attri['taxi']['total'] self._travel_time = np.zeros((self._num_nodes, self._num_nodes)) for i, node in enumerate(self.graph.graph_top): for j, road in enumerate(self.graph.graph_top): if i != j: self._travel_time[ i, j] = self.graph.graph_top[node]['node'].road[road].dist self._travel_time /= np.linalg.norm(self._travel_time, ord=np.inf) self._pre_action = np.zeros((self._num_neighbors, self._num_nodes)) with open(vehicle_file, 'r') as v_file: vehicle_dist = json.load(v_file) vehicle_dist = vehicle_dist['taxi']['distrib'] vehicle_dist = np.array([vehicle_dist[x] for x in vehicle_dist]) return np.zeros((self._num_nodes, )), vehicle_dist def step(self, action): self._step += 1 if not self._is_running: self._is_running = True sim_action, action_mat = self._preprocess_action(action) # print(sim_action) p_queue, v_queue = self.sim.step(action=sim_action, step_length=self.reb_interval, curr_time=self.curr_time) self.curr_time += self.reb_interval p_queue = np.array(p_queue) v_queue = np.array(v_queue) reward = -self._beta * (p_queue.sum() * (1 - self._alpha) * 1e-1 + self._alpha * self._vehicle_speed * np.maximum( (v_queue - p_queue).reshape( (self._num_nodes, 1)) * action_mat * self._travel_time, 0).sum()) # print(self._vehicle_speed) # print('reward', reward) #print('passenger', p_queue) #print('vehicle', v_queue) #print('action',action_mat) #print('reward', reward) # print(f'at node {v_queue.sum()}, on road {self._total_vehicle - v_queue.sum()}') # print(f'action diff {np.linalg.norm(self._pre_action-action)}') self._pre_action = action if self.curr_time >= self._config['time_horizon'] * 3600 - 1: self._done = True return (p_queue, v_queue), reward, self._done, {}
class TaxiRebalance(gym.Env, ABC): def __init__(self, config): self._config = config self.curr_time = 0 self.graph = Graph() self.graph.import_graph(graph_file) self.sim = Simulator(self.graph) self.max_vehicle = self._config['max_vehicle'] self.reb_interval = self._config['reb_interval'] self.max_travel_t = self._config['max_travel_time'] self.max_lookback_steps = int( np.ceil(self.max_travel_t / self.reb_interval)) self.max_passenger = self._config['max_passenger'] self.num_nodes = len(self._config['nodes_list']) self.near_neighbor = self._config['near_neighbor'] self.action_space = Box(low=0, high=1, shape=((self.near_neighbor + 1) * self.num_nodes, )) self.observation_space = Tuple((Box(0, self.max_passenger, shape=(self.num_nodes, ), dtype=np.int64), Box(0, self.max_vehicle, shape=(self.num_nodes, ), dtype=np.int64))) self._is_running = False self._done = False self._start_time = time.time() self._alpha = 0 self._step = 0 self._total_vehicle = None self._travel_time = None self._pre_action = None self._episode = 0 def reset(self): if self._done: self._episode += 1 self._done = False print(f'Episode: {self._episode} done!') if self._is_running: self.sim.finishing_touch(self._start_time) # self.sim.save_result(RESULTS) if self._config['plot_queue_len']: # self.sim.plot_combo_queue_anim(mode='taxi', frames=100) self.sim.plot_pass_queue_len(mode='taxi', suffix=f'ep_{self._episode}') self.sim.plot_pass_wait_time(mode='taxi', suffix=f'ep_{self._episode}') self._is_running = False self.curr_time = 0 self._alpha = 0 self._step = 0 self.graph = Graph() self.graph.import_graph(graph_file) self.sim = Simulator(self.graph) self.sim.import_arrival_rate(unit=(1, 'sec')) self.sim.import_vehicle_attribute(file_name=vehicle_file) self.sim.set_running_time(start_time=self._config['start_time'], time_horizon=self._config['time_horizon'], unit='hour') self.sim.routing.set_routing_method('simplex') self.sim.initialize(seed=0) self._total_vehicle = self.sim.vehicle_attri['taxi']['total'] self._travel_time = np.zeros((self.num_nodes, self.num_nodes)) for i, node in enumerate(self.graph.graph_top): for j, road in enumerate(self.graph.graph_top): if i != j: self._travel_time[ i, j] = self.graph.graph_top[node]['node'].road[road].dist self._travel_time /= np.linalg.norm(self._travel_time, ord=np.inf) self._pre_action = np.zeros((self.near_neighbor + 1, self.num_nodes)) with open(vehicle_file, 'r') as file: vehicle_dist = json.load(file) vehicle_dist = vehicle_dist['taxi']['distrib'] vehicle_dist = np.array([vehicle_dist[x] for x in vehicle_dist]) return np.zeros((self.num_nodes, )), vehicle_dist def step(self, action): assert isinstance(action, np.ndarray) self._step += 1 if np.isnan(action).sum() > 0: print(self._step) action = self.action_space.sample() action = action.reshape((self.num_nodes, self.near_neighbor + 1)) action = action / np.sum(action, axis=1, keepdims=True) # action = self._alpha*action + (1 - self._alpha) * np.eye(5) # print(action) if not self._is_running: self._is_running = True sim_action = dict() for idx, node in enumerate(self._config['nodes_list']): sim_action[node] = np.squeeze(action[idx, :] / np.sum(action[idx, :])) # print(sim_action) p_queue, v_queue = self.sim.step(action=sim_action, step_length=self.reb_interval, curr_time=self.curr_time) self.curr_time += self.reb_interval p_queue = np.array(p_queue) v_queue = np.array(v_queue) reward = -(p_queue.sum() + 16 * np.maximum( np.array(v_queue - p_queue, ndmin=2).T * action * self._travel_time, 0).sum()) # print(reward) # print('passenger', p_queue) # print('vehicle', v_queue) # print(f'at node {v_queue.sum()}, on road {self._total_vehicle - v_queue.sum()}') # print(f'action diff {np.linalg.norm(self._pre_action-action)}') self._pre_action = action if self.curr_time >= self._config['time_horizon'] * 3600 - 1: self._done = True return (p_queue, v_queue), reward, self._done, {}