Esempio n. 1
0
class TaxiRebalance(gym.Env, ABC):
    def __init__(self, config):
        self._config = config
        self.curr_time = 0
        self.graph = Graph()
        self.graph.import_graph(graph_file)
        self.sim = Simulator(self.graph)

        self.max_vehicle = self._config['max_vehicle']
        self.reb_interval = self._config['reb_interval']
        self.max_travel_t = self._config['max_travel_time']
        self.max_lookback_steps = int(
            np.ceil(self.max_travel_t / self.reb_interval))
        self.max_passenger = self._config['max_passenger']
        self._num_nodes = len(self._config['nodes_list'])
        self._nodes = tuple(self._config['nodes_list'])
        self._num_neighbors = self._config['near_neighbor']
        self._neighbor_map = self._get_neighbors()
        self._dispatch_rate = self._config['dispatch_rate']

        self.action_space = MultiDiscrete([(self._num_neighbors + 1) * 5] *
                                          self._num_nodes)
        self.observation_space = Tuple((Box(0,
                                            self.max_passenger,
                                            shape=(self._num_nodes, ),
                                            dtype=np.int64),
                                        Box(0,
                                            self.max_vehicle,
                                            shape=(self._num_nodes, ),
                                            dtype=np.int64)))
        self._is_running = False
        self._done = False
        self._start_time = time.time()
        self._alpha = self._config['alpha']
        self._beta = self._config['beta']
        self._step = 0
        self._total_vehicle = None
        self._travel_time = None
        self._pre_action = None
        self._episode = 0
        self._worker_id = str(hash(time.time()))
        self._save_res_every_ep = int(self._config['save_res_every_ep'])
        self._vehicle_speed = self._config['veh_speed']

    def _get_neighbors(self):
        k = self._config['near_neighbor']
        if k + 1 > len(self._nodes):
            k = len(self._nodes) - 1
            self._num_neighbors = k
        neighbor_map = dict()
        for node in self._nodes:
            dist_lst = [(dest, self.graph.graph_top[node]['nei'][dest]['dist'])
                        for dest in self.graph.graph_top[node]['nei']]
            dist_lst.sort(key=lambda x: x[1])
            neighbor_map[node] = tuple(
                self._nodes.index(x[0]) for x in dist_lst[:k + 1])
        return neighbor_map

    def _preprocess_action(self, action):
        assert isinstance(action, np.ndarray)
        if np.isnan(action).sum() > 0:
            print(self._step)
            action = self.action_space.sample()
        action_mat = np.zeros((self._num_nodes, self._num_nodes))
        for nd_idx, chosen_action in enumerate(action):
            chosen_neighbor = chosen_action // 5
            ac_idx = chosen_action % 5
            nb_idx = self._neighbor_map[self._nodes[nd_idx]][chosen_neighbor]
            dispatch_rate = (ac_idx + 1) / 5
            action_mat[nd_idx, nb_idx] = dispatch_rate
            if nb_idx != nd_idx:
                action_mat[nd_idx, nd_idx] = 1 - dispatch_rate
            else:
                action_mat[nd_idx, nd_idx] = 1
        sim_action = dict()
        for nd_idx, node in enumerate(self._nodes):
            sim_action[node] = action_mat[nd_idx, :]
        return sim_action, action_mat

    def reset(self):
        if self._done:
            self._episode += 1
            self._done = False
            # print(f'Episode: {self._episode} done!')

        if self._is_running:
            self.sim.finishing_touch(self._start_time)
            if self._episode % self._save_res_every_ep == 0:
                self.sim.save_result(RESULTS,
                                     self._worker_id,
                                     unique_name=False)
                if self._config['plot_queue_len']:
                    self.sim.plot_pass_queue_len(mode='taxi',
                                                 suffix=self._worker_id)
                    self.sim.plot_pass_wait_time(mode='taxi',
                                                 suffix=self._worker_id)
            self._is_running = False

        self.curr_time = 0
        self._step = 0

        self.graph = Graph()
        self.graph.import_graph(graph_file)
        self.sim = Simulator(self.graph)
        self.sim.import_arrival_rate(unit=(1, 'sec'))
        self.sim.import_vehicle_attribute(file_name=vehicle_file)
        self.sim.set_running_time(start_time=self._config['start_time'],
                                  time_horizon=self._config['time_horizon'],
                                  unit='hour')
        self.sim.routing.set_routing_method('simplex')
        self.sim.initialize(seed=0)
        self._total_vehicle = self.sim.vehicle_attri['taxi']['total']

        self._travel_time = np.zeros((self._num_nodes, self._num_nodes))
        for i, node in enumerate(self.graph.graph_top):
            for j, road in enumerate(self.graph.graph_top):
                if i != j:
                    self._travel_time[
                        i,
                        j] = self.graph.graph_top[node]['node'].road[road].dist
        self._travel_time /= np.linalg.norm(self._travel_time, ord=np.inf)
        self._pre_action = np.zeros((self._num_neighbors, self._num_nodes))

        with open(vehicle_file, 'r') as v_file:
            vehicle_dist = json.load(v_file)
        vehicle_dist = vehicle_dist['taxi']['distrib']
        vehicle_dist = np.array([vehicle_dist[x] for x in vehicle_dist])
        return np.zeros((self._num_nodes, )), vehicle_dist

    def step(self, action):
        self._step += 1
        if not self._is_running:
            self._is_running = True
        sim_action, action_mat = self._preprocess_action(action)
        # print(sim_action)
        p_queue, v_queue = self.sim.step(action=sim_action,
                                         step_length=self.reb_interval,
                                         curr_time=self.curr_time)
        self.curr_time += self.reb_interval
        p_queue = np.array(p_queue)
        v_queue = np.array(v_queue)
        reward = -self._beta * (p_queue.sum() * (1 - self._alpha) * 1e-1 +
                                self._alpha * self._vehicle_speed * np.maximum(
                                    (v_queue - p_queue).reshape(
                                        (self._num_nodes, 1)) * action_mat *
                                    self._travel_time, 0).sum())
        # print(self._vehicle_speed)
        # print('reward', reward)
        #print('passenger', p_queue)
        #print('vehicle', v_queue)
        #print('action',action_mat)
        #print('reward', reward)
        # print(f'at node {v_queue.sum()}, on road {self._total_vehicle - v_queue.sum()}')
        # print(f'action diff {np.linalg.norm(self._pre_action-action)}')
        self._pre_action = action
        if self.curr_time >= self._config['time_horizon'] * 3600 - 1:
            self._done = True
        return (p_queue, v_queue), reward, self._done, {}
Esempio n. 2
0
class TaxiRebalance(gym.Env, ABC):
    def __init__(self, config):
        self._config = config
        self.curr_time = 0
        self.graph = Graph()
        self.graph.import_graph(graph_file)
        self.sim = Simulator(self.graph)

        self.max_vehicle = self._config['max_vehicle']
        self.reb_interval = self._config['reb_interval']
        self.max_travel_t = self._config['max_travel_time']
        self.max_lookback_steps = int(
            np.ceil(self.max_travel_t / self.reb_interval))
        self.max_passenger = self._config['max_passenger']
        self.num_nodes = len(self._config['nodes_list'])
        self.near_neighbor = self._config['near_neighbor']

        self.action_space = Box(low=0,
                                high=1,
                                shape=((self.near_neighbor + 1) *
                                       self.num_nodes, ))
        self.observation_space = Tuple((Box(0,
                                            self.max_passenger,
                                            shape=(self.num_nodes, ),
                                            dtype=np.int64),
                                        Box(0,
                                            self.max_vehicle,
                                            shape=(self.num_nodes, ),
                                            dtype=np.int64)))

        self._is_running = False
        self._done = False
        self._start_time = time.time()
        self._alpha = 0
        self._step = 0
        self._total_vehicle = None
        self._travel_time = None
        self._pre_action = None
        self._episode = 0

    def reset(self):
        if self._done:
            self._episode += 1
            self._done = False
            print(f'Episode: {self._episode} done!')

        if self._is_running:
            self.sim.finishing_touch(self._start_time)
            # self.sim.save_result(RESULTS)
            if self._config['plot_queue_len']:
                # self.sim.plot_combo_queue_anim(mode='taxi', frames=100)
                self.sim.plot_pass_queue_len(mode='taxi',
                                             suffix=f'ep_{self._episode}')
                self.sim.plot_pass_wait_time(mode='taxi',
                                             suffix=f'ep_{self._episode}')
            self._is_running = False

        self.curr_time = 0
        self._alpha = 0
        self._step = 0

        self.graph = Graph()
        self.graph.import_graph(graph_file)
        self.sim = Simulator(self.graph)
        self.sim.import_arrival_rate(unit=(1, 'sec'))
        self.sim.import_vehicle_attribute(file_name=vehicle_file)
        self.sim.set_running_time(start_time=self._config['start_time'],
                                  time_horizon=self._config['time_horizon'],
                                  unit='hour')
        self.sim.routing.set_routing_method('simplex')
        self.sim.initialize(seed=0)
        self._total_vehicle = self.sim.vehicle_attri['taxi']['total']

        self._travel_time = np.zeros((self.num_nodes, self.num_nodes))
        for i, node in enumerate(self.graph.graph_top):
            for j, road in enumerate(self.graph.graph_top):
                if i != j:
                    self._travel_time[
                        i,
                        j] = self.graph.graph_top[node]['node'].road[road].dist
        self._travel_time /= np.linalg.norm(self._travel_time, ord=np.inf)
        self._pre_action = np.zeros((self.near_neighbor + 1, self.num_nodes))

        with open(vehicle_file, 'r') as file:
            vehicle_dist = json.load(file)
        vehicle_dist = vehicle_dist['taxi']['distrib']
        vehicle_dist = np.array([vehicle_dist[x] for x in vehicle_dist])
        return np.zeros((self.num_nodes, )), vehicle_dist

    def step(self, action):
        assert isinstance(action, np.ndarray)
        self._step += 1
        if np.isnan(action).sum() > 0:
            print(self._step)
            action = self.action_space.sample()
        action = action.reshape((self.num_nodes, self.near_neighbor + 1))
        action = action / np.sum(action, axis=1, keepdims=True)
        # action = self._alpha*action + (1 - self._alpha) * np.eye(5)
        # print(action)

        if not self._is_running:
            self._is_running = True

        sim_action = dict()
        for idx, node in enumerate(self._config['nodes_list']):
            sim_action[node] = np.squeeze(action[idx, :] /
                                          np.sum(action[idx, :]))
        # print(sim_action)
        p_queue, v_queue = self.sim.step(action=sim_action,
                                         step_length=self.reb_interval,
                                         curr_time=self.curr_time)
        self.curr_time += self.reb_interval
        p_queue = np.array(p_queue)
        v_queue = np.array(v_queue)
        reward = -(p_queue.sum() + 16 * np.maximum(
            np.array(v_queue - p_queue, ndmin=2).T * action *
            self._travel_time, 0).sum())
        # print(reward)
        # print('passenger', p_queue)
        # print('vehicle', v_queue)
        # print(f'at node {v_queue.sum()}, on road {self._total_vehicle - v_queue.sum()}')
        # print(f'action diff {np.linalg.norm(self._pre_action-action)}')
        self._pre_action = action
        if self.curr_time >= self._config['time_horizon'] * 3600 - 1:
            self._done = True
        return (p_queue, v_queue), reward, self._done, {}