Example #1
    def test_view(self):
        g = nx.Graph()
        g.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (1, 5)])
        nx.set_node_attributes(g, {
            0: (0, 1),
            1: (0, 2),
            2: (0, 3),
            3: (0, 4),
            4: (0, 5),
            5: (1, 1)
        }, name="coords")
        orders = [(3, 2, 2, 3, 3)]  # <source, destination, time, length, price>
        drivers = np.array([1, 1, 1, 1, 1, 1])
        action = np.array([1, 0, 0], dtype=float)

        env = TaxiEnv(g, orders, 1, drivers, 10)
        env.seed(123)
        env.set_view([2, 3, 4])
        obs, _, _ = env.get_observation()

        # check observation space and content
        assert env.observation_space_shape == obs.shape
        view_size = 3
        # Income is not included by default, so the layout is
        # <driver, order, idle, time_id, node_id>.
        assert env.observation_space_shape == (view_size * 4 + 10, )
        # In the full graph node 1 has degree 3, while the nodes in the view
        # have degree at most 2, so the action space is the maximum in-view
        # degree plus the staying action: 2 + 1 = 3.
        assert env.action_space_shape == (3, )
        assert env.current_node_id in [2, 3, 4]

        # an action [1, 0, 0] for node 2 means going to node 3, because it's the only neighbor in the view
        env.step(action)
        assert env.current_node_id in [2, 3, 4]
        env.step(action)
        assert env.current_node_id in [2, 3, 4]
        obs, rew, done, info = env.step(action)
        # There are 2 drivers at node 3 at the end: one from node 2 and one
        # from node 4.
        assert (obs[:view_size] == np.array([0.5, 1, 0])).all()
        assert (obs[view_size:2 * view_size] == np.array([0, 0, 0])).all()
        assert (obs[2 * view_size:3 * view_size] == np.array([0.5, 1, 0])).all()
        # by now the next time iteration should have happened
        assert env.time == 1
        assert env.current_node_id in [2, 3]
        assert (obs[2 * view_size:3 * view_size] == np.array([0.5, 1, 0])).all()
        assert (obs[3 * view_size:3 * view_size + 10] == np.array(
            [0, 1, 0, 0, 0, 0, 0, 0, 0, 0])).all()
        assert obs[3 * view_size + 10:].shape == (3, )
        assert (obs[3 * view_size + 10:] == np.array([1, 0, 0])).all() or \
               (obs[3 * view_size + 10:] == np.array([0, 1, 0])).all()
        assert [d.position for d in env.all_driver_list] == [0, 1, 3, 2, 3, 5]
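
The asserts in Example #1 fix the observation layout. As a minimal sketch, here is how such an observation could be decomposed, assuming the <driver, order, idle, time_id, node_id> ordering from the comments above (split_observation is an illustrative helper, not part of TaxiEnv):

def split_observation(obs, view_size=3, n_intervals=10):
    # Three consecutive per-node count blocks over the viewed nodes.
    drivers = obs[:view_size]
    orders = obs[view_size:2 * view_size]
    idle = obs[2 * view_size:3 * view_size]
    # A one-hot time index over n_intervals ...
    time_onehot = obs[3 * view_size:3 * view_size + n_intervals]
    # ... and a one-hot id of the current node within the view.
    node_onehot = obs[3 * view_size + n_intervals:]
    return drivers, orders, idle, time_onehot, node_onehot
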
Example #2
    def test_sync(self):
        g = nx.Graph()
        g.add_edges_from([(0, 1), (1, 2), (2, 3)])
        nx.set_node_attributes(g, {
            0: (0, 1),
            1: (0, 2),
            2: (1, 1),
            3: (1, 2)
        }, name="coords")
        orders = [(0, 1, 0, 1, 1), (1, 1, 1, 2, 2), (2, 2, 1, 3, 3),
                  (3, 2, 2, 3, 3)]
        drivers = np.array([1, 0, 0, 5])
        action = np.array([0.3, 0.4, 0.3], dtype=float)

        env = TaxiEnv(g, orders, 1, drivers, 10)
        env.step(action)

        env2 = TaxiEnv(g, orders, 1, drivers, 10)
        env2.sync(env)

        o1, _, _ = env.get_observation()
        o2, _, _ = env2.get_observation()
        assert (o1 == o2).all()

        env.seed(1)
        env2.seed(1)

        while not env.done:
            obs, rew, done, info = env.step(action)
            obs2, rew2, done2, info2 = env2.step(action)

            assert (obs == obs2).all()
            assert rew == rew2
            assert done == done2
            assert info == info2
class TaxiEnvBatch(gym.Env):
    '''
    This class is a wrapper over taxi_env that provides the interface cA2C
    requires: drivers are processed in batches, and some additional context
    information is exposed. A usage sketch follows the class.
    '''
    metadata = {'render.modes': ['rgb_array']}

    def __init__(self,
                 world: nx.Graph,
                 orders: List[Tuple[int, int, int, int, float]],
                 order_sampling_rate: float,
                 drivers_per_node: Array[int],
                 n_intervals: int,
                 wc: float,
                 count_neighbors: bool = False,
                 weight_poorest: bool = False,
                 normalize_rewards: bool = True,
                 minimum_reward: bool = False,
                 reward_bound: float = None,
                 include_income_to_observation: bool = False,
                 poorest_first: bool = False,
                 idle_reward: bool = False) -> None:

        self.itEnv = TaxiEnv(world, orders, order_sampling_rate,
                             drivers_per_node, n_intervals, wc,
                             count_neighbors, weight_poorest,
                             normalize_rewards, minimum_reward, reward_bound,
                             include_income_to_observation, poorest_first,
                             idle_reward)
        self.world = self.itEnv.world
        self.n_intervals = n_intervals
        self.n_drivers = self.itEnv.n_drivers
        self.time = 0
        self.include_income_to_observation = include_income_to_observation
        self.one_cell_action_space = self.itEnv.max_degree + 1
        self.action_space = spaces.Box(low=0,
                                       high=1,
                                       shape=(self.one_cell_action_space *
                                              self.itEnv.world_size, ))

        if include_income_to_observation:
            assert self.itEnv.observation_space_shape[0] == (
                3 * len(self.world) + self.itEnv.n_intervals + 3)
            self.observation_space_shape = (
                self.itEnv.observation_space_shape[0] + 2 * len(self.world) - 3, )
        else:
            assert self.itEnv.observation_space_shape[0] == (
                3 * len(self.world) + self.itEnv.n_intervals)
            self.observation_space_shape = (
                self.itEnv.observation_space_shape[0] - len(self.world), )
        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=self.observation_space_shape)
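
    # Shape arithmetic behind the asserts above (a reading of this code, with
    # W = world size and T = n_intervals): the wrapped env observes
    # 3*W + T + 3 values with income and 3*W + T without. The batch
    # observation drops the one-hot node id (W) and, in the income case,
    # replaces the single 3-value income block with a per-node one (3*W),
    # giving 3*W + T + 3 - W - 3 + 3*W = 5*W + T with income (the buffer
    # allocated in step()) and 2*W + T without.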

    def reset(self) -> Array[int]:
        self.time = 0
        if self.itEnv.include_income_to_observation:
            t = self.itEnv.world_size + 3
            observation = self.itEnv.reset()[:-t]
            # assuming all incomes are zero
            return np.concatenate(
                (observation, np.zeros(3 * self.itEnv.world_size)))
        else:
            t = self.itEnv.world_size
            return self.itEnv.reset()[:-t]

    def get_reset_info(self):
        '''
        Currently used only to get max_orders and max_drivers, which should be
        independent of current_cell
        '''
        return self.itEnv.get_reset_info()

    def step(self,
             action: Array[float]) -> Tuple[Array[int], float, bool, Dict]:
        cells_with_nonzero_drivers = np.sum([
            1 for n in self.itEnv.world.nodes(data=True)
            if n[1]['info'].get_driver_num() > 0
        ])
        nodes_with_orders = np.sum([
            1 for n in self.itEnv.world.nodes(data=True)
            if n[1]['info'].get_order_num() > 0
        ])
        total_orders = np.sum([
            n[1]['info'].get_order_num()
            for n in self.itEnv.world.nodes(data=True)
        ])
        global_observation = np.zeros(5 * self.itEnv.world_size +
                                      self.itEnv.n_intervals)
        global_done = False
        global_reward = 0
        reward_per_node = np.zeros(self.itEnv.world_size)
        init_t = self.itEnv.time
        self.last_action_for_drawing = action

        total_served_orders = 0
        max_driver = None
        max_order = None

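        # The wrapped env advances one cell per call to its step(); after all
        # cells with drivers have acted, its time counter increases by one
        # (checked by the asserts after this loop).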
        for i in range(cells_with_nonzero_drivers):
            current_cell = self.itEnv.current_node_id
            a = current_cell * self.one_cell_action_space
            action_per_cell = action[a:a + self.one_cell_action_space]

            observation, reward, done, info = self.itEnv.step(action_per_cell)

            reward_per_node[current_cell] = reward
            global_done = done
            global_reward += reward
            total_served_orders += info['served_orders']

            # updated at each step, but the final value should be correct
            max_driver = info["driver normalization constant"]
            max_order = info["order normalization constant"]

            if self.itEnv.include_income_to_observation:
                assert observation.shape[0] == (
                    3 * self.itEnv.world_size + self.itEnv.n_intervals + 3)
                size_without_income = 2 * self.itEnv.world_size + self.itEnv.n_intervals
                offset = current_cell
                global_observation[:size_without_income] = (
                    observation[:size_without_income])
                start = size_without_income + 3 * offset
                global_observation[start:start + 3] = observation[-3:]
            else:
                global_observation = observation[:-self.itEnv.world_size]

        assert not global_done or init_t + 1 == self.itEnv.n_intervals
        assert self.itEnv.time == init_t + 1
        self.time += 1

        global_info = {
            "reward_per_node": reward_per_node,
            "served_orders": total_served_orders,
            "nodes_with_drivers": cells_with_nonzero_drivers,
            "nodes_with_orders": nodes_with_orders,
            "driver normalization constant": max_driver,
            "order normalization constant": max_order,
            "total_orders": total_orders,
            "idle_reward": float(
                np.mean([d.get_idle_period()
                         for d in self.itEnv.all_driver_list])),
            "min_idle": float(
                np.min([d.get_idle_period()
                        for d in self.itEnv.all_driver_list]))
        }
        return global_observation, global_reward, global_done, global_info

    def seed(self, seed):
        self.itEnv.seed(seed)

    def get_min_revenue(self):
        return self.itEnv.get_min_revenue()

    def get_total_revenue(self):
        return self.itEnv.get_total_revenue()

    def compute_remaining_drivers_and_orders(self, state):
        return self.itEnv.compute_remaining_drivers_and_orders(state)

    def set_income_bound(self, bound):
        self.itEnv.set_income_bound(bound)

    def render(self, mode='rgb_array'):
        fig = plt.figure(figsize=(20, 20))
        ax = fig.gca()
        ax.axis('off')

        pos = nx.get_node_attributes(self.world, 'coords')
        G = nx.DiGraph(self.world)
        nodelist = []
        edgelist = []
        action = self.last_action_for_drawing
        act = self.itEnv.action_space_shape[0]
        node_colors = []
        edge_colors = []
        for n in self.world.nodes():
            node_action = action[act * n:act * (n + 1)]
            nodelist.append(n)
            node_colors.append(node_action[-1])
            j = 0
            added = 0
            for nn in self.world.neighbors(n):
                if node_action[j] > 0:
                    edgelist.append((n, nn))
                    edge_colors.append(node_action[j])
                    added += 1
                j += 1
            assert abs(np.sum(node_action) - 1) < 0.00001, node_action
            assert node_action[-1] != 0 or added > 0, (node_action, n)

        nx.draw_networkx(G,
                         edgelist=edgelist,
                         edge_color=edge_colors,
                         vmin=-1,
                         vmax=1,
                         node_shape='.',
                         edge_vmax=1.1,
                         cmap=matplotlib.cm.get_cmap("Blues"),
                         edge_cmap=matplotlib.cm.get_cmap("Blues"),
                         node_color=node_colors,
                         nodelist=nodelist,
                         pos=pos,
                         arrows=True,
                         with_labels=False,
                         ax=ax)

        canvas = FigureCanvasAgg(fig)
        canvas.draw()
        s, (width, height) = canvas.print_to_buffer()

        # Convert the RGBA buffer to a NumPy array
        X = np.frombuffer(s, np.uint8).reshape((height, width, 4))
        plt.close(fig)
        return X
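
A minimal usage sketch for TaxiEnvBatch, assuming the constructor arguments seen in the tests above; the graph, the orders, the driver counts, and the waiting cost wc=0.0 are illustrative placeholders rather than values from the source:

g = nx.Graph()
g.add_edges_from([(0, 1), (1, 2), (2, 3)])
nx.set_node_attributes(g, {0: (0, 1), 1: (0, 2), 2: (1, 1), 3: (1, 2)},
                       name="coords")
orders = [(0, 1, 0, 1, 1), (3, 2, 2, 3, 3)]  # <source, destination, time, length, price>
drivers = np.array([1, 0, 0, 5])

batch_env = TaxiEnvBatch(g, orders, 1, drivers, 10, wc=0.0)
obs = batch_env.reset()

# One flat action vector with one_cell_action_space entries per node; each
# per-node slice should sum to 1 (render() asserts this), and the last entry
# of a slice is the probability of staying.
per_node = np.zeros(batch_env.one_cell_action_space)
per_node[-1] = 1.0  # every driver stays put
action = np.tile(per_node, len(g))
obs, reward, done, info = batch_env.step(action)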