def test_count_neighbors(self):
    g = nx.Graph()
    g.add_edges_from([(0, 1), (1, 2), (2, 0)])
    nx.set_node_attributes(g, {0: (0, 1), 1: (0, 2), 2: (1, 1)}, name="coords")
    orders = [(0, 1, 0, 1, 1), (1, 2, 0, 2, 2), (2, 0, 0, 3, 3)]
    drivers = np.array([4, 0, 1])
    env = TaxiEnv(g, orders, 1, drivers, 3, 0.5, count_neighbors=True)
    observation1 = env.reset()
    env.current_node_id = 0
    env.non_empty_nodes = [1, 2]
    dispatch_list = env.make_order_dispatch_list_and_remove_orders()
    assert len(dispatch_list) == 2
    env.reset()  # the dispatch list can be built only once per reset, so reset before stepping
    env.current_node_id = 0
    env.non_empty_nodes = [1, 2]
    observation2, reward, done, info = env.step(np.zeros(3))
    assert reward == (1 + 2 - 0.5 - 0.5) / 4
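
# --- Illustrative sketch (not part of the test suite) ---
# The assertion above encodes the assumed reward arithmetic: the prices of the
# two dispatched orders (1 and 2), minus the 0.5 per-driver cost (the 0.5
# passed to TaxiEnv) incurred twice, normalized by the 4 drivers at the
# current node. The helper below is a hypothetical restatement of that
# formula; its name and signature are not part of TaxiEnv's API.
def _expected_normalized_reward(order_prices, per_driver_cost, charged_drivers, total_drivers):
    return (sum(order_prices) - per_driver_cost * charged_drivers) / total_drivers

# Mirrors the assertion in test_count_neighbors:
# _expected_normalized_reward([1, 2], 0.5, 2, 4) == (1 + 2 - 0.5 - 0.5) / 4
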
def test_reward_options(self):
    '''
    Test these:
        weight_poorest: bool = False,
        normalize_rewards: bool = True,
        minimum_reward: bool = False,
        reward_bound: float = None,
        include_income_to_observation: int = 0
    '''
    g = nx.Graph()
    g.add_edges_from([(0, 1), (1, 2), (2, 3)])
    nx.set_node_attributes(g, {0: (0, 1), 1: (0, 2), 2: (1, 1), 3: (1, 2)}, name="coords")
    orders = [(0, 1, 0, 1, 1), (1, 1, 0, 2, 2), (2, 2, 0, 3, 3), (3, 2, 0, 3, 3)]
    drivers = np.array([1, 0, 0, 5])
    action = np.array([1, 0, 0], dtype=float)

    env = TaxiEnv(g, orders, 1, drivers, 3, 0.5, count_neighbors=True, normalize_rewards=False)
    observation = env.reset()
    env.current_node_id = 3
    env.non_empty_nodes = [0, 1, 2]
    observation, reward, done, info = env.step(action)
    assert reward == (3 + 3 - 0.5 - 0.5 - 0.5)

    env = TaxiEnv(g, orders, 1, drivers, 3, 0.5, count_neighbors=True, weight_poorest=True)
    observation = env.reset()
    env.current_node_id = 3
    env.non_empty_nodes = [0, 1, 2]
    observation, reward, done, info = env.step(action)
    # each per-driver reward is weighted by (1 - softmax) of the rewards, then normalized
    r = np.array([0, 3, 3, -0.5, -0.5, -0.5])  # 0 because the driver at node 0 does not move
    mult = 1 - env.softmax(r)
    rew = mult * r
    rew /= 5
    assert reward == pytest.approx(np.sum(rew))

    env = TaxiEnv(g, orders, 1, drivers, 3, 0.5, count_neighbors=True, minimum_reward=True)
    observation = env.reset()
    env.current_node_id = 3
    env.non_empty_nodes = [0, 1, 2]
    observation, reward, done, info = env.step(action)
    assert reward == -0.5 / 5  # a single value: the minimum per-driver reward, normalized

    env = TaxiEnv(g, orders, 1, drivers, 3, 0.5, count_neighbors=True, normalize_rewards=False, minimum_reward=True)
    observation = env.reset()
    env.current_node_id = 3
    env.non_empty_nodes = [0, 1, 2]
    observation, reward, done, info = env.step(action)
    assert reward == -0.5  # a single value: the minimum per-driver reward, non-normalized

    env = TaxiEnv(g, orders, 1, drivers, 3, 0.5, count_neighbors=True, reward_bound=1)
    observation = env.reset()
    env.current_node_id = 3
    env.non_empty_nodes = [0, 1, 2]
    observation, reward, done, info = env.step(action)
    assert reward == (1 + 1 - 0.5 - 0.5 - 0.5) / 5  # order rewards are capped at reward_bound

    drivers = np.array([2, 0, 0, 5])
    env = TaxiEnv(g, orders, 1, drivers, 3, 0.5, count_neighbors=True, reward_bound=1, include_income_to_observation=True)
    observation = env.reset()
    env.world.nodes[0]['info'].drivers[0].add_income(0.9)
    env.current_node_id = 3
    env.non_empty_nodes = [0, 1, 2]
    # all drivers at node 3 have been dispatched but have not yet arrived,
    # so the observation should reflect only the drivers at node 0
    observation, reward, done, info = env.step(action)
    assert env.current_node_id == 0
    assert observation.shape[0] == 5 * env.world_size + env.n_intervals
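
# --- Illustrative sketch (not part of the test suite) ---
# The weight_poorest branch above re-derives the reward by hand: each
# per-driver reward is weighted by one minus the softmax of all per-driver
# rewards (so better-off drivers contribute less), then the weighted sum is
# normalized by the driver count. The helpers below restate that computation
# under the assumption that env.softmax is a standard softmax; their names
# are hypothetical, not TaxiEnv's API.
def _softmax(x):
    e = np.exp(x - np.max(x))  # softmax is shift-invariant; the shift only adds numerical stability
    return e / e.sum()

def _weight_poorest_reward(per_driver_rewards, total_drivers):
    weights = 1 - _softmax(per_driver_rewards)  # poorer drivers get larger weights
    return np.sum(weights * per_driver_rewards) / total_drivers

# Mirrors the assertion in test_reward_options:
# _weight_poorest_reward(np.array([0, 3, 3, -0.5, -0.5, -0.5]), 5)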