def test_encoder_with_sampling(space):
    """Round-trip random samples of a MultiDiscrete space through ``Encoder``.

    The number of samples drawn equals the product of the entries of
    ``space``; every sample must survive ``encode`` followed by ``decode``
    unchanged.
    """
    sample_count = int(np.prod(space))
    multi_discrete = MultiDiscrete(space)
    encoder = Encoder(multi_discrete)

    for _ in range(sample_count):
        original = multi_discrete.sample()
        round_tripped = encoder.decode(encoder.encode(original))
        assert np.equal(original, round_tripped).all()
def test_multidiscrete_subspace_reproducibility():
    """Sub-spaces obtained by indexing must sample reproducibly.

    For each index expression the sub-space is created twice and both
    copies are sampled once; the two samples must compare equal. The full
    slice is additionally compared against a sample of the parent space.
    """
    everything = slice(None)

    # 1D multi-discrete
    flat = MultiDiscrete([100, 200, 300])
    flat.seed(None)
    flat_keys = (0, slice(0, 1), slice(0, 2), everything)
    for key in flat_keys:
        assert sample_equal(flat[key].sample(), flat[key].sample())
    assert sample_equal(flat[:].sample(), flat.sample())

    # 2D multi-discrete
    grid = MultiDiscrete([[300, 400, 500], [600, 700, 800]])
    grid.seed(None)
    grid_keys = (
        (0, 1),
        0,
        slice(0, 1),
        (slice(0, 2), everything),
        (everything, slice(0, 1)),
        (slice(0, 2), slice(0, 2)),
        everything,
        (everything, everything),
    )
    for key in grid_keys:
        assert sample_equal(grid[key].sample(), grid[key].sample())
    assert sample_equal(grid[:, :].sample(), grid.sample())
def test_convert_element_to_space_type(self):
    """Check that a re-typed element is converted back to fit a nested space.

    Samples are cast to dtypes chosen by this test (float64 / int32 /
    Python float), assembled into a nested element, passed through
    ``convert_element_to_space_type`` and then checked with
    ``Dict.contains``.
    """
    box = Box(low=-1, high=1, shape=(2, ))
    discrete = Discrete(2)
    multi_discrete = MultiDiscrete([2, 2])
    multi_binary = MultiBinary(2)
    box_and_discrete = Tuple((box, discrete))
    inner_dict = Dict({
        "box2": box,
        "discrete2": discrete,
    })
    full_space = Dict({
        "box": box,
        "discrete": discrete,
        "multi_discrete": multi_discrete,
        "multi_binary": multi_binary,
        "dict_space": inner_dict,
        "tuple_space": box_and_discrete,
    })

    # Deliberately re-typed values that the converter has to bring back in line.
    box_as_float64 = box.sample().astype(np.float64)
    multi_discrete_as_int32 = multi_discrete.sample().astype(np.int32)
    multi_binary_as_int32 = multi_binary.sample().astype(np.int32)
    discrete_as_float = float(0)

    retyped_element = {
        "box": box_as_float64,
        "discrete": discrete_as_float,
        "multi_discrete": multi_discrete_as_int32,
        "multi_binary": multi_binary_as_int32,
        "tuple_space": (box_as_float64, discrete_as_float),
        "dict_space": {
            "box2": box_as_float64,
            "discrete2": discrete_as_float,
        },
    }

    converted = convert_element_to_space_type(retyped_element,
                                              full_space.sample())
    assert full_space.contains(converted)
class TaxiRebalance(gym.Env, ABC):
    """Gym environment that drives a ``Simulator`` for taxi rebalancing.

    Every ``step`` converts a MultiDiscrete action (one entry per node) into
    a per-node dispatch matrix, advances the simulator by one rebalancing
    interval, and returns the passenger and vehicle queues reported by the
    simulator together with a reward computed from them.
    """

    def __init__(self, config):
        """Read settings from ``config`` and build the gym spaces.

        Args:
            config: Mapping providing the keys read below ('max_vehicle',
                'reb_interval', 'max_travel_time', 'max_passenger',
                'nodes_list', 'near_neighbor', 'dispatch_rate', 'alpha',
                'beta', 'save_res_every_ep', 'veh_speed'); other methods
                additionally read 'plot_queue_len', 'start_time' and
                'time_horizon'.
        """
        self._config = config
        self.curr_time = 0
        self.graph = Graph()
        # `graph_file` is not defined in this class; presumably a
        # module-level path -- TODO confirm.
        self.graph.import_graph(graph_file)
        self.sim = Simulator(self.graph)
        self.max_vehicle = self._config['max_vehicle']
        self.reb_interval = self._config['reb_interval']
        self.max_travel_t = self._config['max_travel_time']
        # Number of rebalancing intervals needed to cover the maximum travel
        # time, rounded up.
        self.max_lookback_steps = int(
            np.ceil(self.max_travel_t / self.reb_interval))
        self.max_passenger = self._config['max_passenger']
        self._num_nodes = len(self._config['nodes_list'])
        self._nodes = tuple(self._config['nodes_list'])
        self._num_neighbors = self._config['near_neighbor']
        # _get_neighbors() may lower self._num_neighbors, so it must run
        # before the action space is sized.
        self._neighbor_map = self._get_neighbors()
        self._dispatch_rate = self._config['dispatch_rate']
        # One discrete choice per node. _preprocess_action decodes each
        # choice as (neighbor slot, rate level) via `// 5` and `% 5`.
        self.action_space = MultiDiscrete([(self._num_neighbors + 1) * 5] *
                                          self._num_nodes)
        # Two per-node boxes, bounded by max_passenger and max_vehicle.
        self.observation_space = Tuple((Box(0,
                                            self.max_passenger,
                                            shape=(self._num_nodes, ),
                                            dtype=np.int64),
                                        Box(0,
                                            self.max_vehicle,
                                            shape=(self._num_nodes, ),
                                            dtype=np.int64)))
        self._is_running = False
        self._done = False
        self._start_time = time.time()
        self._alpha = self._config['alpha']
        self._beta = self._config['beta']
        self._step = 0
        # The next two are filled in by reset().
        self._total_vehicle = None
        self._travel_time = None
        self._pre_action = None
        self._episode = 0
        # Identifier derived from the construction time; used as a suffix /
        # id when saving and plotting results in reset().
        self._worker_id = str(hash(time.time()))
        self._save_res_every_ep = int(self._config['save_res_every_ep'])
        self._vehicle_speed = self._config['veh_speed']

    def _get_neighbors(self):
        """Build the node -> nearest-neighbor-indices map.

        Also stores the (possibly reduced) neighbor count in
        ``self._num_neighbors``.

        Returns:
            dict mapping each node in ``self._nodes`` to a tuple of up to
            ``k + 1`` indices into ``self._nodes``, ordered by the 'dist'
            value found in ``self.graph.graph_top[node]['nei']``.
        """
        k = self._config['near_neighbor']
        # Cap k so that k + 1 never exceeds the number of nodes.
        if k + 1 > len(self._nodes):
            k = len(self._nodes) - 1
        self._num_neighbors = k
        neighbor_map = dict()
        for node in self._nodes:
            dist_lst = [(dest, self.graph.graph_top[node]['nei'][dest]['dist'])
                        for dest in self.graph.graph_top[node]['nei']]
            dist_lst.sort(key=lambda x: x[1])
            # Keep the k + 1 closest entries; presumably the node itself is
            # among them (distance 0) -- TODO confirm against the graph data.
            neighbor_map[node] = tuple(
                self._nodes.index(x[0]) for x in dist_lst[:k + 1])
        return neighbor_map

    def _preprocess_action(self, action):
        """Turn a per-node discrete action vector into a dispatch matrix.

        Args:
            action: numpy array with one entry per node.

        Returns:
            Tuple ``(sim_action, action_mat)`` where ``action_mat`` is a
            (num_nodes, num_nodes) array and ``sim_action`` maps each node
            to its row of ``action_mat``.
        """
        assert isinstance(action, np.ndarray)
        # An action containing NaN is replaced by a random sample; the
        # current step count is printed when that happens.
        if np.isnan(action).sum() > 0:
            print(self._step)
            action = self.action_space.sample()
        action_mat = np.zeros((self._num_nodes, self._num_nodes))
        for nd_idx, chosen_action in enumerate(action):
            # Decode the flat choice: which neighbor slot and which of the
            # five rate levels.
            chosen_neighbor = chosen_action // 5
            ac_idx = chosen_action % 5
            nb_idx = self._neighbor_map[self._nodes[nd_idx]][chosen_neighbor]
            # Rate levels 0..4 map to 0.2, 0.4, ..., 1.0.
            dispatch_rate = (ac_idx + 1) / 5
            action_mat[nd_idx, nb_idx] = dispatch_rate
            # The remainder stays at the origin node; if the chosen target is
            # the origin itself, the whole row weight stays there.
            if nb_idx != nd_idx:
                action_mat[nd_idx, nd_idx] = 1 - dispatch_rate
            else:
                action_mat[nd_idx, nd_idx] = 1
        sim_action = dict()
        for nd_idx, node in enumerate(self._nodes):
            sim_action[node] = action_mat[nd_idx, :]
        return sim_action, action_mat

    def reset(self):
        """Finalize the previous run (if any) and start a fresh simulation.

        Returns:
            Tuple ``(zeros, vehicle_dist)``: a zero vector with one entry per
            node and an array built from the 'taxi' -> 'distrib' values of
            the vehicle file.
        """
        if self._done:
            self._episode += 1
            self._done = False
            # print(f'Episode: {self._episode} done!')
        # NOTE(review): block nesting was reconstructed from
        # whitespace-collapsed source; confirm that this `if` is meant to be
        # independent of `if self._done` above.
        if self._is_running:
            self.sim.finishing_touch(self._start_time)
            # Save results only every `_save_res_every_ep`-th episode.
            if self._episode % self._save_res_every_ep == 0:
                self.sim.save_result(RESULTS,
                                     self._worker_id,
                                     unique_name=False)
            if self._config['plot_queue_len']:
                self.sim.plot_pass_queue_len(mode='taxi',
                                             suffix=self._worker_id)
                self.sim.plot_pass_wait_time(mode='taxi',
                                             suffix=self._worker_id)
            self._is_running = False
        self.curr_time = 0
        self._step = 0
        # Rebuild graph and simulator from scratch for the new episode.
        self.graph = Graph()
        self.graph.import_graph(graph_file)
        self.sim = Simulator(self.graph)
        self.sim.import_arrival_rate(unit=(1, 'sec'))
        self.sim.import_vehicle_attribute(file_name=vehicle_file)
        self.sim.set_running_time(start_time=self._config['start_time'],
                                  time_horizon=self._config['time_horizon'],
                                  unit='hour')
        self.sim.routing.set_routing_method('simplex')
        # The simulator is always initialized with the same seed.
        self.sim.initialize(seed=0)
        self._total_vehicle = self.sim.vehicle_attri['taxi']['total']
        # Pairwise `dist` values taken from the graph; the diagonal stays 0.
        # Rows/columns follow the iteration order of graph_top -- presumably
        # the same order as self._nodes; TODO confirm.
        self._travel_time = np.zeros((self._num_nodes, self._num_nodes))
        for i, node in enumerate(self.graph.graph_top):
            for j, road in enumerate(self.graph.graph_top):
                if i != j:
                    self._travel_time[
                        i, j] = self.graph.graph_top[node]['node'].road[road].dist
        # Scale by the matrix infinity norm (largest absolute row sum).
        self._travel_time /= np.linalg.norm(self._travel_time, ord=np.inf)
        self._pre_action = np.zeros((self._num_neighbors, self._num_nodes))
        # `vehicle_file` is not defined in this class; presumably a
        # module-level path to a JSON file -- TODO confirm.
        with open(vehicle_file, 'r') as v_file:
            vehicle_dist = json.load(v_file)
        vehicle_dist = vehicle_dist['taxi']['distrib']
        vehicle_dist = np.array([vehicle_dist[x] for x in vehicle_dist])
        return np.zeros((self._num_nodes, )), vehicle_dist

    def step(self, action):
        """Advance the simulation by one rebalancing interval.

        Args:
            action: numpy array with one discrete choice per node (see
                ``_preprocess_action``).

        Returns:
            ``((p_queue, v_queue), reward, done, info)`` where the queues are
            numpy arrays built from the simulator output and ``info`` is an
            empty dict.
        """
        self._step += 1
        if not self._is_running:
            self._is_running = True
        sim_action, action_mat = self._preprocess_action(action)
        # print(sim_action)
        p_queue, v_queue = self.sim.step(action=sim_action,
                                         step_length=self.reb_interval,
                                         curr_time=self.curr_time)
        self.curr_time += self.reb_interval
        p_queue = np.array(p_queue)
        v_queue = np.array(v_queue)
        # Negative cost with two terms: the total passenger queue (weighted by
        # (1 - alpha) * 0.1) and, weighted by alpha * vehicle speed, the sum
        # of the positive part of (vehicles - passengers) per origin node
        # multiplied element-wise with the dispatch matrix and the scaled
        # travel-time matrix.
        reward = -self._beta * (p_queue.sum() * (1 - self._alpha) * 1e-1 +
                                self._alpha * self._vehicle_speed * np.maximum(
                                    (v_queue - p_queue).reshape(
                                        (self._num_nodes, 1)) * action_mat *
                                    self._travel_time, 0).sum())
        # print(self._vehicle_speed)
        # print('reward', reward)
        #print('passenger', p_queue)
        #print('vehicle', v_queue)
        #print('action',action_mat)
        #print('reward', reward)
        # print(f'at node {v_queue.sum()}, on road {self._total_vehicle - v_queue.sum()}')
        # print(f'action diff {np.linalg.norm(self._pre_action-action)}')
        # Stores the action exactly as passed in (not a resampled one).
        self._pre_action = action
        # The episode ends once the clock reaches time_horizon * 3600 - 1;
        # presumably curr_time is in seconds and time_horizon in hours --
        # TODO confirm.
        if self.curr_time >= self._config['time_horizon'] * 3600 - 1:
            self._done = True
        return (p_queue, v_queue), reward, self._done, {}
class MultiDiscreteMaskEnv(gym.Env):
    """Toy environment for exercising action masks on a MultiDiscrete space.

    The action space is ``MultiDiscrete([2, 3, 4])``. Three masks are kept,
    shaped ``(2,)``, ``(2, 3)`` and ``(2, 3, 4)``; the mask for sub-action
    ``i`` is indexed by the sub-actions chosen before it. After every step
    the sub-action just taken at each position is masked out (set to 0) for
    the following step, and all other entries are valid (1). Taking a
    masked-out sub-action raises an ``Exception``.
    """

    metadata = {'render.modes': ['human', 'system', 'none']}

    _INVALID_ACTION_MSG = ("Invalid action was selected! Valid actions: {}, "
                           "action taken: {}")

    def __init__(self):
        self.action_space = MultiDiscrete([2, 3, 4])
        self.observation_space = MultiDiscrete([4, 5])
        self.current_step = 0
        self._reset_masks()

    def _new_masks(self):
        """Return three fresh all-ones masks sized from ``action_space.nvec``."""
        n1, n2, n3 = (int(n) for n in self.action_space.nvec)
        return [torch.ones(n1), torch.ones(n1, n2), torch.ones(n1, n2, n3)]

    def _reset_masks(self):
        """Make every action valid again and publish the masks on ``self``."""
        masks = self._new_masks()
        (self._valid_actions1, self._valid_actions2,
         self._valid_actions3) = masks
        self._action_mask = masks

    def reset(self):
        """Clear all masks, restart the step counter and return a new state."""
        self._reset_masks()
        self.current_step = 0
        self._choose_next_state()
        return self.state

    def step(self, actions):
        """Validate ``actions`` against the current masks and advance one step.

        Args:
            actions: Indexable with three integer sub-actions.

        Returns:
            ``(state, 0, done, info)`` where ``info["action_mask"]`` holds
            the masks that apply to the next step.

        Raises:
            Exception: If any sub-action is masked out by the current masks.
        """
        first, second, third = actions[0], actions[1], actions[2]
        valid_actions1, valid_actions2, valid_actions3 = self._new_masks()

        if self._action_mask[0][first] == 0:
            raise Exception(
                self._INVALID_ACTION_MSG.format(self._action_mask[0], actions))
        valid_actions1[first] = 0

        if self._action_mask[1][first][second] == 0:
            raise Exception(
                self._INVALID_ACTION_MSG.format(self._action_mask[1][first],
                                                actions))
        # Forbid repeating the second sub-action whatever the first one is.
        valid_actions2[:, second] = 0

        if self._action_mask[2][first][second][third] == 0:
            # Report the mask row selected by the first two sub-actions.
            # (Indexing it as actions[0][actions[2]] would subscript a scalar
            # and fail before this exception could be raised.)
            raise Exception(
                self._INVALID_ACTION_MSG.format(
                    self._action_mask[2][first][second], actions))
        # Forbid repeating the third sub-action for every (first, second).
        valid_actions3[:, :, third] = 0

        self._action_mask = [valid_actions1, valid_actions2, valid_actions3]
        self._choose_next_state()
        self.current_step += 1

        return self.state, 0, self.finish(), {"action_mask": self._action_mask}

    def render(self, mode='human'):
        """Rendering is a no-op for this environment."""
        pass

    def finish(self):
        """Return True once exactly 250 steps have been taken."""
        return self.current_step == 250

    def _choose_next_state(self):
        """Set ``self.state`` to a random observation as a long tensor."""
        self.state = torch.tensor(self.observation_space.sample(),
                                  dtype=torch.long)
np.array(x) >= self.low).all() and (np.array(x) <= self.high).all() @property def shape(self): return self.num_discrete_space def __repr__(self): return "MultiDiscrete" + str(self.num_discrete_space) def __eq__(self, other): return np.array_equal(self.low, other.low) and np.array_equal( self.high, other.high) if __name__ == "__main__": # examples from gym.spaces import MultiDiscrete as MDiscrete md1 = MultiDiscrete([[0, 4], [0, 9]]) sp = [md1.sample() for i in range(100)] d1 = [x[0] for x in sp] d2 = [x[1] for x in sp] print(min(d1), min(d2), max(d1), max(d2)) # difference to gym.spaces.multiDiscrete md2 = MDiscrete(md1.high - md1.low + 1) sp = [md2.sample() for i in range(100)] d1 = [x[0] for x in sp] d2 = [x[1] for x in sp] print(min(d1), min(d2), max(d1), max(d2))