Exemples Python de MultiDiscrete.sample (gym.spaces.MultiDiscrete.sample)

Exemple #1

0

Afficher le fichier

Fichier : test_helpers.py Projet : fossabot/yarllib

def test_encoder_with_sampling(space):
    """Check that decoding an encoded sample returns the original sample.

    ``space`` is passed straight to ``MultiDiscrete``; the number of
    samples drawn equals the product of its entries.
    """
    sample_count = int(np.prod(space))
    multi_discrete = MultiDiscrete(space)
    encoder = Encoder(multi_discrete)

    for _ in range(sample_count):
        original = multi_discrete.sample()
        round_tripped = encoder.decode(encoder.encode(original))
        assert np.equal(original, round_tripped).all()

Exemple #2

0

Afficher le fichier

def test_multidiscrete_subspace_reproducibility():
    """Check that indexing a MultiDiscrete space twice gives equal samples.

    Every index expression is applied twice to the same parent space and
    the two resulting sub-spaces must produce equal samples; finally the
    full-slice sub-space is compared against the parent space itself.
    """
    # 1D multi-discrete
    space = MultiDiscrete([100, 200, 300])
    space.seed(None)

    keys_1d = (0, slice(0, 1), slice(0, 2), slice(None))
    for key in keys_1d:
        assert sample_equal(space[key].sample(), space[key].sample())
    assert sample_equal(space[:].sample(), space.sample())

    # 2D multi-discrete
    space = MultiDiscrete([[300, 400, 500], [600, 700, 800]])
    space.seed(None)

    keys_2d = (
        (0, 1),
        0,
        slice(0, 1),
        (slice(0, 2), slice(None)),
        (slice(None), slice(0, 1)),
        (slice(0, 2), slice(0, 2)),
        slice(None),
        (slice(None), slice(None)),
    )
    for key in keys_2d:
        assert sample_equal(space[key].sample(), space[key].sample())
    assert sample_equal(space[:, :].sample(), space.sample())

Exemple #3

0

Afficher le fichier

Fichier : test_space_utils.py Projet : vishalbelsare/ray

    def test_convert_element_to_space_type(self):
        """Check that the converter fixes dtypes for every nested space kind.

        An element is built whose leaves deliberately use other dtypes
        (float64 boxes, int32 multi-discrete/multi-binary, float discrete
        values); after conversion against a sample of the Dict space it
        must be contained in that space.
        """
        box = Box(low=-1, high=1, shape=(2, ))
        discrete = Discrete(2)
        multi_discrete = MultiDiscrete([2, 2])
        multi_binary = MultiBinary(2)
        box_discrete_tuple = Tuple((box, discrete))
        nested_dict = Dict({
            "box2": box,
            "discrete2": discrete,
        })
        full_space = Dict({
            "box": box,
            "discrete": discrete,
            "multi_discrete": multi_discrete,
            "multi_binary": multi_binary,
            "dict_space": nested_dict,
            "tuple_space": box_discrete_tuple,
        })

        # Samples cast away from the dtypes the spaces expect.
        box_as_float64 = box.sample().astype(np.float64)
        multi_discrete_as_int32 = multi_discrete.sample().astype(np.int32)
        multi_binary_as_int32 = multi_binary.sample().astype(np.int32)
        discrete_as_float = 0.0

        mistyped_element = {
            "box": box_as_float64,
            "discrete": discrete_as_float,
            "multi_discrete": multi_discrete_as_int32,
            "multi_binary": multi_binary_as_int32,
            "tuple_space": (box_as_float64, discrete_as_float),
            "dict_space": {
                "box2": box_as_float64,
                "discrete2": discrete_as_float,
            },
        }
        reference_sample = full_space.sample()
        converted = convert_element_to_space_type(mistyped_element,
                                                  reference_sample)
        assert full_space.contains(converted)

Exemple #4

0

Afficher le fichier

class TaxiRebalance(gym.Env, ABC):
    """Gym environment that drives a taxi ``Simulator`` through rebalancing actions.

    Action space: ``MultiDiscrete`` with one entry per node, each entry
    having ``(num_neighbors + 1) * 5`` choices. An entry encodes a target
    neighbor (``value // 5``) and one of five dispatch levels
    (``value % 5``), see ``_preprocess_action``.

    Observation space: a ``Tuple`` of two integer ``Box`` spaces of shape
    ``(num_nodes,)``, bounded by ``max_passenger`` and ``max_vehicle``.

    NOTE(review): ``graph_file``, ``vehicle_file`` and ``RESULTS`` are
    referenced but not defined in this class; presumably module-level
    names -- confirm against the full file.
    """

    def __init__(self, config):
        """Store ``config`` and derive spaces and bookkeeping state from it.

        Keys read from ``config`` here: ``max_vehicle``, ``reb_interval``,
        ``max_travel_time``, ``max_passenger``, ``nodes_list``,
        ``near_neighbor``, ``dispatch_rate``, ``alpha``, ``beta``,
        ``save_res_every_ep`` and ``veh_speed``.
        """
        self._config = config
        self.curr_time = 0
        self.graph = Graph()
        self.graph.import_graph(graph_file)
        self.sim = Simulator(self.graph)

        self.max_vehicle = self._config['max_vehicle']
        self.reb_interval = self._config['reb_interval']
        self.max_travel_t = self._config['max_travel_time']
        # Number of rebalancing intervals needed to cover the longest trip.
        self.max_lookback_steps = int(
            np.ceil(self.max_travel_t / self.reb_interval))
        self.max_passenger = self._config['max_passenger']
        self._num_nodes = len(self._config['nodes_list'])
        self._nodes = tuple(self._config['nodes_list'])
        self._num_neighbors = self._config['near_neighbor']
        # May lower self._num_neighbors when there are too few nodes, so it
        # must run before the action space is built below.
        self._neighbor_map = self._get_neighbors()
        self._dispatch_rate = self._config['dispatch_rate']

        self.action_space = MultiDiscrete([(self._num_neighbors + 1) * 5] *
                                          self._num_nodes)
        self.observation_space = Tuple((Box(0,
                                            self.max_passenger,
                                            shape=(self._num_nodes, ),
                                            dtype=np.int64),
                                        Box(0,
                                            self.max_vehicle,
                                            shape=(self._num_nodes, ),
                                            dtype=np.int64)))
        self._is_running = False
        self._done = False
        self._start_time = time.time()
        self._alpha = self._config['alpha']
        self._beta = self._config['beta']
        self._step = 0
        self._total_vehicle = None
        self._travel_time = None
        self._pre_action = None
        self._episode = 0
        # Identifier derived from the construction timestamp; passed to the
        # simulator when saving results and plots in reset().
        self._worker_id = str(hash(time.time()))
        self._save_res_every_ep = int(self._config['save_res_every_ep'])
        self._vehicle_speed = self._config['veh_speed']

    def _get_neighbors(self):
        """Map every node to the indices of its closest neighbors.

        For each node the entries of ``graph.graph_top[node]['nei']`` are
        sorted by their ``'dist'`` value and the first ``k + 1`` are kept,
        where ``k`` is ``config['near_neighbor']`` clamped to
        ``len(nodes) - 1`` (``self._num_neighbors`` is updated when the
        clamp applies).

        Returns:
            dict: node -> tuple of positions in ``self._nodes``.
        """
        k = self._config['near_neighbor']
        if k + 1 > len(self._nodes):
            k = len(self._nodes) - 1
            self._num_neighbors = k
        neighbor_map = dict()
        for node in self._nodes:
            dist_lst = [(dest, self.graph.graph_top[node]['nei'][dest]['dist'])
                        for dest in self.graph.graph_top[node]['nei']]
            dist_lst.sort(key=lambda x: x[1])
            neighbor_map[node] = tuple(
                self._nodes.index(x[0]) for x in dist_lst[:k + 1])
        return neighbor_map

    def _preprocess_action(self, action):
        """Decode a MultiDiscrete action into per-node dispatch rows.

        Args:
            action: ``np.ndarray`` with one encoded choice per node. If it
                contains NaN, the current step is printed and a random
                action sampled from ``action_space`` is used instead.

        Returns:
            tuple: ``(sim_action, action_mat)`` where ``action_mat`` is a
            ``(num_nodes, num_nodes)`` array and ``sim_action`` maps each
            node to its row of ``action_mat``.
        """
        assert isinstance(action, np.ndarray)
        if np.isnan(action).sum() > 0:
            print(self._step)
            action = self.action_space.sample()
        action_mat = np.zeros((self._num_nodes, self._num_nodes))
        for nd_idx, chosen_action in enumerate(action):
            # Each entry packs (neighbor slot, dispatch level) as slot*5+level.
            chosen_neighbor = chosen_action // 5
            ac_idx = chosen_action % 5
            nb_idx = self._neighbor_map[self._nodes[nd_idx]][chosen_neighbor]
            # Levels 0..4 map to rates 0.2, 0.4, ..., 1.0.
            dispatch_rate = (ac_idx + 1) / 5
            action_mat[nd_idx, nb_idx] = dispatch_rate
            if nb_idx != nd_idx:
                # The remainder stays at the origin node.
                action_mat[nd_idx, nd_idx] = 1 - dispatch_rate
            else:
                # Target is the node itself: the diagonal entry is set to 1.
                action_mat[nd_idx, nd_idx] = 1
        sim_action = dict()
        for nd_idx, node in enumerate(self._nodes):
            sim_action[node] = action_mat[nd_idx, :]
        return sim_action, action_mat

    def reset(self):
        """Finish the previous run (if any) and build a fresh simulator.

        When the previous episode was running, the simulator is finalised
        and, every ``save_res_every_ep`` episodes, its results are saved
        (plus queue plots when ``config['plot_queue_len']`` is truthy).
        The graph and simulator are then recreated and the travel-time
        matrix is recomputed.

        Returns:
            tuple: ``(np.zeros((num_nodes,)), vehicle_dist)`` where
            ``vehicle_dist`` holds the values of ``['taxi']['distrib']``
            loaded from ``vehicle_file``.
        """
        if self._done:
            self._episode += 1
            self._done = False
            # print(f'Episode: {self._episode} done!')

        if self._is_running:
            self.sim.finishing_touch(self._start_time)
            if self._episode % self._save_res_every_ep == 0:
                self.sim.save_result(RESULTS,
                                     self._worker_id,
                                     unique_name=False)
                if self._config['plot_queue_len']:
                    self.sim.plot_pass_queue_len(mode='taxi',
                                                 suffix=self._worker_id)
                    self.sim.plot_pass_wait_time(mode='taxi',
                                                 suffix=self._worker_id)
            self._is_running = False

        self.curr_time = 0
        self._step = 0

        self.graph = Graph()
        self.graph.import_graph(graph_file)
        self.sim = Simulator(self.graph)
        self.sim.import_arrival_rate(unit=(1, 'sec'))
        self.sim.import_vehicle_attribute(file_name=vehicle_file)
        self.sim.set_running_time(start_time=self._config['start_time'],
                                  time_horizon=self._config['time_horizon'],
                                  unit='hour')
        self.sim.routing.set_routing_method('simplex')
        # The simulator is always initialised with the same seed.
        self.sim.initialize(seed=0)
        self._total_vehicle = self.sim.vehicle_attri['taxi']['total']

        # Pairwise road distances between graph nodes (zero on the diagonal).
        # NOTE(review): indices follow the iteration order of graph_top, not
        # self._nodes -- confirm both orders agree.
        self._travel_time = np.zeros((self._num_nodes, self._num_nodes))
        for i, node in enumerate(self.graph.graph_top):
            for j, road in enumerate(self.graph.graph_top):
                if i != j:
                    self._travel_time[
                        i,
                        j] = self.graph.graph_top[node]['node'].road[road].dist
        # Scale by the matrix infinity norm.
        self._travel_time /= np.linalg.norm(self._travel_time, ord=np.inf)
        self._pre_action = np.zeros((self._num_neighbors, self._num_nodes))

        with open(vehicle_file, 'r') as v_file:
            vehicle_dist = json.load(v_file)
        vehicle_dist = vehicle_dist['taxi']['distrib']
        vehicle_dist = np.array([vehicle_dist[x] for x in vehicle_dist])
        return np.zeros((self._num_nodes, )), vehicle_dist

    def step(self, action):
        """Advance the simulator by one rebalancing interval.

        Args:
            action: encoded MultiDiscrete action, see ``_preprocess_action``.

        Returns:
            tuple: ``((p_queue, v_queue), reward, done, {})`` where the two
            queues are the arrays returned by ``Simulator.step``.
        """
        self._step += 1
        if not self._is_running:
            self._is_running = True
        sim_action, action_mat = self._preprocess_action(action)
        # print(sim_action)
        p_queue, v_queue = self.sim.step(action=sim_action,
                                         step_length=self.reb_interval,
                                         curr_time=self.curr_time)
        self.curr_time += self.reb_interval
        p_queue = np.array(p_queue)
        v_queue = np.array(v_queue)
        # Negative weighted cost of two terms: the total passenger queue
        # (weight (1 - alpha) * 0.1) and the positive part of
        # (v_queue - p_queue) per origin node, spread over action_mat and
        # scaled by the normalised travel time (weight alpha * veh_speed).
        reward = -self._beta * (p_queue.sum() * (1 - self._alpha) * 1e-1 +
                                self._alpha * self._vehicle_speed * np.maximum(
                                    (v_queue - p_queue).reshape(
                                        (self._num_nodes, 1)) * action_mat *
                                    self._travel_time, 0).sum())
        # print(self._vehicle_speed)
        # print('reward', reward)
        #print('passenger', p_queue)
        #print('vehicle', v_queue)
        #print('action',action_mat)
        #print('reward', reward)
        # print(f'at node {v_queue.sum()}, on road {self._total_vehicle - v_queue.sum()}')
        # print(f'action diff {np.linalg.norm(self._pre_action-action)}')
        self._pre_action = action
        # time_horizon is multiplied by 3600 here (it is passed to the
        # simulator with unit='hour' in reset()).
        if self.curr_time >= self._config['time_horizon'] * 3600 - 1:
            self._done = True
        return (p_queue, v_queue), reward, self._done, {}

Exemple #5

0

Afficher le fichier

class MultiDiscreteMaskEnv(gym.Env):
    """Toy environment with a 3-part MultiDiscrete action and action masks.

    The action mask is a list of three tensors of shapes ``(n0,)``,
    ``(n0, n1)`` and ``(n0, n1, n2)`` (``n*`` taken from
    ``action_space.nvec``); an entry of 0 marks an invalid choice. After
    every step the mask is rebuilt so that exactly the sub-actions that
    were just taken are invalid for the next step. Observations are random
    samples of ``observation_space``, the reward is always 0 and an
    episode finishes when ``current_step`` reaches 250.
    """
    metadata = {'render.modes': ['human', 'system', 'none']}

    def __init__(self):
        self.action_space = MultiDiscrete([2, 3, 4])

        self.observation_space = MultiDiscrete([4, 5])

        self.current_step = 0
        self._reset_masks()

    def _new_masks(self):
        """Return three fresh all-ones masks sized from ``action_space.nvec``."""
        n0, n1, n2 = (int(n) for n in self.action_space.nvec)
        return [torch.ones(n0), torch.ones(n0, n1), torch.ones(n0, n1, n2)]

    def _reset_masks(self):
        """Make every action valid again."""
        (self._valid_actions1, self._valid_actions2,
         self._valid_actions3) = self._new_masks()
        self._action_mask = [
            self._valid_actions1, self._valid_actions2, self._valid_actions3
        ]

    def reset(self):
        """Clear the masks and step counter and return a new random state."""
        self._reset_masks()
        self.current_step = 0
        self._choose_next_state()
        return self.state

    def step(self, actions):
        """Validate ``actions`` against the current mask and advance one step.

        Args:
            actions: indexable with three integer sub-actions.

        Returns:
            tuple: ``(state, 0, done, {"action_mask": mask})`` where ``mask``
            forbids repeating any of the sub-actions just taken.

        Raises:
            Exception: if any sub-action is masked out (mask value 0).
        """
        first, second, third = actions[0], actions[1], actions[2]

        if self._action_mask[0][first] == 0:
            raise Exception("Invalid action was selected! Valid actions: {}, "
                            "action taken: {}".format(self._action_mask[0],
                                                      actions))
        if self._action_mask[1][first][second] == 0:
            raise Exception("Invalid action was selected! Valid actions: {}, "
                            "action taken: {}".format(
                                self._action_mask[1][first], actions))
        if self._action_mask[2][first][second][third] == 0:
            # Report the mask row that was actually checked; the previous
            # code indexed into the scalar ``actions[0]`` here and failed
            # before the intended exception could be raised.
            raise Exception("Invalid action was selected! Valid actions: {}, "
                            "action taken: {}".format(
                                self._action_mask[2][first][second],
                                actions))

        # Forbid the chosen sub-actions for the next step, whatever the
        # preceding sub-actions will be (hence the full slices).
        valid_actions1, valid_actions2, valid_actions3 = self._new_masks()
        valid_actions1[first] = 0
        valid_actions2[:, second] = 0
        valid_actions3[:, :, third] = 0

        self._action_mask = [valid_actions1, valid_actions2, valid_actions3]
        self._choose_next_state()
        self.current_step += 1

        return self.state, 0, self.finish(), {"action_mask": self._action_mask}

    def render(self, mode='human'):
        """Rendering is a no-op for this environment."""
        pass

    def finish(self):
        """Return True exactly when 250 steps have been taken."""
        return self.current_step == 250

    def _choose_next_state(self):
        """Draw a random observation and store it as a long tensor."""
        self.state = torch.tensor(self.observation_space.sample(),
                                  dtype=torch.long)

Exemple #6

0

Afficher le fichier

            np.array(x) >= self.low).all() and (np.array(x) <=
                                                self.high).all()

    @property
    def shape(self):
        """Return ``self.num_discrete_space`` as the shape of this space."""
        return self.num_discrete_space

    def __repr__(self):
        """Return ``"MultiDiscrete"`` followed by ``num_discrete_space``."""
        return "MultiDiscrete{}".format(self.num_discrete_space)

    def __eq__(self, other):
        """Return True when ``other`` has equal ``low`` and ``high`` arrays.

        Objects that do not expose ``low`` and ``high`` (for example
        ``None``) are not comparable: ``NotImplemented`` is returned so that
        ``==`` evaluates to False instead of raising ``AttributeError``.
        """
        try:
            other_low, other_high = other.low, other.high
        except AttributeError:
            return NotImplemented
        return np.array_equal(self.low, other_low) and np.array_equal(
            self.high, other_high)


if __name__ == "__main__":
    # Demo: sample this module's MultiDiscrete and gym's MultiDiscrete and
    # print the observed min/max of the first two components of each.
    from gym.spaces import MultiDiscrete as MDiscrete

    local_space = MultiDiscrete([[0, 4], [0, 9]])
    draws = [local_space.sample() for _ in range(100)]
    first_component = [draw[0] for draw in draws]
    second_component = [draw[1] for draw in draws]
    print(min(first_component), min(second_component),
          max(first_component), max(second_component))

    # difference to gym.spaces.multiDiscrete: the gym space is built from
    # the number of values per dimension (high - low + 1) rather than from
    # [low, high] pairs.
    gym_space = MDiscrete(local_space.high - local_space.low + 1)
    draws = [gym_space.sample() for _ in range(100)]
    first_component = [draw[0] for draw in draws]
    second_component = [draw[1] for draw in draws]
    print(min(first_component), min(second_component),
          max(first_component), max(second_component))