Beispiel #1
0
def test_binary_search_nd():
    """Check that ``binary_search_nd`` maps points to the expected flat index.

    Each axis is described by an array of bin edges. A point lying in
    interval ``i`` of axis 0, ``j`` of axis 1 and ``k`` of axis 2 is expected
    to map to ``i + Ni * j + Ni * Nj * k``, where ``Ni`` and ``Nj`` are the
    numbers of intervals along the first two axes.
    """
    bins = [
        np.array([0.0, 1.0, 2.0, 3.0]),  # 3 intervals
        np.array([1.0, 2.0, 3.0, 4.0]),  # 3 intervals
        np.array([2.0, 3.0, 4.0, 5.0, 6.0]),  # 4 intervals
    ]

    # (point, expected index), with index = i + Ni * j + Ni * Nj * k
    cases = [
        # Lower edge of the first interval on every axis.
        (np.array([0.0, 1.0, 2.0]), 0),
        # Just below the upper edge of the last interval on every axis.
        (np.array([2.9, 3.9, 5.9]), 2 + 3 * 2 + 3 * 3 * 3),
        # Interior point: i = 1, j = 1, k = 0.
        (np.array([1.5, 2.5, 2.5]), 1 + 3 * 1 + 3 * 3 * 0),
    ]

    for point, expected in cases:
        assert binary_search_nd(point, bins) == expected
Beispiel #2
0
    def sample(self, discrete_state, action):
        """Sample a transition from a discrete state and discretize the result.

        The discrete state is converted to a continuous one via
        ``self.get_continuous_state(..., randomize=True)``, the wrapped
        environment ``self.env`` is sampled from that continuous state, and
        the resulting next state is mapped back through ``binary_search_nd``
        using ``self._bins``.

        Parameters
        ----------
        discrete_state
            State to sample from; asserted to be contained in
            ``self.observation_space``.
        action
            Action forwarded unchanged to ``self.env.sample``.

        Returns
        -------
        tuple
            ``(next_state, reward, done, info)`` where ``next_state`` is the
            discretized next state and the remaining items are returned by
            ``self.env.sample`` unchanged.
        """
        assert self.observation_space.contains(discrete_state)

        # Lift the discrete state to a continuous one before querying the
        # underlying environment.
        # NOTE(review): randomize=True presumably draws a random point inside
        # the corresponding cell -- confirm against get_continuous_state.
        start_state = self.get_continuous_state(discrete_state, randomize=True)

        # Sample in the true (continuous) environment.
        raw_next_state, reward, done, info = self.env.sample(start_state, action)

        # Report the next state in discretized form.
        return binary_search_nd(raw_next_state, self._bins), reward, done, info
Beispiel #3
0
 def discretize(self, coordinates):
     """Return the result of ``binary_search_nd`` for ``coordinates``.

     The lookup is performed against the bins stored in ``self._bins``.
     """
     bin_index = binary_search_nd(coordinates, self._bins)
     return bin_index
Beispiel #4
0
 def get_discrete_state(self, continuous_state):
     """Map ``continuous_state`` to its discrete counterpart.

     The mapping is delegated to ``binary_search_nd`` using the bins stored
     in ``self._bins``; its result is returned unchanged.
     """
     discrete_state = binary_search_nd(continuous_state, self._bins)
     return discrete_state
Beispiel #5
0
 def step(self, action):
     """Step the wrapped environment and discretize the resulting state.

     ``action`` is forwarded to ``self.env.step``. The next state it returns
     is passed through ``binary_search_nd`` with ``self._bins``; ``reward``,
     ``done`` and ``info`` are returned unchanged.

     Returns
     -------
     tuple
         ``(next_state, reward, done, info)`` with ``next_state`` discretized.
     """
     raw_next_state, reward, done, info = self.env.step(action)
     return binary_search_nd(raw_next_state, self._bins), reward, done, info