def test_binary_search_nd():
    """Check ``binary_search_nd`` against hand-computed flat indices.

    Three 1-D bin-edge arrays (3, 3 and 4 intervals) define the grid. Each
    query vector is expected to map to the flat index
    ``i + Ni * j + Ni * Nj * k``, where ``(i, j, k)`` are the per-dimension
    interval indices and ``Ni``, ``Nj`` the interval counts of the first
    two dimensions.
    """
    bin1 = np.array([0.0, 1.0, 2.0, 3.0])  # 3 intervals
    bin2 = np.array([1.0, 2.0, 3.0, 4.0])  # 3 intervals
    bin3 = np.array([2.0, 3.0, 4.0, 5.0, 6.0])  # 4 intervals
    bins = [bin1, bin2, bin3]

    vec1 = np.array([0.0, 1.0, 2.0])  # lower edge of every dimension
    vec2 = np.array([2.9, 3.9, 5.9])  # just below the upper edges
    vec3 = np.array([1.5, 2.5, 2.5])  # interior point

    # index = i + Ni * j + Ni * Nj * k
    assert binary_search_nd(vec1, bins) == 0
    assert binary_search_nd(vec2, bins) == 2 + 3 * 2 + 3 * 3 * 3
    assert binary_search_nd(vec3, bins) == 1 + 3 * 1 + 3 * 3 * 0
def sample(self, discrete_state, action):
    """Sample a transition from a discrete state and return a discrete next state.

    The discrete state is asserted to belong to ``self.observation_space``,
    converted to a continuous state via ``self.get_continuous_state`` (called
    with ``randomize=True``), and passed with ``action`` to
    ``self.env.sample``. The next state returned by the wrapped environment
    is then discretized with ``binary_search_nd`` over ``self._bins``.

    Returns
    -------
    tuple
        ``(next_state, reward, done, info)`` where ``next_state`` is the
        discretized next state and the other three items are forwarded
        unchanged from ``self.env.sample``.
    """
    # the incoming state must be a valid discrete state
    assert self.observation_space.contains(discrete_state)

    # map the discrete state to a continuous one
    state = self.get_continuous_state(discrete_state, randomize=True)

    # sample in the true environment
    transition = self.env.sample(state, action)
    raw_next_state, reward, done, info = transition

    # discretize the next state before handing it back
    return binary_search_nd(raw_next_state, self._bins), reward, done, info
def discretize(self, coordinates):
    """Return the result of ``binary_search_nd`` for ``coordinates``.

    The lookup is performed against the bins stored in ``self._bins``.
    """
    bins = self._bins
    index = binary_search_nd(coordinates, bins)
    return index
def get_discrete_state(self, continuous_state):
    """Map ``continuous_state`` to its discrete counterpart.

    Delegates to ``binary_search_nd`` using the bins held in ``self._bins``
    and returns its result unchanged.
    """
    discrete_state = binary_search_nd(continuous_state, self._bins)
    return discrete_state
def step(self, action):
    """Step the wrapped environment and discretize the resulting state.

    ``action`` is forwarded to ``self.env.step``. The next state it returns
    is converted with ``binary_search_nd`` over ``self._bins``; the reward,
    done flag and info are passed through untouched.

    Returns
    -------
    tuple
        ``(next_state, reward, done, info)`` with a discretized
        ``next_state``.
    """
    observation, reward, done, info = self.env.step(action)
    # discretize the state produced by the wrapped environment
    return binary_search_nd(observation, self._bins), reward, done, info