def __init__(self, name, num_states, N):
    """Build a deterministic N-state chain ("river swim" style) MDP.

    Two actions: action 0 steps one state to the left, action 1 one state
    to the right; both clamp at the chain ends (self-loops there).
    Reward is 0.001 for pressing left at state 0 and 1.0 for pressing
    right at state N-1; every other transition pays 0.

    Args:
        name: identifier forwarded to the base environment.
        num_states: state count forwarded to the base environment.
        N: length of the chain (number of states actually built).
    """
    self.inner_size = N
    state_space = DiscreteSpace(N)
    action_space = DiscreteSpace(2)
    starting_state = 1  # start one step in from the left end

    # Transitions: clamp i-1 / i+1 into [0, N-1] so the ends self-loop.
    states = np.arange(N)
    transitions = np.zeros((N, 2), dtype=np.int32)
    transitions[:, 0] = np.clip(states - 1, 0, N - 1)  # action 0: left
    transitions[:, 1] = np.clip(states + 1, 0, N - 1)  # action 1: right

    # Rewards: tiny payout for hugging the left end, big one on the right.
    rewards = np.zeros((N, 2), dtype=np.float64)
    rewards[0, 0] = 0.001
    rewards[-1, 1] = 1

    super().__init__(name, num_states, action_space, state_space,
                     transitions, rewards, starting_state)
def __init__(self, name, num_states, N):
    """Build a depth-N binary-tree MDP with 2**N - 1 states and 3 actions.

    State i's actions: 0 -> left child (2i+1), 1 -> parent ((i-1)//2),
    2 -> right child (2i+2). The root's parent move and the leaves' child
    moves are self-loops. The rightmost leaf (index 2**N - 2) pays N-1
    for either child action (a self-loop there); selected states pay a
    -0.1 penalty for moving up (action 1) or down (actions 0/2).

    Bug fixed: the two penalty loops iterated ``for s in range(...)`` but
    indexed the reward table with the outer loop variable ``l``, so the
    inner loop variable was unused and only row ``l`` was written
    (repeatedly). The penalties are now applied to the states ``s`` that
    the ranges enumerate.
    # NOTE(review): the exact penalty ranges are kept as written —
    # confirm against the intended tree layout.

    Args:
        name: identifier forwarded to the base environment.
        num_states: state count forwarded to the base environment.
        N: depth of the tree (2**N - 1 nodes are built).
    """
    self.inner_size = N
    state_space = DiscreteSpace(2**N - 1)
    action_space = DiscreteSpace(3)
    starting_state = 2**(N - 1)  # a node on the leaf level

    # Heap-style child/parent indexing for every node.
    transition_func = np.zeros((2**N - 1, 3), dtype=np.int32)
    for i in range(2**N - 1):
        transition_func[i, 0] = 2 * i + 1
        transition_func[i, 1] = int((i - 1) / 2)
        transition_func[i, 2] = 2 * i + 2
    # Clamp the root's parent move and the leaves' child moves.
    transition_func[0, 1] = 0
    for leaf in range(2**(N - 1) - 1, 2**N - 1):
        transition_func[leaf, 0] = leaf
        transition_func[leaf, 2] = leaf

    reward_function = np.zeros((2**N - 1, 3), dtype=np.float64)
    # Goal: the rightmost leaf's (self-loop) child actions pay N-1.
    reward_function[2**N - 2, 0] = N - 1
    reward_function[2**N - 2, 2] = N - 1
    # Penalty for moving up from these states (was: indexed with ``l``).
    for l in range(N - 1):
        for s in range(2**(l + 1) - 1, 2**(l + 1) + l):
            reward_function[s, 1] = -0.1
    # Penalty for child moves from these states (was: indexed with ``l``).
    for l in range(N - 1):
        for s in range(2**l - 1, 2**l + l + 1):
            reward_function[s, 0] = -0.1
            reward_function[s, 2] = -0.1

    super().__init__(name, num_states, action_space, state_space,
                     transition_func, reward_function, starting_state)
def __init__(self, name, num_states, N):
    """Build a bit-flipping MDP: 2**N states, N actions.

    Action j maps state i to ``self.flip_bit(i, j, N)`` (flip one bit of
    the state index). The reward for a transition is the smaller of the
    two state indices, with a sign indicating whether the index went
    down (+) or up (-); self-transitions pay 0.

    Args:
        name: identifier forwarded to the base environment.
        num_states: state count forwarded to the base environment.
        N: number of bits (2**N states, N actions are built).
    """
    self.inner_size = N
    state_space = DiscreteSpace(2**N)
    action_space = DiscreteSpace(N)
    n_states = state_space.get_size()
    n_actions = action_space.get_size()
    starting_state = 0  # the all-zero bit pattern

    transition_func = np.zeros((n_states, n_actions), dtype=np.int32)
    reward_function = np.zeros((n_states, n_actions), dtype=np.float64)
    for state in range(n_states):
        for action in range(n_actions):
            successor = self.flip_bit(state, action, N)
            transition_func[state, action] = successor
            # sign(state - successor): +1 when the flip lowered the
            # index, -1 when it raised it, 0 on a self-loop.
            reward_function[state, action] = (
                np.sign(state - successor) * np.minimum(state, successor)
            )

    super().__init__(name, num_states, action_space, state_space,
                     transition_func, reward_function, starting_state)
def __init__(self, name, num_states, N):
    """Build an N x N grid-world MDP with 4 clamped movement actions.

    States are row-major cell indices (state = row * N + col). Actions:
    0 = left, 1 = up, 2 = right, 3 = down; a move off the grid edge is a
    self-loop. The bottom-right corner pays reward 1 for actions 2 and 3
    (both self-loops there); all other transitions pay 0.

    Fix: the row tests previously used float true division
    (``i / N < 1`` and ``i / N >= N - 1``) to decide whether a cell sits
    on the top or bottom row. Integer ``divmod`` is exact for every N
    (no floating-point rounding concerns) and states the intent
    directly; the computed transitions are identical.

    Args:
        name: identifier forwarded to the base environment.
        num_states: state count forwarded to the base environment.
        N: grid side length (N**2 states are built).
    """
    self.inner_size = N
    state_space = DiscreteSpace(N ** 2)
    action_space = DiscreteSpace(4)
    size_space = state_space.get_size()
    size_action = action_space.get_size()
    starting_state = 0  # top-left corner

    transition_func = np.zeros((size_space, size_action), dtype=np.int32)
    reward_function = np.zeros((size_space, size_action), dtype=np.float64)
    for i in range(size_space):
        row, col = divmod(i, N)
        # Each move clamps at the border: the state maps to itself.
        transition_func[i, 0] = i if col == 0 else i - 1      # left
        transition_func[i, 1] = i if row == 0 else i - N      # up
        transition_func[i, 2] = i if col == N - 1 else i + 1  # right
        transition_func[i, 3] = i if row == N - 1 else i + N  # down

    # Goal cell: bottom-right corner; its right/down self-loops pay 1.
    reward_function[N ** 2 - 1, 2] = 1
    reward_function[N ** 2 - 1, 3] = 1

    super().__init__(name, num_states, action_space, state_space,
                     transition_func, reward_function, starting_state)
def __init__(self, name, num_states, N):
    """Build an N x N "falling" grid MDP with two diagonal actions.

    States are row-major cell indices (state = y * N + x). Both actions
    descend one row (clamped at the bottom); action 1 ("left") also
    shifts one column left and action 0 ("right") one column right,
    each clamped at the grid sides. The bottom-right corner pays
    ``chest`` for the right action; everything else pays 0.

    Args:
        name: identifier forwarded to the base environment.
        num_states: state count forwarded to the base environment.
        N: grid side length (N**2 states are built).
    """
    self.inner_size = N
    state_space = DiscreteSpace(N**2)
    action_space = DiscreteSpace(2)
    size_space = state_space.get_size()
    size_action = action_space.get_size()
    starting_state = N + 1  # second row, second column

    transition_func = np.zeros((size_space, size_action), dtype=np.int32)
    reward_function = np.zeros((size_space, size_action), dtype=np.float64)

    # Action indices: 1 steers left, 0 steers right.
    left = 1
    right = 1 - left
    # Evaluates to -1. NOTE(review): ``2 * 0 - 1`` looks like a leftover
    # placeholder formula — confirm the intended chest reward.
    chest = 2 * 0 - 1

    for col in range(N):
        for row in range(N):
            pos = row * N + col
            below = row if row == N - 1 else row + 1      # fall one row
            to_left = col if col == 0 else col - 1        # clamp at left edge
            to_right = col if col == N - 1 else col + 1   # clamp at right edge
            transition_func[pos, left] = below * N + to_left
            transition_func[pos, right] = below * N + to_right

    # Chest sits in the bottom-right corner, reached via the right action.
    reward_function[N**2 - 1, right] = chest

    super().__init__(name, num_states, action_space, state_space,
                     transition_func, reward_function, starting_state)