Example #1
0
    def __init__(self, name, num_states, N):
        """Build a deterministic N-state chain MDP with two actions.

        Action 0 steps left and action 1 steps right, with both chain ends
        absorbing for the outward action.  Pushing left in the first state
        pays a small reward (0.001); pushing right in the last state pays 1.

        Args:
            name: identifier forwarded to the base environment.
            num_states: forwarded to the base environment.
            N: number of states in the chain.
        """

        # set up the discrete state and action spaces
        self.inner_size = N
        state_space = DiscreteSpace(N)
        action_space = DiscreteSpace(2)

        # the agent begins one step into the chain
        starting_state = 1

        # deterministic transitions, clamped at both ends of the chain
        transition_func = np.zeros((N, 2), dtype=np.int32)
        for state in range(N):
            transition_func[state, 0] = max(state - 1, 0)
            transition_func[state, 1] = min(state + 1, N - 1)

        # sparse rewards: tiny payoff at the left end, full payoff at the right
        reward_function = np.zeros((N, 2), dtype=np.float64)
        reward_function[0, 0] = 0.001
        reward_function[N - 1, 1] = 1

        super().__init__(name, num_states, action_space, state_space,
                         transition_func, reward_function, starting_state)
Example #2
0
    def __init__(self, name, num_states, N):
        """Build a binary-tree MDP over 2**N - 1 heap-indexed states.

        Action 0 moves to the left child, action 1 to the parent, and
        action 2 to the right child.  The root's parent action and the
        leaves' child actions are self-loops.  The last leaf's (self-loop)
        child actions pay N - 1; a band of states per depth level is
        penalized with -0.1.

        Args:
            name: identifier forwarded to the base environment.
            num_states: forwarded to the base environment.
            N: depth of the tree (2**N - 1 states).
        """

        # set up the discrete state and action spaces
        self.inner_size = N
        state_space = DiscreteSpace(2**N - 1)
        action_space = DiscreteSpace(3)

        # start partway down the tree
        # NOTE(review): 2**(N - 1) is the second leaf in heap indexing —
        # confirm this is the intended start state
        starting_state = 2**(N - 1)

        # heap-style transitions: children at 2i+1 / 2i+2, parent at (i-1)//2
        transition_func = np.zeros((2**N - 1, 3), dtype=np.int32)
        for i in range(2**N - 1):
            transition_func[i, 0] = 2 * i + 1
            transition_func[i, 1] = int((i - 1) / 2)
            transition_func[i, 2] = 2 * i + 2

        # the root has no parent; the leaves have no children
        transition_func[0, 1] = 0
        for l in range(2**(N - 1) - 1, 2**N - 1):
            transition_func[l, 0] = l
            transition_func[l, 2] = l

        # reward function: big payoff at the last leaf's self-loops
        reward_function = np.zeros((2**N - 1, 3), dtype=np.float64)
        reward_function[2**N - 2, 0] = N - 1
        reward_function[2**N - 2, 2] = N - 1

        # BUG FIX: these loops previously indexed reward_function with the
        # outer variable `l` instead of the inner loop variable `s`, so the
        # inner iteration had no effect and the wrong states were penalized.
        for l in range(N - 1):
            for s in range(2**(l + 1) - 1, 2**(l + 1) + l):
                reward_function[s, 1] = -0.1

        for l in range(N - 1):
            for s in range(2**l - 1, 2**l + l + 1):
                reward_function[s, 0] = -0.1
                reward_function[s, 2] = -0.1

        super().__init__(name, num_states, action_space, state_space,
                         transition_func, reward_function, starting_state)
Example #3
0
    def __init__(self, name, num_states, N):
        """Build a bit-flip MDP: 2**N states, one flip action per bit.

        Each action maps a state to ``self.flip_bit(state, action, N)``;
        the reward has magnitude min(state, successor) and carries the
        sign of the jump direction.

        Args:
            name: identifier forwarded to the base environment.
            num_states: forwarded to the base environment.
            N: number of bits (2**N states, N actions).
        """

        # set up the discrete state and action spaces
        self.inner_size = N
        state_space = DiscreteSpace(2**N)
        action_space = DiscreteSpace(N)

        # cache the space sizes
        num_s = state_space.get_size()
        num_a = action_space.get_size()

        # the agent starts at the all-zero bit string
        starting_state = 0

        # allocate the transition and reward tables
        transition_func = np.zeros((num_s, num_a), dtype=np.int32)
        reward_function = np.zeros((num_s, num_a), dtype=np.float64)

        # fill both tables: successor from flip_bit, reward signed by the
        # direction of the jump with magnitude min(state, successor)
        for state in range(num_s):
            for action in range(num_a):
                succ = self.flip_bit(state, action, N)
                transition_func[state, action] = succ
                reward_function[state, action] = (
                    np.sign(state - succ) * np.minimum(state, succ))

        super().__init__(name, num_states, action_space, state_space,
                         transition_func, reward_function, starting_state)
Example #4
0
    def __init__(self, name, num_states, N):
        """Build an N x N grid MDP with four movement actions.

        States are row-major cell indices; actions 0..3 move left, up,
        right, down, and any move off the grid leaves the agent in place.
        Only the bottom-right corner pays reward 1 (via its self-looping
        right/down actions).

        Args:
            name: identifier forwarded to the base environment.
            num_states: forwarded to the base environment.
            N: side length of the grid (N**2 states).
        """

        # set up the discrete state and action spaces
        self.inner_size = N
        state_space = DiscreteSpace(N ** 2)
        action_space = DiscreteSpace(4)

        # cache the space sizes
        num_s = state_space.get_size()
        num_a = action_space.get_size()

        # the agent starts in the top-left corner
        starting_state = 0

        # allocate the transition and reward tables
        transition_func = np.zeros((num_s, num_a), dtype=np.int32)
        reward_function = np.zeros((num_s, num_a), dtype=np.float64)

        # deterministic moves, clamped at the grid boundary
        for state in range(num_s):
            row, col = divmod(state, N)
            transition_func[state, 0] = state if col == 0 else state - 1
            transition_func[state, 1] = state if row == 0 else state - N
            transition_func[state, 2] = state if col == N - 1 else state + 1
            transition_func[state, 3] = state if row == N - 1 else state + N

        # only the bottom-right corner is rewarding
        reward_function[N ** 2 - 1, 2] = 1
        reward_function[N ** 2 - 1, 3] = 1

        super().__init__(name, num_states, action_space, state_space,
                         transition_func, reward_function, starting_state)
    def __init__(self, name, num_states, N):
        """Build an N x N grid MDP where every step also advances one row.

        Two actions move the agent left or right (clamped at the side
        walls) while the row index advances by one (clamped at the last
        row).  The bottom-right cell pays the chest reward on the right
        action.

        Args:
            name: identifier forwarded to the base environment.
            num_states: forwarded to the base environment.
            N: side length of the grid (N**2 states).
        """

        # set up the discrete state and action spaces
        self.inner_size = N
        state_space = DiscreteSpace(N**2)
        action_space = DiscreteSpace(2)

        # cache the space sizes
        num_s = state_space.get_size()
        num_a = action_space.get_size()

        # start one row down, one column in
        starting_state = N + 1

        # allocate the transition and reward tables
        transition_func = np.zeros((num_s, num_a), dtype=np.int32)
        reward_function = np.zeros((num_s, num_a), dtype=np.float64)

        # action indices and the chest payoff
        # NOTE(review): `2 * 0 - 1` evaluates to a fixed -1 and looks like a
        # disabled random sign (e.g. 2 * randint(2) - 1) — confirm intent
        left = 1
        right = 1 - left
        chest = 2 * 0 - 1

        # every move advances one row (clamped at the last row) while the
        # column shifts left or right (clamped at the side walls)
        for x in range(N):
            for y in range(N):
                pos = y * N + x
                next_y = min(y + 1, N - 1)
                x_after_left = max(x - 1, 0)
                x_after_right = min(x + 1, N - 1)

                transition_func[pos, left] = next_y * N + x_after_left
                transition_func[pos, right] = next_y * N + x_after_right

        # only the bottom-right cell (right action) carries the chest reward
        reward_function[N**2 - 1, right] = chest

        super().__init__(name, num_states, action_space, state_space,
                         transition_func, reward_function, starting_state)