Code example #1
File: ExplorationChain.py  Project: kosmitive/qlearn
    def __init__(self, name, num_states, N):

        # create the state and action space
        self.inner_size = N
        state_space = DiscreteSpace(N)
        action_space = DiscreteSpace(2)

        # the chain starts in state 1, one step in from the left end
        starting_state = 1

        # specify the transition function
        transition_func = np.zeros((N, 2), dtype=np.int32)

        # fill the table: action 0 steps left, action 1 steps right
        for i in range(N):
            transition_func[i, 0] = i - 1
            transition_func[i, 1] = i + 1

        # clamp both ends of the chain to themselves
        transition_func[0, 0] = 0
        transition_func[N - 1, 1] = N - 1

        # now we define the reward function: a small consolation reward for
        # staying at the left end and the full reward at the right end
        reward_function = np.zeros((N, 2), dtype=np.float64)
        reward_function[0, 0] = 0.001
        reward_function[N - 1, 1] = 1

        super().__init__(name, num_states, action_space, state_space, transition_func, reward_function, starting_state)
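
The DiscreteSpace class and the base-class constructor called here belong to the qlearn project and are not shown. As a standalone check (plain NumPy, not part of the project's API), the same chain tables can be rebuilt and walked directly:

    import numpy as np

    N = 5
    P = np.zeros((N, 2), dtype=np.int32)
    for i in range(N):
        P[i, 0] = max(i - 1, 0)      # action 0: step left, clamped at the left end
        P[i, 1] = min(i + 1, N - 1)  # action 1: step right, clamped at the right end

    R = np.zeros((N, 2))
    R[0, 0] = 0.001  # small consolation reward at the left end
    R[N - 1, 1] = 1  # full reward at the right end

    # from the starting state 1, walking right reaches the goal in N - 2 steps
    state = 1
    for _ in range(N - 2):
        state = P[state, 1]
    assert state == N - 1 and R[state, 1] == 1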
Code example #2
File: ExplorationTree.py  Project: kosmitive/qlearn
    def __init__(self, name, num_states, N):

        # create the state and action space
        self.inner_size = N
        state_space = DiscreteSpace(2**N - 1)
        action_space = DiscreteSpace(3)

        # start at the leftmost leaf of the tree
        starting_state = 2**(N - 1) - 1

        # specify the transition function
        transition_func = np.zeros((2**N - 1, 3), dtype=np.int32)

        # fill the table using heap indexing: action 0 moves to the left
        # child, action 1 to the parent, action 2 to the right child
        for i in range(2**N - 1):
            transition_func[i, 0] = 2 * i + 1
            transition_func[i, 1] = (i - 1) // 2
            transition_func[i, 2] = 2 * i + 2

        # the root's parent action and the leaves' child actions are self-loops
        transition_func[0, 1] = 0

        for l in range(2**(N - 1) - 1, 2**N - 1):
            transition_func[l, 0] = l
            transition_func[l, 2] = l

        # now we define the reward function: both leaf actions at the
        # leftmost leaf pay 2 * (N + 1) / N - 1, and the payout decays
        # from leaf to leaf towards the right
        reward_function = np.zeros((2**N - 1, 3), dtype=np.float64)
        reward_function[2**N - 2, 0] = -1
        reward_function[2**N - 2, 2] = 1
        reward_function[2**(N - 1) - 1, 0] = 2 * (N + 1) / N - 1
        reward_function[2**(N - 1) - 1, 2] = 2 * (N + 1) / N - 1

        for state in range(2**(N - 1), 2**N - 1):
            d = state - 2**(N - 1)
            next_rew = ((N - d) / (N - d + 1)) * reward_function[state - 1, 0]
            reward_function[state, 0] = next_rew
            reward_function[state, 2] = next_rew

        super().__init__(name, num_states, action_space, state_space,
                         transition_func, reward_function, starting_state)
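
The index arithmetic above encodes the standard array (heap) layout of a complete binary tree with 2**N - 1 nodes: node i has children 2*i + 1 and 2*i + 2 and parent (i - 1) // 2, with the leaves occupying indices 2**(N - 1) - 1 through 2**N - 2. A minimal standalone sketch of that layout, independent of the qlearn classes:

    N = 3
    for i in range(2**N - 1):
        left, right = 2 * i + 1, 2 * i + 2
        parent = max((i - 1) // 2, 0)  # treat the root as its own parent
        if i >= 2**(N - 1) - 1:
            print(f"node {i}: leaf, parent {parent}")
        else:
            print(f"node {i}: children ({left}, {right}), parent {parent}")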
Code example #3
    def __init__(self, name, num_states, N):

        # create the state and action space
        self.inner_size = N
        state_space = DiscreteSpace(2**N)
        action_space = DiscreteSpace(N)

        # get size of state and action space
        size_space = state_space.get_size()
        size_action = action_space.get_size()

        # start in state 0, i.e. with every bit cleared
        starting_state = 0

        # specify the transition function
        transition_func = np.zeros((size_space, size_action), dtype=np.int32)
        reward_function = np.zeros((size_space, size_action), dtype=np.float64)

        # fill the tables: action j flips bit j of the state's binary
        # encoding; the reward favours moves that decrease the state value
        for i in range(size_space):
            for j in range(size_action):
                next_state = self.flip_bit(i, j, N)
                transition_func[i, j] = next_state
                reward_function[i, j] = np.sign(i - next_state) * np.minimum(
                    i, next_state)

        # rescale the rewards linearly to the range [-1, 1]
        nmax = np.max(reward_function)
        nmin = np.min(reward_function)
        reward_function = 2 * ((reward_function - nmin) / (nmax - nmin)) - 1
        super().__init__(name, num_states, action_space, state_space,
                         transition_func, reward_function, starting_state)
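
flip_bit is defined elsewhere in the class. A plausible stand-in (an assumption based on the name and the 2**N-sized state space, not the project's actual helper) toggles one bit of the state's binary encoding with XOR:

    def flip_bit(state, bit, n_bits):
        # hypothetical helper: toggle bit `bit`; the result stays in [0, 2**n_bits)
        return state ^ (1 << bit)

    assert flip_bit(0b101, 1, 3) == 0b111  # flipping bit 1 of 5 gives 7
    assert flip_bit(0b101, 0, 3) == 0b100  # flipping bit 0 of 5 gives 4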
Code example #4
File: GridWorld.py  Project: kosmitive/qlearn
    def __init__(self, name, num_states, N):

        # create the state and action space
        self.inner_size = N
        state_space = DiscreteSpace(N**2)
        action_space = DiscreteSpace(4)

        # get size of state and action space
        size_space = state_space.get_size()
        size_action = action_space.get_size()

        # start in the top-left corner, state 0
        starting_state = 0

        # specify the transition function
        transition_func = np.zeros((size_space, size_action), dtype=np.int32)
        reward_function = np.zeros((size_space, size_action), dtype=np.float64)

        # fill the table: states are laid out row-major, so state i sits at
        # row i // N and column i % N; moves off the grid are self-loops
        for i in range(size_space):
            transition_func[i, 0] = i if i % N == 0 else i - 1       # left
            transition_func[i, 1] = i if i // N == 0 else i - N      # up
            transition_func[i, 2] = i if i % N == N - 1 else i + 1   # right
            transition_func[i, 3] = i if i // N >= N - 1 else i + N  # down

        # now we define the reward function: only the self-loop actions in
        # the bottom-right corner pay a reward
        reward_function[N**2 - 1, 2] = 1
        reward_function[N**2 - 1, 3] = 1

        super().__init__(name, num_states, action_space, state_space,
                         transition_func, reward_function, starting_state)
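
Because the grid is stored row-major, state i sits at row i // N and column i % N, which is where the edge tests in the loop come from. A quick standalone check of the resulting moves (plain NumPy, independent of the qlearn classes):

    import numpy as np

    N = 3
    P = np.zeros((N * N, 4), dtype=np.int32)
    for i in range(N * N):
        row, col = divmod(i, N)
        P[i, 0] = i if col == 0 else i - 1      # left
        P[i, 1] = i if row == 0 else i - N      # up
        P[i, 2] = i if col == N - 1 else i + 1  # right
        P[i, 3] = i if row == N - 1 else i + N  # down

    # the top-left corner can only move right or down
    assert list(P[0]) == [0, 0, 1, N]
    # the bottom-right corner can only move left or up
    assert list(P[N * N - 1]) == [N * N - 2, N * (N - 1) - 1, N * N - 1, N * N - 1]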
Code example #5
    def __init__(self, name, num_states, N):

        # create the state and action space
        self.inner_size = N
        state_space = DiscreteSpace(N**2)
        action_space = DiscreteSpace(2)

        # get size of state and action space
        size_space = state_space.get_size()
        size_action = action_space.get_size()

        # start in the top-left cell, state 0
        starting_state = 0

        # specify the transition function
        transition_func = np.zeros((size_space, size_action), dtype=np.int32)
        reward_function = np.zeros((size_space, size_action), dtype=np.float64)

        # action indices ('left' is action 1, 'right' is action 0) and the
        # chest payout, which evaluates to 2 * 1 - 1 == 1
        left = 1
        right = 1 - left
        chest = 2 * 1 - 1

        # every step to the right costs a small penalty
        reward_function[:, right] = -0.01 / N

        # fill the table: both actions drop the agent one row (until the
        # bottom row) while steering one column left or right
        for x in range(N):
            for y in range(N):
                pos = y * N + x
                next_y = y if y == N - 1 else y + 1
                left_x = x if x == 0 else x - 1
                right_x = x if x == N - 1 else x + 1

                transition_func[pos, left] = next_y * N + left_x
                transition_func[pos, right] = next_y * N + right_x

        # the chest sits in the bottom-right corner
        reward_function[N**2 - 1, right] = chest

        super().__init__(name, num_states, action_space, state_space,
                         transition_func, reward_function, starting_state)
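
Under these dynamics every action drops the agent one row (until the bottom row) while steering one column left or right, so steering right from the start reaches the chest at state N**2 - 1 along the diagonal. A standalone sketch of that greedy path (plain Python, not part of the project's API):

    N = 4
    state, steps = 0, 0
    while state != N * N - 1:
        x, y = state % N, state // N
        x = min(x + 1, N - 1)  # always steer right
        y = min(y + 1, N - 1)  # the dynamics drop one row per step
        state = y * N + x
        steps += 1
    assert steps == N - 1  # the diagonal path reaches the chest in N - 1 steps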