def simulation():
    # Model Initiation
    model = LinearSystem(nsim=100, model_type='SISO', x0=np.array([0.5]), u0=np.array([1]),
                         xs=np.array([5]), us=np.array([10]), step_size=0.2)
    # model = LinearSystem(nsim=100, model_type='MIMO', x0=np.array())

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.95, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.5,
                           epsilon=0.2, doe=1.2, eval_period=1)

    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want
    to define their own states and actions.  RL will automatically populate states and actions if user does
    not input their own.
    """

    states = np.zeros([27])
    states[0:12] = np.linspace(0, 2.5, 12)
    states[12:27] = np.linspace(3, 8, 15)
    rl.user_states(list(states))

    # actions = np.zeros([20])
    # actions[0:5] = np.linspace(290, 298, 5)
    actions = np.linspace(5, 15, 16)
    # actions[30:35] = np.linspace(302, 310, 5)
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    rlist = []

    for episode in range(1):
        # Reset the model after each episode
        model.reset(random_init=False)
        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):
            """
            Disturbance
            """
            # if t % 30 == 0:
            #     model.x[t - 1] += np.random.uniform(-5, 3)

            """
            RL Evaluate
            """
            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(model.x[t - 1, 0])
                action, action_index = rl.action_selection(state, action, model.u[t - 1, 0],
                                                           no_decay=25, ep_greedy=False, time=t,
                                                           min_eps_rate=0.5)
                # Use interpolation to perform action
                action = rl.interpolation(model.x[t - 1, 0])
            else:
                action = model.u[t - 1, :][0]

            next_state, reward, done, info = model.step([action], t, obj_function="MPC")

            """
            Feedback evaluation
            """
            if t == rl.eval_feedback:
                rl.matrix_update(action_index, reward, state, model.x[t, 0], 5)

            tot_reward = tot_reward + reward

        rlist.append(tot_reward)
        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=120))

    return model, rl, rlist
def simulation():
    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.95, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.5,
                           epsilon=0.8, doe=0, eval_period=1)

    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want
    to define their own states and actions.  RL will automatically populate states and actions if user does
    not input their own.
    """

    states = []
    rl.x1 = np.linspace(1.5, 3.5, 16)
    rl.x2 = np.linspace(3.5, 5.5, 16)
    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])
    rl.user_states(list(states))

    actions = []
    rl.u1 = np.linspace(-0.4, 0.4, 5)
    rl.u2 = np.linspace(-0.4, 0.4, 5)
    for i in rl.u1:
        for j in rl.u2:
            actions.append([i, j])
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    sim_length = 90
    kp_ki = np.zeros([sim_length, 2])
    kp_ki[0, :] = np.array([1.4, 3.3])
    # Reuse the name `actions` to log the gain increment applied at each step
    actions = np.zeros([sim_length, 2])

    for t in range(1, sim_length):
        """
        RL Evaluate
        """
        state, action = rl.ucb_action_selection(kp_ki[t - 1, :])
        action, action_index = rl.action_selection(state, action, actions[t - 1, :], no_decay=25,
                                                   ep_greedy=False, time=t, min_eps_rate=0.4)
        actions[t, :] = action

        kp_ki[t, :], reward, x_trajectory = sim(kp_ki[t - 1], action)

        """
        Feedback evaluation
        """
        rl.matrix_update(action_index, reward, state, kp_ki[t, :], 5)

        # Bound the gains: reset them if they leave the admissible region [0, 8] x [0, 8]
        if (kp_ki[t, :] > np.array([8, 8])).any() or (kp_ki[t, :] < np.array([0, 0])).any():
            kp_ki[t, :] = np.array([1.5, 3.5])

        rl.autosave(t, 250)

    return kp_ki, actions, rl
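# --- Hypothetical sketch; `sim` is defined elsewhere in the repository and is not shown in this section. ---
# The PI-tuning loop above only relies on its interface: it takes the current [Kp, Ki] pair and the selected
# action (an increment on the gains), runs one closed-loop simulation, and returns the updated gains, a
# scalar reward, and the closed-loop state trajectory.  A minimal stand-in with that signature (placeholder
# dynamics and reward, for illustration only) could look like this:
def sim(kp_ki_prev, action):
    kp_ki_new = np.asarray(kp_ki_prev, dtype=float) + np.asarray(action, dtype=float)  # apply gain increments
    x_trajectory = np.zeros(90)                       # placeholder closed-loop response
    reward = -float(np.sum(np.abs(x_trajectory)))     # placeholder tracking-cost reward
    return kp_ki_new, reward, x_trajectory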
def simulation():
    # Model Initiation
    model = LinearSystem(nsim=100, model_type='SISO', x0=np.array([0.5]), u0=np.array([1]),
                         xs=np.array([1.5]), us=np.array([3]), step_size=0.2)

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.90, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.1,
                           epsilon=1, doe=0, eval_period=5)

    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want
    to define their own states and actions.  RL will automatically populate states and actions if user does
    not input their own.
    """

    # Set-point tracking errors
    states = np.zeros(39)
    states[0:5] = np.linspace(-4, -2, 5)
    states[34:39] = np.linspace(2, 4, 5)
    states[5:34] = np.linspace(-1.8, 1.8, 29)
    rl.user_states(list(states))

    actions = np.linspace(-4, 4, 33)
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    rlist = []

    for episode in range(1):
        # Reset the model after each episode
        model.reset(random_init=False)
        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):
            """
            Disturbance
            """
            # if t % 21 == 0:
            #     model.xs = np.array([np.random.uniform(1, 3)])
            #     # model.xs = np.array([2.5])
            #     print(model.xs)

            if t % 25 == 0:
                model.xs = np.array([2])

            if t % 50 == 0:
                model.xs = np.array([1])

            if t % 75 == 0:
                model.xs = np.array([1.5])

            """
            RL Evaluate
            """
            tracking_error = (model.x[t - 1] - model.xs)[0]

            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(tracking_error)
                action, action_index = rl.action_selection(state, action, model.u[t - 1, 0], no_decay=25,
                                                           ep_greedy=False, time=t, min_eps_rate=0.25)
                # Interpolation action selection
                # action = rl.interpolation(tracking_error)
            else:
                action = 0

            inputs = model.u[t - 1] + action
            next_state, reward, done, info = model.step(inputs, t, obj_function="MPC")

            """
            Feedback evaluation
            """
            if t == rl.eval_feedback:
                feedback_tracking_error = (model.x[t, 0] - model.xs)[0]
                if abs(feedback_tracking_error) > 7.5:
                    break
                rl.matrix_update(action_index, reward, state, feedback_tracking_error, 5)

            tot_reward = tot_reward + reward

        rlist.append(tot_reward)
        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=120))

    return model, rl, rlist
def simulation():
    # Model Initiation
    model = LinearSystem(nsim=100, model_type='MIMO', x0=np.array([1.333, 4]), u0=np.array([3, 6]),
                         xs=np.array([3.555, 4.666]), us=np.array([5, 7]), step_size=0.2)

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.95, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.5,
                           epsilon=0.2, doe=1.2, eval_period=1)

    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want
    to define their own states and actions.  RL will automatically populate states and actions if user does
    not input their own.
    """

    states = []
    rl.x1 = np.linspace(0, 6, 25)
    rl.x2 = np.linspace(2, 6, 17)
    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])
    rl.user_states(list(states))

    actions = []
    rl.u1 = np.linspace(1, 7, 19)
    rl.u2 = np.linspace(4, 9, 16)
    for i in rl.u1:
        for j in rl.u2:
            actions.append([i, j])
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    rlist = []

    for episode in range(1):
        # Reset the model after each episode
        model.reset(random_init=False)
        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):
            """
            Disturbance
            """
            # if t % 10 == 0:
            #     model.x[t - 1, :] += np.random.uniform(-2.1, 2.1, size=2)

            """
            RL Evaluate
            """
            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(model.x[t - 1, :])
                action, action_index = rl.action_selection(state, action, model.u[t - 1, :], no_decay=25,
                                                           ep_greedy=True, time=t, min_eps_rate=0.5)
            else:
                # Hold the previous input pair when no RL evaluation occurs (both inputs, since the model is MIMO)
                action = model.u[t - 1, :]

            # The MIMO model takes the full two-element input vector
            next_state, reward, done, info = model.step(list(action), t, obj_function="MPC")

            """
            Feedback evaluation
            """
            if t == rl.eval_feedback:
                rl.matrix_update(action_index, reward, state, model.x[t, :], 5)

            tot_reward = tot_reward + reward

        rlist.append(tot_reward)
        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=200))

    return model, rl, rlist
def simulation():
    # Model Initiation
    model = SIMOSystem(nsim=50, x0=np.array([2, 2 / 3]), u0=np.array([2]), xs=np.array([4, 4 / 3]),
                       us=np.array([4]), step_size=0.2, control=False, q_cost=1, r_cost=0.5, s_cost=0.3,
                       random_seed=1)

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.9, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.1,
                           epsilon=0.9, doe=0, eval_period=1)

    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want
    to define their own states and actions.  RL will automatically populate states and actions if user does
    not input their own.
    """

    states = []

    x1 = np.zeros(15)
    x1[0:2] = np.linspace(2, 3, 2)
    x1[2:13] = np.linspace(3.5, 4.5, 11)
    x1[13:15] = np.linspace(5, 5.5, 2)

    x2 = np.zeros(15)
    x2[0:2] = np.linspace(0, 2 / 3, 2)
    x2[2:13] = np.linspace(1, 2, 11)
    x2[5] = 1.33
    x2[13:15] = np.linspace(2.25, 2.5, 2)

    rl.x1 = x1
    rl.x2 = x2
    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])
    rl.user_states(list(states))

    actions = np.linspace(2, 6, 17)
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    rlist = []

    for episode in range(1):
        # Reset the model after each episode
        model.reset(random_init=False)
        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):
            """
            Disturbance
            """
            # if t % 5 == 0:
            #     model.x[t - 1, :] += np.random.uniform(-2.1, 2.1, size=1)

            """
            RL Evaluate
            """
            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(model.x[t - 1, :])
                action, action_index = rl.action_selection(state, action, model.u[t - 1, :], no_decay=25,
                                                           ep_greedy=False, time=t, min_eps_rate=0.6)
            else:
                action = model.u[t - 1, :][0]

            next_state, reward, done, info = model.step([action], t, obj_function="MPC", delta_u='l1')

            """
            Feedback evaluation
            """
            if t == rl.eval_feedback:
                rl.matrix_update(action_index, reward, state, model.x[t, :], 5)

            tot_reward = tot_reward + reward

        rlist.append(tot_reward)
        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=200))

    return model, rl, rlist
import numpy as np
import tensorflow as tf  # TensorFlow 1.x graph-mode API

# Reinforcement Learning Initiation (the leading constructor arguments are cut off in this snippet;
# only the trailing keyword arguments are preserved)
rl = ReinforceLearning(learning_rate=0.1, epsilon=0.1, doe=0, eval_period=5)

states = np.zeros([75])
states[0:15] = np.arange(290, 310, 20 / 15)
states[15:60] = np.arange(311, 330, 19 / 45)
states[60:75] = np.arange(331, 350, 19 / 15)
rl.user_states(list(states))

actions = np.zeros([20])
actions[1:20] = np.arange(-10, 10, 20 / 19)
rl.user_actions(list(actions))

num_of_episodes = 1

tf.reset_default_graph()

# Feed-forward part of the single-layer Q-network used to choose actions
inputs = tf.placeholder(shape=[1, len(states)], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([len(states), len(actions)], 0, 0.01))
Qout = tf.matmul(inputs, W)
predict = tf.argmax(Qout, 1)

# Loss between target and predicted Q-values, and the gradient-descent update op
nextQ = tf.placeholder(shape=[1, len(actions)], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(learning_rate=rl.learning_rate)
updateModel = trainer.minimize(loss)
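# --- Hypothetical sketch (not part of the original snippet) ---
# The graph above only defines the Q-network and its update op; a session-based loop is still needed to
# train it.  The sketch below shows one common TensorFlow 1.x pattern, assuming a one-hot encoding of the
# discretized state and a generic `env_step(action_index)` environment call that is NOT part of this
# repository; the discount factor `gamma` is likewise an assumed placeholder.
gamma = 0.9  # assumed discount factor for the Q-learning target

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for episode in range(num_of_episodes):
        state_index = 0                                  # assumed starting state index
        for step in range(100):                          # assumed episode length
            # One-hot encode the current discretized state and pick the greedy action
            one_hot_state = np.identity(len(states))[state_index:state_index + 1]
            action_index, all_q = sess.run([predict, Qout], feed_dict={inputs: one_hot_state})

            # Apply the action through a hypothetical environment call
            next_state_index, reward = env_step(action_index[0])

            # Build the Q-learning target and take one gradient step
            one_hot_next = np.identity(len(states))[next_state_index:next_state_index + 1]
            q_next = sess.run(Qout, feed_dict={inputs: one_hot_next})
            target_q = all_q
            target_q[0, action_index[0]] = reward + gamma * np.max(q_next)
            sess.run(updateModel, feed_dict={inputs: one_hot_state, nextQ: target_q})

            state_index = next_state_index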