Example 1
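
A two-input, two-state MIMO LinearSystem controlled by a tabular ReinforceLearning agent: states and actions are discretized on user-defined grids, pre-trained Q/T/NT matrices are loaded from disk, and at each time step the agent combines UCB with epsilon-greedy action selection before an MPC-style stage reward updates the matrices.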

# NumPy is required throughout; LinearSystem and ReinforceLearning are
# classes from this project's own modules (their import path is not shown
# in the original snippet).
import numpy as np


def simulation():

    # Model Initiation
    model = LinearSystem(nsim=100,
                         model_type='MIMO',
                         x0=np.array([1.333, 4]),
                         u0=np.array([3, 6]),
                         xs=np.array([3.555, 4.666]),
                         us=np.array([5, 7]),
                         step_size=0.2)

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.95,
                           states_start=300,
                           states_stop=340,
                           states_interval=0.5,
                           actions_start=-15,
                           actions_stop=15,
                           actions_interval=2.5,
                           learning_rate=0.5,
                           epsilon=0.2,
                           doe=1.2,
                           eval_period=1)
    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want to define 
    their own states and actions.  RL will automatically populate states and actions if user does not input their own.
    """

    states = []

    rl.x1 = np.linspace(0, 6, 25)
    rl.x2 = np.linspace(2, 6, 17)

    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])

    rl.user_states(list(states))

    actions = []

    rl.u1 = np.linspace(1, 7, 19)
    rl.u2 = np.linspace(4, 9, 16)

    for i in rl.u1:
        for j in rl.u2:
            actions.append([i, j])

    rl.user_actions(list(actions))
    """
    Load pre-trained Q, T and NT matrices
    """

    q = np.loadtxt("Q_Matrix.txt")
    t_mat = np.loadtxt("T_Matrix.txt")  # named t_mat so the time index t below does not shadow it
    nt = np.loadtxt("NT_Matrix.txt")

    rl.user_matrices(q, t_mat, nt)
    """
    Simulation portion
    """

    rlist = []

    for episode in range(1):

        # Reset the model after each episode
        model.reset(random_init=False)
        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):
            """
            Disturbance
            """

            # if t % 10 == 0:
            #     model.x[t - 1, :] += np.random.uniform(-2.1, 2.1, size=2)
            """
            RL Evaluate
            """

            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(model.x[t - 1, :])
                action, action_index = rl.action_selection(state,
                                                           action,
                                                           model.u[t - 1, :],
                                                           no_decay=25,
                                                           ep_greedy=True,
                                                           time=t,
                                                           min_eps_rate=0.5)
            else:
                action = model.u[t - 1, :][0]

            next_state, reward, done, info = model.step([action],
                                                        t,
                                                        obj_function="MPC")
            """
            Feedback evaluation
            """

            if t == rl.eval_feedback:
                rl.matrix_update(action_index, reward, state, model.x[t, :], 5)
                tot_reward = tot_reward + reward

        rlist.append(tot_reward)

        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=200))

    return model, rl, rlist
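
A minimal sketch of how this example might be driven, assuming the pre-trained
Q_Matrix.txt, T_Matrix.txt, and NT_Matrix.txt files exist in the working
directory; the matplotlib plot is an illustrative assumption, not part of the
original:

import matplotlib.pyplot as plt

model, rl, rlist = simulation()

# Total reward accumulated per episode (a single episode in this example)
plt.plot(rlist, marker="o")
plt.xlabel("Episode")
plt.ylabel("Total reward")
plt.show()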
Example 2
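
Here the agent tunes a pair of controller gains rather than a plant input: kp_ki holds the (kp, ki) pair at each step, each action is a small increment in [-0.4, 0.4] applied through the closed-loop routine sim(), and gains that escape [0, 8] are reset to a safe pair.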

# As in Example 1, NumPy is required; ReinforceLearning and the closed-loop
# routine sim() come from this project's own modules (import path not shown).
import numpy as np


def simulation():

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.95,
                           states_start=300,
                           states_stop=340,
                           states_interval=0.5,
                           actions_start=-15,
                           actions_stop=15,
                           actions_interval=2.5,
                           learning_rate=0.5,
                           epsilon=0.8,
                           doe=0,
                           eval_period=1)
    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want to define 
    their own states and actions.  RL will automatically populate states and actions if user does not input their own.
    """

    states = []

    rl.x1 = np.linspace(1.5, 3.5, 16)
    rl.x2 = np.linspace(3.5, 5.5, 16)

    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])

    rl.user_states(list(states))

    actions = []

    rl.u1 = np.linspace(-0.4, 0.4, 5)
    rl.u2 = np.linspace(-0.4, 0.4, 5)

    for i in rl.u1:
        for j in rl.u2:
            actions.append([i, j])

    rl.user_actions(list(actions))
    """
    Load pre-trained Q, T and NT matrices
    """

    q = np.loadtxt("Q_Matrix.txt")
    t_mat = np.loadtxt("T_Matrix.txt")  # named t_mat so the time index t below does not shadow it
    nt = np.loadtxt("NT_Matrix.txt")

    rl.user_matrices(q, t_mat, nt)
    """
    Simulation portion
    """

    sim_length = 90

    kp_ki = np.zeros([sim_length, 2])
    kp_ki[0, :] = np.array([1.4, 3.3])

    actions = np.zeros([sim_length, 2])

    for t in range(1, sim_length):
        """
        RL Evaluate
        """

        state, action = rl.ucb_action_selection(kp_ki[t - 1, :])
        action, action_index = rl.action_selection(state,
                                                   action,
                                                   actions[t - 1, :],
                                                   no_decay=25,
                                                   ep_greedy=False,
                                                   time=t,
                                                   min_eps_rate=0.4)
        actions[t, :] = action

        kp_ki[t, :], reward, x_trajectory = sim(kp_ki[t - 1], action)
        """
        Feedback evaluation
        """

        rl.matrix_update(action_index, reward, state, kp_ki[t, :], 5)

        # Bound the gains: if either leaves [0, 8], reset to a safe pair
        if (kp_ki[t, :] > 8).any() or (kp_ki[t, :] < 0).any():
            kp_ki[t, :] = np.array([1.5, 3.5])

        rl.autosave(t, 250)

    return kp_ki, actions, rl
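
As above, a minimal sketch of running this example and inspecting the gain
trajectory; the matplotlib usage is an illustrative assumption:

import matplotlib.pyplot as plt

kp_ki, actions, rl = simulation()

# Trajectories of the two tuned gains over the 90 RL steps
plt.plot(kp_ki[:, 0], label="kp")
plt.plot(kp_ki[:, 1], label="ki")
plt.xlabel("RL step")
plt.legend()
plt.show()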
Example 3
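
A single-input, two-state SIMO variant: the action is a scalar plant input on a grid over [2, 6], the state grid is refined around the setpoint, and the step cost adds an l1 penalty on input moves (delta_u='l1').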

# As in the previous examples, NumPy is required; SIMOSystem and
# ReinforceLearning come from this project's own modules (import path not shown).
import numpy as np


def simulation():

    # Model Initiation
    model = SIMOSystem(nsim=50,
                       x0=np.array([2, 2 / 3]),
                       u0=np.array([2]),
                       xs=np.array([4, 4 / 3]),
                       us=np.array([4]),
                       step_size=0.2,
                       control=False,
                       q_cost=1,
                       r_cost=0.5,
                       s_cost=0.3,
                       random_seed=1)

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.9,
                           states_start=300,
                           states_stop=340,
                           states_interval=0.5,
                           actions_start=-15,
                           actions_stop=15,
                           actions_interval=2.5,
                           learning_rate=0.1,
                           epsilon=0.9,
                           doe=0,
                           eval_period=1)
    """
    Example of user defined states and actions.  Users do not need to do this.  This is only if users want to define 
    their own states and actions.  RL will automatically populate states and actions if user does not input their own.
    """

    states = []

    x1 = np.zeros(15)
    x1[0:2] = np.linspace(2, 3, 2)
    x1[2:13] = np.linspace(3.5, 4.5, 11)
    x1[13:15] = np.linspace(5, 5.5, 2)

    x2 = np.zeros(15)
    x2[0:2] = np.linspace(0, 2 / 3, 2)
    x2[2:13] = np.linspace(1, 2, 11)
    x2[5] = 1.33
    x2[13:15] = np.linspace(2.25, 2.5, 2)

    rl.x1 = x1
    rl.x2 = x2

    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])

    rl.user_states(list(states))

    actions = np.linspace(2, 6, 17)

    rl.user_actions(list(actions))
    """
    Load pre-trained Q, T and NT matrices
    """

    q = np.loadtxt("Q_Matrix.txt")
    t_mat = np.loadtxt("T_Matrix.txt")  # named t_mat so the time index t below does not shadow it
    nt = np.loadtxt("NT_Matrix.txt")

    rl.user_matrices(q, t_mat, nt)
    """
    Simulation portion
    """

    rlist = []

    for episode in range(1):

        # Reset the model after each episode
        model.reset(random_init=False)

        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):
            """
            Disturbance
            """

            # if t % 5 == 0:
            #     model.x[t - 1, :] += np.random.uniform(-2.1, 2.1, size=1)
            """
            RL Evaluate
            """

            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(model.x[t - 1, :])
                action, action_index = rl.action_selection(state,
                                                           action,
                                                           model.u[t - 1, :],
                                                           no_decay=25,
                                                           ep_greedy=False,
                                                           time=t,
                                                           min_eps_rate=0.6)
            else:
                action = model.u[t - 1, :][0]

            next_state, reward, done, info = model.step([action],
                                                        t,
                                                        obj_function="MPC",
                                                        delta_u='l1')
            """
            Feedback evaluation
            """

            if t == rl.eval_feedback:
                rl.matrix_update(action_index, reward, state, model.x[t, :], 5)
                tot_reward = tot_reward + reward

        rlist.append(tot_reward)

        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=200))

    return model, rl, rlist
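
And a minimal way to inspect the result of this SIMO run, again assuming the
pre-trained matrix files are in place; model.x is the state trajectory recorded
by the loop above, so its last row is the final state:

model, rl, rlist = simulation()

# One total reward per episode (a single episode in this example)
print("Total reward per episode:", rlist)
print("Final state:", model.x[-1, :])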