Code Example #1
    def reset(self):
        """
        Description
        -----------
        Resets the PID input trajectory.
        """
        self.u = []


if __name__ == "__main__":

    # Build RL Objects
    rl = ReinforceLearning(discount_factor=0.95, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.5,
                           epsilon=0.2, doe=1.2, eval_period=30)

    # Build the states for the problem; the states are the tracking errors
    states = np.linspace(-15, 25, 201)

    rl.user_states(list(states))

    # Build the actions for the problem; the actions are the inputs for u2
    actions = np.linspace(-12, 18, 121)

    rl.user_actions(actions)

    # Load Q, T, and NT matrices from previous training
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
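
The visible excerpt ends while the previously trained matrices are being loaded. As a rough illustration of how a tabular Q-matrix is typically queried against the discretized states and actions built above, a greedy action for a measured tracking error could be selected as sketched below. This is a hypothetical sketch, not the ReinforceLearning class's own lookup method; it assumes import numpy as np from the full module and a Q-matrix of shape (len(states), len(actions)).

    # Hypothetical sketch (not part of the original script): choose the greedy
    # action for a measured tracking error from the loaded tabular Q-matrix.
    error = 4.3                                            # example tracking error
    state_idx = int(np.argmin(np.abs(states - error)))     # nearest discretized state
    action_idx = int(np.argmax(q[state_idx]))              # greedy entry in that row
    u2_move = actions[action_idx]                          # corresponding input for u2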
Code Example #2
    def reset(self):
        """
        Description
        -----------
        Resets the PID input trajectory.
        """
        self.u = []


if __name__ == "__main__":

    # Build RL Objects
    rl = ReinforceLearning(discount_factor=0.95,
                           states_start=300,
                           states_stop=340,
                           states_interval=0.5,
                           actions_start=-15,
                           actions_stop=15,
                           actions_interval=2.5,
                           learning_rate=0.5,
                           epsilon=0.2,
                           doe=1.2,
                           eval_period=30)

    # Build the states for the problem; the states are the tracking errors
    states = []

    rl.x1 = np.linspace(-10, 10, 21)
    rl.x2 = np.linspace(-10, 10, 21)

    for x1 in rl.x1:
        for x2 in rl.x2:
            states.append([x1, x2])
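
The nested loop above enumerates every (x1, x2) pair on the two discretized axes, giving 21 * 21 = 441 states. For reference, a minimal equivalent construction using itertools (an illustrative alternative, not part of the original script) would be:

    import itertools

    # Cartesian product of the two discretized axes; same ordering as the
    # nested loop (x1 outer, x2 inner).
    states = [[x1, x2] for x1, x2 in itertools.product(rl.x1, rl.x2)]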
Code Example #3
    def reset(self):
        """
        Description
        -----------
        Resets the PID input trajectory.
        """
        self.u = []


if __name__ == "__main__":

    # Build RL Objects
    rl = ReinforceLearning(discount_factor=0.95,
                           states_start=300,
                           states_stop=340,
                           states_interval=0.5,
                           actions_start=-15,
                           actions_stop=15,
                           actions_interval=2.5,
                           learning_rate=0.5,
                           epsilon=0.2,
                           doe=1.2,
                           eval_period=60)

    # Build the states for the problem; the states are the tracking errors
    states = []

    rl.x1 = np.linspace(-15, 15, 121)
    rl.x2 = np.linspace(-5, 5, 41)

    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])
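
Once the grid is built, a continuous two-dimensional observation has to be mapped onto one of these discrete states before a tabular method can index its Q-matrix. A hypothetical sketch of that nearest-neighbour lookup (assuming import numpy as np from the full module; the observation values are made up for illustration) could look like this:

    # Hypothetical sketch (not part of the original script): map a continuous
    # 2-D tracking-error observation to the nearest discretized state.
    states_arr = np.array(states)                          # shape (121 * 41, 2)
    obs = np.array([3.7, -1.2])                            # example observation
    state_idx = int(np.argmin(np.linalg.norm(states_arr - obs, axis=1)))
    nearest_state = states[state_idx]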