Ejemplo n.º 1
0
 def __init__(self, system, task, model):
     """Build an LQR controller for *model* under *task*'s quadratic cost.

     Linearizes the model, pads the state-cost matrix up to the full state
     dimension, solves the discrete algebraic Riccati equation, and stores
     the resulting feedback gain and cost matrices on the instance.
     """
     super().__init__(system, task, model)
     a_mat, b_mat = model.to_linear()
     cost_q, cost_r, _ = task.get_cost().get_cost_matrices()
     # Embed the (possibly smaller) state-cost matrix into a full
     # state_dim x state_dim matrix, zero elsewhere.
     n = model.state_dim
     padded_q = np.zeros((n, n))
     padded_q[:cost_q.shape[0], :cost_q.shape[1]] = cost_q
     _, _, gain = dare(a_mat, b_mat, padded_q, cost_r)
     self.K = gain
     self.Qp, self.Rp = padded_q, cost_r
     self.model = model
Ejemplo n.º 2
0
def K_calc(A, C, Q, R, S):
    """Compute the steady-state Kalman gain for the pair (A, C).

    Solves the filtering Riccati equation via ``cnt.dare`` on the dual
    system (A.T, C.T) and forms K = (A P C' + S)(C P C' + R)^-1.

    Returns:
        (K, Calculated): ``K`` is the gain matrix on success or an empty
        list on failure; ``Calculated`` flags whether the solve succeeded.
    """
    n_A = A[0, :].size
    try:
        # Dual DARE: the filter Riccati equation for (A, C).
        P, L, G = cnt.dare(A.T, C.T, Q, R, S, np.identity(n_A))
        K = np.dot(np.dot(A, P), C.T) + S
        K = np.dot(K, np.linalg.inv(np.dot(np.dot(C, P), C.T) + R))
        Calculated = True
    except Exception:
        # Deliberate best-effort fallback, but no longer a bare ``except:``
        # (which would also swallow SystemExit/KeyboardInterrupt).
        K = []
        print("Kalman filter cannot be calculated")
        Calculated = False
    return K, Calculated
Ejemplo n.º 3
0
def K_calc(A, C, Q, R, S):
    """Compute the steady-state Kalman gain for the pair (A, C).

    Imports ``control.matlab`` lazily; if the package is missing, a warning
    is emitted and the documented failure result is returned.

    Returns:
        (K, Calculated): ``K`` is the gain matrix on success or an empty
        list on failure; ``Calculated`` flags whether the solve succeeded.
    """
    n_A = A[0, :].size
    try:
        import control.matlab as cnt
    except ImportError:
        import warnings
        warnings.warn(
            'could not import control (see github.com/python-control), skipping Kalman filter step',
            stacklevel=1,
            source=None)
        # Without ``control`` the DARE cannot be solved.  Return the
        # failure result directly instead of falling through to a NameError
        # on ``cnt`` that the old bare ``except:`` silently absorbed.
        return [], False

    try:
        # Dual DARE: the filter Riccati equation for (A, C).
        P, L, G = cnt.dare(A.T, C.T, Q, R, S, np.identity(n_A))
        K = np.dot(np.dot(A, P), C.T) + S
        K = np.dot(K, np.linalg.inv(np.dot(np.dot(C, P), C.T) + R))
        Calculated = True
    except Exception:
        # Best-effort fallback; narrowed from a bare ``except:``.
        K = []
        # print("Kalman filter cannot be calculated")
        Calculated = False
    return K, Calculated
Ejemplo n.º 4
0
        observation, cost, done, info = env.step(u[:, 0])
        true_x_prime = np.vstack(observation)

        testing_norms.append(utilities.l2_norm(true_x_prime,
                                               predicted_x_prime))

        x = true_x_prime

testing_norms = np.array(testing_norms)
print("Mean testing norm:", np.mean(testing_norms))

#%% LQR w/ Entropy
gamma = 0.99  # discount factor
lamb = 0.1  # presumably the entropy-regularization weight (unused in this chunk) — TODO confirm

# Discounted LQR: scaling A and B by sqrt(gamma) folds the discount into a
# standard DARE solve, whose first return value is the cost-to-go matrix P.
soln = dare(A * np.sqrt(gamma), B * np.sqrt(gamma), Q, R)
P = soln[0]
# C = np.array(dlqr(A, B, Q, R)[0])
#! Check this again
C = np.linalg.inv(R + gamma * B.T @ P @ B) @ (gamma * B.T @ P @ A)
# NOTE(review): fixed duplicated assignment target (was ``sigma_t = sigma_t = ...``).
sigma_t = np.linalg.inv(R + B.T @ P @ B)

#%% Test optimal policy
num_episodes = 100
optimal_costs = np.zeros([num_episodes])
optimal_steps = np.zeros([num_episodes])
for episode in range(num_episodes):
    x = env.reset()

    done = False
    while not done and optimal_steps[episode] < 200:
Ejemplo n.º 5
0
def callback(Data):
    """Mocap (VICON) frame handler driving a two-robot learning/control loop.

    Per frame: read the two marker positions, refine each agent's learnt
    control-cost weight from the previously applied controls, estimate the
    state (model-predicted when markers are occluded, measured otherwise),
    solve a discrete ARE per agent for fresh feedback gains, and stream the
    resulting controls to the robots over the module-level ZMQ ``socket``.
    All loop state lives in the module globals declared below.  On Ctrl-C
    the collected logs are saved to .mat files and the process exits.
    """
    global messageProcessingIndex
    global Occlusioncounter
    global X_iter
    global Y_iter
    global Learnt_Rstorage
    global U_applied
    global traMat
    global matfile1
    global matfile2
    global matfile3
    global X
    global Y
    global u1_observed
    global u2_observed
    global trueIndex
    occlusionflag = 0  # set to 1 when a marker is lost in this frame

    # Admissible range and initial guesses for the learnt weights theta.
    theta_max = 100
    theta_min = 0.001
    theta1 = 0.6
    theta2 = 0.3
    R1_control = block_diag(theta1, 1)  # for the control matrix at agent 1
    R2_control = block_diag(1, theta2)  # for the control matrix at agent 2

    #todo ensure that marker 0 is always one robot and marker 1 is the other one

    try:
        # robot1Pose = np.array([[Data.markers[0].translation.x],[Data.markers[0].translation.y]])
        # robot2Pose = np.array([[Data.markers[1].translation.x],[Data.markers[1].translation.y]])
        robot1Pose = np.array([[Data.markers[0].translation.x]\
#            ,[Data.markers[0].translation.y]])

            ,[Data.markers[0].translation.y + 1000]])  # for change in origin
        robot2Pose = np.array([[Data.markers[1].translation.x]\
#            ,[Data.markers[1].translation.y]])

            ,[Data.markers[1].translation.y + 1000]])

        robot1Pose = robot1Pose * 0.001  #to convert mocap coordinates to meters
        robot2Pose = robot2Pose * 0.001
        messageProcessingIndex = messageProcessingIndex + 1
#        trueIndex = messageProcessingIndex - 1 # the index of the message (sample) being processed

    except IndexError:
        # Fewer than two markers visible: count the occlusion and fall back
        # to the model prediction below.
        print(
            'Occlusion happened. Ensure markers are in the  field of view of VICON'
        )
        occlusionflag = 1
        Occlusioncounter = Occlusioncounter + 1
        # NOTE(review): robot1Pose/robot2Pose are unbound on this path; if
        # messageProcessingIndex is still 1 the transformation-matrix branch
        # below raises NameError — confirm this cannot occur in practice.

    try:
        if messageProcessingIndex == 1:
            # find the transformation matrix
            traMat = findTransfromationMatrix(robot1Pose, robot2Pose)

        if messageProcessingIndex > 1:  # perform learning as soon as you observe
            # Rebuild the most recent R estimate, then refine each agent's
            # theta from the control that agent was observed to apply.
            R_hat = block_diag(Learnt_Rstorage[(trueIndex - 1)][0, 0],
                               Learnt_Rstorage[(trueIndex - 1)][0, 1])
            # Theta 1 learning, agents start at 0. So agent 0
            Rhat_theta1_update = agentlearning(A, B, Q, R_hat, u1_observed, X,
                                               theta_max, theta_min, 0)
            # Theta 2 learning, agents start at 0. So agent 1
            Rhat_theta2_update = agentlearning(A, B, Q, R_hat, u2_observed, X,
                                               theta_max, theta_min, 1)
            #            Learnt_Rstorage = np.concatenate(
            #                (Learnt_Rstorage,np.array([[Rhat_theta1_update[0,0],Rhat_theta2_update[1,1]]]) ))
            Learnt_Rstorage.append(
                np.array([[Rhat_theta1_update[0, 0], Rhat_theta2_update[1,
                                                                        1]]]))
            #X_iter = np.concatenate((X_iter, X))
            X_iter.append(X.T)
            #Y_iter = np.concatenate((Y_iter, Y))
            Y_iter.append(Y.T)
            # form correct control matrices
            R1_control[1, 1] = Rhat_theta2_update[1, 1]
            R2_control[0, 0] = Rhat_theta1_update[0, 0]

        if occlusionflag == 1 and messageProcessingIndex > 1:
            # No measurement this frame: propagate the state with the model
            # and the last applied controls.
            X = dot(A, X) + dot(B, np.array([[u1_observed], [
                u2_observed
            ]]))  # using predicted value for the state
            Y = dot(C, X)

        if occlusionflag == 0:
            ro1ExpPos = dot(
                traMat,
                robot1Pose)  # robot one pose in Experiment coordinate system
            ro2ExpPos = dot(
                traMat,
                robot2Pose)  # robot two pose in Experiment coordinate system
            # Output vector built from the robots' x-positions only:
            # [midpoint; separation]; the state follows via Cinv.
            Y = np.array([[0.5 * (ro1ExpPos[0, 0] + ro2ExpPos[0, 0])],
                          [(ro2ExpPos[0, 0] - ro1ExpPos[0, 0])]])
            X = dot(Cinv, Y)

        if messageProcessingIndex > 0:
            # One DARE solve per agent with that agent's current R estimate;
            # the third return value is the state-feedback gain.
            [Sinf_ag1, L_ag1, G_ag1] = dare(A,
                                            B,
                                            Q,
                                            R1_control,
                                            S=None,
                                            E=None)
            [Sinf_ag2, L_ag2, G_ag2] = dare(A,
                                            B,
                                            Q,
                                            R2_control,
                                            S=None,
                                            E=None)
            U1_observed = -dot(G_ag1, X)  # 2 x 1 vector
            U2_observed = -dot(G_ag2, X)  # 2 x 1 vector
            u1_observed = U1_observed[0, 0]  # control to be applied at agent 1
            u2_observed = U2_observed[1, 0]  # control to be applied at agent 2
            socket.send_string(
                "%4.3f %4.3f" %
                (u1_observed, u2_observed))  # sending control to robots
            U_applied.append(np.array([[u1_observed, u2_observed]]))
            trueIndex = trueIndex + 1


#      adding disturbance to robots
#        if trueIndex == 400:
        if (1000 <= messageProcessingIndex <= 1020):
            print('applying disturbance')
            socket.send_string("%4.3f %4.3f" %
                               (-30, 0))  # sending control to robots

    except KeyboardInterrupt:
        # Manual stop: persist all logs, then halt the robots and exit.
        print('Interrupted due to abnormal robot behaviour or intentionally')
        sio.savemat(matfile1, mdict={'states': X_iter}, oned_as='row')
        # NOTE(review): matfile4 is not in the global declarations above —
        # confirm it exists at module level.
        sio.savemat(matfile4, mdict={'Outputs': Y_iter}, oned_as='row')
        sio.savemat(matfile2,
                    mdict={'LearntR': Learnt_Rstorage},
                    oned_as='row')
        sio.savemat(matfile3,
                    mdict={'AppliedControl': U_applied},
                    oned_as='row')
        # Send zero velocity repeatedly so the robots actually stop.
        for dummy in range(50):
            socket.send_string("%4.3f %4.3f " % (0, 0))
            socket.send_string("0.0 0.0")
        try:
            sys.exit(0)
        except SystemExit:
            os._exit(0)
Ejemplo n.º 6
0
def agentlearning(A, B, Q, R_est, u_obs, X, theta_max, theta_min, ForAgent):
    """Secant-search update of agent *ForAgent*'s control-cost weight.

    Repeatedly solves the DARE for candidate R matrices and adjusts the
    diagonal entry R[ForAgent, ForAgent] (the agent's theta) until the
    control that weight would produce at state X matches the observed
    control ``u_obs``.  Learning runs a fixed number of refinements per
    control-loop iteration.  Returns the updated R estimate as a copy;
    ``R_est`` itself is not modified.
    """

    n_refine = 10  # secant refinements per run of the control loop

    def control_for(weight_matrix):
        # LQR control component of *ForAgent* at state X under this R.
        _, _, gain = dare(A, B, Q, weight_matrix, S=None, E=None)
        return (-dot(gain, X))[ForAgent, 0]

    est_R = np.copy(R_est, order='k')
    u_est = control_for(est_R)

    # Current estimate already reproduces the observation: nothing to learn.
    if abs(u_est - u_obs) < 1e-5:
        return est_R

    # Controls produced at the two extreme admissible weights.
    r_at_max = np.copy(R_est, order='k')
    r_at_max[ForAgent, ForAgent] = theta_max
    u_max = control_for(r_at_max)

    r_at_min = np.copy(R_est, order='k')
    r_at_min[ForAgent, ForAgent] = theta_min
    u_min = control_for(r_at_min)

    theta_est = R_est[ForAgent, ForAgent]
    slope = (u_max - u_min) / (theta_max - theta_min)

    # Endpoints of the secant bracket, stored as [[theta], [u]] columns.
    low_point = np.array([[theta_est], [u_est]])
    toward_max = not ((slope > 0 and u_obs < u_est) or
                      (slope < 0 and u_obs > u_est))
    if toward_max:
        high_point = np.array([[theta_max], [u_max]])
    else:
        high_point = np.array([[theta_min], [u_min]])

    for _ in range(n_refine):
        # Invert the secant through the endpoints to place the next theta
        # where the observed control would lie.
        inv_slope = (high_point[0, 0] - low_point[0, 0]) / (high_point[1, 0] -
                                                            low_point[1, 0])
        theta_new = low_point[0, 0] + inv_slope * (u_obs - low_point[1, 0])
        est_R[ForAgent, ForAgent] = theta_new
        u_new = control_for(est_R)

        # Replace the endpoint on the same side of u_obs as the new point,
        # keeping the pair that still brackets the observation.
        if (u_new - u_obs) * (u_obs - high_point[1, 0]) > 0:
            low_point = np.array([[theta_new], [u_new]])
        else:
            high_point = np.array([[theta_new], [u_new]])

    return est_R
Ejemplo n.º 7
0
# Open-loop simulation: iterate x <- P x and stack the states row-wise
# into y (one row per step) for plotting.
y = x.T
for i in range(N):
    x = P @ x
    y = np.r_[y, x.T]
#plt.plot(y)
#print(y)

# Pole placement: pick the desired closed-loop poles and compute the
# state-feedback gain k with Ackermann's formula.
Pol = np.array([0.5, 0.1])
k = matlab.acker(P, q, Pol)
#print('PP k=', k)
# Sanity check: eigenvalues of the closed-loop matrix P - q k.
Ev = np.linalg.eigvals(P - q @ k)
#print('Ev=', Ev)

# Closed-loop simulation under the pole-placement gain.
x = x0
y = x.T
for i in range(N):
    x = (P - q @ k) @ x
    y = np.r_[y, x.T]
#plt.plot(y)

# LQ regulator: state weight Wx, input weight Wu.
Wx = np.identity(2)
Wu = 1
# NOTE(review): this unpacking rebinds Pol — the pole-placement Pol above
# is overwritten by matlab.dare's second return value here.
H, Pol, k = matlab.dare(P, q, Wx, Wu)
print('LQ k=', k)

# Closed-loop simulation under the LQ gain.
x = x0
y = x.T
for i in range(N):
    x = (P - q @ k) @ x
    y = np.r_[y, x.T]
plt.plot(y)