Example #1
def gdlyap(problem_data, K, L, show_warn=False, check_pd=False):
    """
    Solve a discrete-time generalized Lyapunov equation
    for stochastic linear systems with multiplicative noise.
    """

    problem_data_keys = [
        'A', 'B', 'C', 'Ai', 'Bj', 'Ck', 'varAi', 'varBj', 'varCk', 'Q', 'R',
        'S'
    ]
    A, B, C, Ai, Bj, Ck, varAi, varBj, varCk, Q, R, S = [
        problem_data[key] for key in problem_data_keys
    ]
    n = A.shape[1]

    stable = True
    # Compute matrix and vector for the linear equation solver
    Alin_P = np.eye(n * n) - cost_operator_P(problem_data, K, L)
    blin_P = vec(Q) + np.dot(kron(K.T), vec(R)) - np.dot(kron(L.T), vec(S))

    # Solve linear equations
    xlin_P = la.solve(Alin_P, blin_P)

    # Reshape
    P = np.reshape(xlin_P, [n, n])

    if check_pd:
        stable = is_pos_def(P)

    if not stable:
        P = None
        if show_warn:
            warnings.simplefilter('always', UserWarning)
            warn('System is possibly not mean-square stable')
    return P
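
The helpers vec, kron, cost_operator_P, and is_pos_def come from the project's utility modules and are not shown here. Below is a minimal sketch of plausible vec and kron definitions, consistent with how they are called above; this is an assumption, not the project's actual code.

import numpy as np

def vec(A):
    # Stack the columns of A into a single vector (Fortran / column-major order).
    # The project's utility may use a different ordering; for the symmetric
    # matrices solved for here, either consistent convention works.
    return np.asarray(A).reshape(-1, order='F')

def kron(A, B=None):
    # Kronecker product; with a single argument, return A (x) A,
    # matching calls like kron(K.T) above.
    if B is None:
        B = A
    return np.kron(A, B)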
    def lse(self, L):
        """
        Performs Least Squares Estimation with input data provided and returns 
        the least squares estimate        
        """
        # Unpack the required dynamics data
        Q = self.Q
        R = self.R
        n = self.n
        m = self.m
        ell = self.ell
        kronA = self.kronA
        kronB = self.kronB
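        # The stationary statistic L is assumed to satisfy the linear model
        #   vec(L) ≈ kronA @ vec(Q) + kronB @ vec(R)
        # so the unknown covariances can be recovered by (pseudoinverse) least squares below.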

        # Perform least-squares estimation
        Q_est = np.copy(Q)
        R_est = np.copy(R)
        if self.unknown_params.Q and self.unknown_params.R:
            S = np.hstack([kronA, kronB])
            vecTheta = mdot(la.pinv(S), vec(L))
            vecQ_est = vecTheta[0:ell**2]
            vecR_est = vecTheta[ell**2:]
            Q_est = np.reshape(vecQ_est, [n, n])
            R_est = np.reshape(vecR_est, [m, m])
        elif not self.unknown_params.Q and self.unknown_params.R:
            S = np.copy(kronB)
            vecCW = mdot(kronA, vec(Q))
            vecR_est = mdot(la.pinv(S), vec(L) - vecCW)
            R_est = np.reshape(vecR_est, [m, m])
        elif self.unknown_params.Q and not self.unknown_params.R:
            S = np.copy(kronA)
            vecCV = mdot(kronB, vec(R))
            vecQ_est = mdot(la.pinv(S), vec(L) - vecCV)
            Q_est = np.reshape(vecQ_est, [n, n])
        return Q_est, R_est
def reshaper(X,m,n,p,q):
    Y = np.zeros([m*n,p*q])
    k = 0
    for j in range(n):
        for i in range(m):
            Y[k] = vec(X[i*p:(i+1)*p,j*q:(j+1)*q])
            k += 1
    return Y
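
A quick sanity check of reshaper (hypothetical usage, assuming the project's vec stacks columns): applied to a Kronecker product it returns the rank-one rearrangement vec(A) vec(B)^T, which is how the estimated noise covariances are rearranged in estimate_model further below.

import numpy as np

def vec(A):
    # Column-major vectorization, assumed to match the project's convention
    return np.asarray(A).reshape(-1, order='F')

A = np.arange(1.0, 7.0).reshape(2, 3)   # m x n = 2 x 3
B = np.arange(1.0, 7.0).reshape(3, 2)   # p x q = 3 x 2
Y = reshaper(np.kron(A, B), 2, 3, 3, 2)
assert np.allclose(Y, np.outer(vec(A), vec(B)))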
def calc_sparsity(K, thresh, PGO):
    if PGO is None:
        regstr = 'vec1'
    else:
        if PGO.regularizer is None:
            regstr = 'vec1'
        else:
            regstr = PGO.regularizer.regstr

    # Calculate vals
    if regstr == 'vec1' or regstr == 'vec_huber':
        vals = np.abs(vec(K))
    elif regstr == 'mr':
        vals = np.abs(K).max(1)
    elif regstr == 'mc':
        vals = np.abs(K).max(0)
    elif regstr == 'glr' or regstr == 'glr_huber':
        vals = la.norm(K, ord=2, axis=1)
    elif regstr == 'glc' or regstr == 'glc_huber':
        vals = la.norm(K, ord=2, axis=0)

    binmax = np.max(vals)
    bin1 = thresh * binmax
    sparsity = np.sum(vals < bin1) / vals.size
    print('Sparsity = %.3f' % sparsity)

    # Calculate black and white sparsity matrix
    Kbw = np.zeros_like(K)
    if regstr == 'vec1' or regstr == 'vec_huber':
        Kbw = np.abs(K) > bin1
    elif regstr == 'mr' or regstr == 'glr' or regstr == 'glr_huber':
        for i in range(K.shape[0]):
            if vals[i] > bin1:
                Kbw[i, :] = 1
    elif regstr == 'mc' or regstr == 'glc' or regstr == 'glc_huber':
        for j in range(K.shape[1]):
            if vals[j] > bin1:
                Kbw[:, j] = 1

    return vals, sparsity, binmax, bin1, Kbw
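
A hypothetical call with PGO=None, which falls back to the elementwise 'vec1' rule (vec is the project's vectorization helper):

K = np.array([[0.5, 0.001, -0.3],
              [0.0, 0.2, 0.004]])
vals, sparsity, binmax, bin1, Kbw = calc_sparsity(K, thresh=0.01, PGO=None)
# bin1 = 0.01 * max|K| = 0.005; three of the six entries fall below it,
# so sparsity = 0.5 and Kbw marks the remaining "large" entries.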
def example_system_erdos_renyi(n,
                               diffusion_constant=1.0,
                               leakiness_constant=0.1,
                               time_constant=0.05,
                               leaky=True,
                               seed=None,
                               detailed_outputs=False,
                               dirname_out='.'):
    npr.seed(seed)
    # ER probability
    # crp = 7.0
    # erp = (np.log(n+1)+crp)/(n+1)  # almost surely connected prob=0.999

    mean_degree = 4.0  # should be > 1 for giant component to exist
    erp = mean_degree / (n - 1.0)

    n_edges = 0
    # Create random Erdos-Renyi graph
    # Adjacency matrix
    adjacency = np.zeros([n, n])
    for i in range(n):
        for j in range(i + 1, n):
            if npr.rand() < erp:
                n_edges += 1
                adjacency[i, j] = npr.randint(low=1, high=4)
                adjacency[j, i] = np.copy(adjacency[i, j])

    # Degree matrix
    degree = np.diag(adjacency.sum(axis=0))
    # Graph Laplacian
    laplacian = degree - adjacency
    # Continuous-time dynamics matrices
    Ac = -laplacian * diffusion_constant
    # Normalize so that B = np.eye(n) after discretization
    Bc = np.eye(n) / time_constant

    if leaky:
        Fc = leakiness_constant * np.eye(n)
        Ac = Ac - Fc

    # Plot
    visualize_graph_ring(adjacency, n, dirname_out)

    # Forward Euler discretization
    A = np.eye(n) + Ac * time_constant
    B = Bc * time_constant
    m = np.copy(n)  # input dimension equals state dimension since B is square

    # Multiplicative noises
    varAi = 0.005 * npr.randint(low=1, high=5, size=n_edges) * np.ones(n_edges)
    Ai = np.zeros([n_edges, n, n])
    k = 0
    for i in range(n):
        for j in range(i + 1, n):
            if adjacency[i, j] > 0:
                Ai[k, i, i] = 1
                Ai[k, j, j] = 1
                Ai[k, i, j] = -1
                Ai[k, j, i] = -1
                k += 1

    varBj = 0.05 * npr.randint(low=1, high=5, size=n) * np.ones(n)
    Bj = np.zeros([n, n, m])
    for i in range(n):
        Bj[i, i, i] = 1

    SigmaA = np.sum(
        [varAi[i] * np.outer(vec(Ai[i]), vec(Ai[i])) for i in range(n_edges)],
        axis=0)
    SigmaB = np.sum(
        [varBj[j] * np.outer(vec(Bj[j]), vec(Bj[j])) for j in range(n)],
        axis=0)

    if detailed_outputs:
        outputs = n, m, A, B, SigmaA, SigmaB, varAi, varBj, Ai, Bj
    else:
        outputs = n, m, A, B, SigmaA, SigmaB

    return outputs
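
A hypothetical call; the function relies on npr = numpy.random and the project's visualize_graph_ring, which also writes a plot to dirname_out:

n, m, A, B, SigmaA, SigmaB = example_system_erdos_renyi(n=8, seed=1)
# A and B are 8 x 8 (B = I after discretization); SigmaA and SigmaB are 64 x 64
# aggregate covariances of the multiplicative noise directions.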
Example #6
def run_policy_gradient(SS, PGO, CSO=None):
    # run_policy_gradient  Run policy gradient descent on a system
    #
    # Inputs:
    # SS is an LQRSysMult instance with an initial gain matrix K
    # PGO is a PolicyGradientOptions instance
    #
    # K1_subs is the subscripts of the first gain entry varied in surf plot
    # K2_subs is the subscripts of the second gain entry varied in surf plot
    # ax is the axes to plot in
    #
    # Outputs:
    # SS with closed-loop properties at the post-optimization configuration
    # histlist, a list of data histories over optimization iterations

    # TEMPORARY - MOVE INTO CLASS OPTIONS IF KEEPING AROUND
    Kmax = np.max(np.abs(vec(SS.K)))
    bin1 = 0.01 * Kmax

    # Initialize
    stop = False
    converged = False
    stop_early = False
    iterc = 0
    sleep(0.5)
    t_start = time()

    headerstr = 'Iteration | Stop quant / threshold |  Curr obj |  Best obj | Norm of gain delta | Stepsize  '
    if PGO.regularizer is not None:
        headerstr = headerstr + '| Sparsity'
    print(headerstr)

    K = np.copy(SS.K)
    Kbest = np.copy(SS.K)
    objfun_best = np.inf
    Kold = np.copy(SS.K)

    P = SS.P
    S = SS.S

    fbest_repeats = 0

    # Initialize history matrices
    if PGO.keep_hist:
        mat_shape = list(K.shape)
        mat_shape.append(PGO.max_iters)
        mat_shape = tuple(mat_shape)
        K_hist = np.full(mat_shape, np.inf)
        grad_hist = np.full(mat_shape, np.inf)
        c_hist = np.full(PGO.max_iters, np.inf)
        objfun_hist = np.full(PGO.max_iters, np.inf)

    rng = rngg()

    # Iterate
    while not stop:
        if PGO.exact:
            # Calculate gradient (G)
            # Do this to get combined calculation of P and S,
            # pass previous P and S to warm-start dlyap iterative algorithm
            SS.calc_PS(P, S)
            Glqr = SS.grad
            P = SS.P
            S = SS.S
        else:
            if any([
                    PGO.step_direction == 'gradient',
                    PGO.step_direction == 'natural_gradient',
                    PGO.step_direction == 'gauss_newton',
                    PGO.step_direction == 'policy_iteration'
            ]):
                # Estimate gradient using zeroth-order optimization

                # Rollout length
                nt = 20

                # Number of rollouts
                nr = 100000

                # Exploration radius
                ru = 1e-1

                # Draw random initial states
                x = rng.multivariate_normal(np.zeros(SS.n), SS.S0, nr)

                # Draw random gain deviations and scale to Frobenius norm ball
                Uraw = rng.normal(size=[nr, SS.m, SS.n])
                U = ru * Uraw / la.norm(Uraw, 'fro', axis=(1, 2))[:, None, None]

                # Form the randomly perturbed gains for each rollout
                Kd = K + U

                # Simulate all rollouts together
                c = np.zeros(nr)
                for t in range(nt):
                    # Accumulate cost
                    c += np.einsum('...i,...i', x,
                                   np.einsum('jk,...k', SS.QK, x))

                    # Calculate noisy closed-loop dynamics
                    AKr = SS.A + np.einsum('...ik,...kj', SS.B, Kd)
                    for i in range(SS.p):
                        AKr += (SS.a[i]**0.5) * rng.randn(
                            nr)[:, np.newaxis, np.newaxis] * np.repeat(
                                SS.Aa[np.newaxis, :, :, i], nr, axis=0)
                    for j in range(SS.q):
                        AKr += np.einsum(
                            '...ik,...kj', (SS.b[j]**0.5) *
                            rng.randn(nr)[:, np.newaxis, np.newaxis] *
                            np.repeat(SS.Bb[np.newaxis, :, :, j], nr, axis=0),
                            Kd)

                    # Transition the state
                    x = np.einsum('...jk,...k', AKr, x)

                # Estimate gradient
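                # One-point zeroth-order estimate:
                #   G ≈ (d / (nr * ru^2)) * sum_i cost_i * U_i, with d = K.size,
                # i.e. rollout costs weighted by the random gain perturbations.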
                Glqr = np.einsum('i,i...', c, U)
                Glqr *= K.size / (nr * (ru**2))

                # TESTING
                G_est = Glqr
                G_act = SS.grad

                print('estimated gradient: ')
                print(G_est)
                print('actual gradient: ')
                print(G_act)
                print('error angle')
                print(
                    np.arccos(
                        np.sum((G_est * G_act)) /
                        (la.norm(G_est) * la.norm(G_act))))
                print('error scale')
                print((la.norm(G_est) / la.norm(G_act)))

#INWORK
#                if step_direction=='natural_gradient' or step_direction=='gauss_newton' or step_direction=='policy_iteration':
#                    # Estimate S
#                    if step_direction=='policy_iteration':
#                        # Estimate R_K
#
#                        # NOTE!!!!! only valid for zero multiplicative noise
#                        # for now - also need to estimate the noise terms? or
#                        # are they known a priori?
#
#                        # Also need to finish the True "coarse-ID" estimation
#                        # which takes uncertainty into account to ensure the
#                        # estimated optimal gain is stabilizing so P_K is well
#                        # defined
#
#                        # Also for the theory we need estimates of error of R_K
#
#
#                        # Model-based estimation
#                        [Ahat,Bhat] = lsqr_lti(SS)
#                        P_Khat = dlyap_mult(Ahat,Bhat,SS.a,SS.Aa,SS.b,SS.Bb,SS.Q,SS.R,SS.S0,K)
#                        R_K = SS.R + Bhat.T*P_Khat*Bhat
#
#                        # Model-free estimation
#                        # Rollout length
#                        nt = 20
#
#                        # Number of rollouts
#                        nr = 10000
#
#                        # Random initial state standard deviation
#                        xstd = 1
#
#                        # Random control input standard deviation
#                        ustd = 1
#
#                        # Random disturbance input standard deviation
#                        # wstd = 0.01
#                        wstd = 0
#
#                        [~,H21hat,H22hat] = lsqr_lti_qfun(SS,xstd,ustd,wstd,nt,nr)
#                        H22=H22hat
#                        H21=H21hat

        # Calculate step direction (V)
        if PGO.regularizer is None or PGO.opt_method == 'proximal':
            G = Glqr
        else:
            Greg = PGO.regularizer.rgrad(SS.K)
            G = Glqr + PGO.regweight * Greg

        if PGO.regularizer is None or PGO.opt_method == 'proximal':
            if PGO.step_direction == 'gradient':
                V = G
            elif PGO.step_direction == 'natural_gradient':
                V = solveb(SS.grad, SS.S)
            elif PGO.step_direction == 'gauss_newton':
                V = solveb(la.solve(SS.RK, SS.grad), SS.S)
        else:
            if PGO.step_direction == 'gradient':
                V = G
#            # Variant 1 - seems more elegant but why should it work?
#            elif PGO.step_direction=='natural_gradient':
#                V = solveb(G,SS.S)
#            elif PGO.step_direction=='gauss_newton':
#                V = solveb(la.solve(SS.RK,G),SS.S)
#            # Variant 2 - seems more justifiable
#            elif PGO.step_direction=='natural_gradient':
#                V = solveb(SS.grad,SS.S) + PGO.regweight*PGO.regularizer.rgrad(SS.K)
#            elif PGO.step_direction=='gauss_newton':
#                V = solveb(la.solve(SS.RK,SS.grad),SS.S) + PGO.regweight*PGO.regularizer.rgrad(SS.K)

        # Check if mean-square stable
        if SS.c == np.inf:
            raise Exception('ITERATE WENT UNSTABLE DURING GRADIENT DESCENT')

        if PGO.regularizer is None:
            objfun = SS.c
        else:
            objfun = SS.c + PGO.regweight * PGO.regularizer.rfun(SS.K)

        # Record current iterate
        if PGO.keep_hist:
            K_hist[:, :, iterc] = SS.K
            grad_hist[:, :, iterc] = SS.grad
            c_hist[iterc] = SS.c
            objfun_hist[iterc] = objfun

        if iterc == 0:
            Kchange = np.inf
        else:
            Kchange = la.norm(K - Kold, 'fro') / la.norm(K, 'fro')
        Kold = K

        # Check for stopping condition
        if PGO.stop_crit == 'gradient':
            normgrad = la.norm(G)
            stop_quant = normgrad
            stop_thresh = PGO.epsilon
            if normgrad < PGO.epsilon:
                converged = True
        elif PGO.stop_crit == 'Kchange':
            stop_quant = Kchange
            stop_thresh = PGO.epsilon
            if Kchange < PGO.epsilon:
                converged = True
        elif PGO.stop_crit == 'fbest':
            stop_quant = fbest_repeats
            stop_thresh = PGO.fbest_repeat_max
            if fbest_repeats > PGO.fbest_repeat_max:
                converged = True
        elif PGO.stop_crit == 'fixed':
            stop_quant = iterc
            stop_thresh = PGO.max_iters

        if iterc >= PGO.max_iters - 1:
            stop_early = True
        else:
            iterc += 1

        stop = converged or stop_early

        if PGO.display_output and PGO.regularizer is not None:
            Kmax = np.max(np.abs(vec(SS.K)))
            bin1 = 0.05 * Kmax
            sparsity = np.sum(np.abs(SS.K) < bin1) / SS.K.size

        # Record current best (subgradient method)
        if objfun < objfun_best:
            objfun_best = objfun
            Kbest = SS.K
            fbest_repeats = 0
        else:
            fbest_repeats += 1

        # Update iterate
        if PGO.opt_method == 'gradient':
            if PGO.step_direction == 'policy_iteration':
                eta = 0.5  # for printing only
                H21 = la.multi_dot([SS.B.T, SS.P, SS.A])
                H22 = SS.RK
                if PGO.regularizer is None:
                    K = -la.solve(H22, H21)
                    SS.setK(K)
                else:
                    if PGO.stepsize_method == 'constant':
                        K = -la.solve(
                            H22, H21
                        ) - PGO.eta * PGO.regweight * Greg  # This might not work, it is sequential GN then grad desc regularizer
                        SS.setK(K)
                    elif PGO.stepsize_method == 'backtrack':
                        raise Exception(
                            "Invalid stepsize option, choose constant")
            else:
                # Calculate step size
                if PGO.stepsize_method == 'constant':
                    eta = PGO.eta
                    K = SS.K - eta * V
                    SS.setK(K)
                elif PGO.stepsize_method == 'backtrack':
                    SS, eta = backtrack(SS,
                                        PGO.regweight,
                                        PGO.regularizer,
                                        G,
                                        -V,
                                        eta0=PGO.eta)
                    K = SS.K
                elif PGO.stepsize_method == 'square_summable':  # INWORK
                    eta = PGO.eta / (1.0 + iterc)
                    K = SS.K - eta * V
                    SS.setK(K)
        elif PGO.opt_method == 'proximal':
            # Gradient step on LQR cost
            if PGO.step_direction == 'policy_iteration':
                eta = 0.5  # for printing only
                H21 = la.multi_dot([SS.B.T, SS.P, SS.A])
                H22 = SS.RK
                K = -la.solve(H22, H21)
                SS.setK(K)
            else:
                # Calculate step size
                if PGO.stepsize_method == 'constant':
                    eta = PGO.eta
                    K = SS.K - eta * V
                    SS.setK(K)
                elif PGO.stepsize_method == 'backtrack':
                    SS, eta = backtrack(SS,
                                        PGO.regweight,
                                        PGO.regularizer,
                                        G,
                                        -V,
                                        eta0=PGO.eta)
                    K = SS.K
                elif PGO.stepsize_method == 'square_summable':  # INWORK
                    eta = PGO.eta / (1.0 + iterc)
                    K = SS.K - eta * V
                    SS.setK(K)
            # Prox step on regularizer
            K = prox(SS, PGO)
            SS.setK(K)

        if hasattr(PGO, 'slow'):
            if PGO.slow is not None:
                sleep(PGO.slow)

        # Printing
        if PGO.display_output:
            # Print iterate messages
            printstr0 = "{0:9d}".format(iterc + 1)
            printstr1 = " {0:5.3e} / {1:5.3e}".format(stop_quant, stop_thresh)
            printstr2a = "{0:5.3e}".format(objfun)
            printstr2b = "{0:5.3e}".format(objfun_best)
            printstr3 = "         {0:5.3e}".format(Kchange)
            printstr4 = "{0:5.3e}".format(eta)
            printstr = printstr0 + ' | ' + printstr1 + ' | ' + printstr2a + ' | ' + printstr2b + ' | ' + printstr3 + ' | ' + printstr4
            if PGO.regularizer is not None:
                printstr5 = "{0:6.2f}%".format(100 * sparsity)
                printstr = printstr + ' | ' + printstr5
            if PGO.display_inplace:
                if iterc == 0:
                    print(" " * len(printstr), end='')
                inplace_print(printstr)
            else:
                print(printstr)
            if stop:  # Print stopping messages
                print('')
                if converged:
                    print('Optimization converged, stopping now')
                if stop_early:
                    #                    warnings.simplefilter('always', UserWarning)
                    #                    warn('Max iterations exceeded, stopping optimization early')
                    print('Max iterations exceeded, stopping optimization')

    if PGO.keep_hist:
        # Trim empty parts from preallocation
        K_hist = K_hist[:, :, 0:iterc + 1]
        grad_hist = grad_hist[:, :, 0:iterc + 1]
        c_hist = c_hist[0:iterc + 1]
        objfun_hist = objfun_hist[0:iterc + 1]
    else:
        K_hist = None
        grad_hist = None
        c_hist = None
        objfun_hist = None

    if PGO.keep_opt == 'best':
        SS.setK(Kbest)

    t_end = time()

    hist_list = [K_hist, grad_hist, c_hist, objfun_hist]

    print(
        'Policy gradient descent optimization completed after %d iterations, %.3f seconds'
        % (iterc, t_end - t_start))
    return SS, hist_list
Example #7
def regularizer_grad(K, regstr, mu=0, soft=False, thresh1=0, thresh2=0):
    # VECTOR NORMS
    if regstr == 'vec1':  # Vector 1-norm
        grad = np.sign(K)
    if regstr == 'vec2':  # Vector 2-norm
        grad = K / la.norm(K, 'fro')
    if regstr == 'vecinf':  # Vector inf-norm
        if not soft:
            grad = infnormgrad(K)
        else:
            grad = infnormgrad(K, thresh1)

    # HUBER VECTOR NORMS
    if regstr == 'vec_huber':  # Vector Huber-norm
        grad = softsign(K, thresh2)

    # SQUARED VECTOR NORMS
    if regstr == 'vec1sq':  # Squared vector 1-norm
        grad = 2 * la.norm(vec(K), ord=1) * np.sign(K)
    if regstr == 'vec2sq':  # Squared vector 2-norm
        grad = 2 * K
    if regstr == 'vecinfsq':  # Squared vector inf-norm
        if not soft:
            grad = infnormgradsq(K)
        else:
            grad = infnormgradsq(K, thresh1)
    if regstr == 'vec_hubersq':  # Squared vector Huber-norm
        grad = 2 * huber_norm(thresh2, K) * softsign(K, thresh2)

    # MATRIX NORMS
    if regstr == 'mr':  # Row norm
        grad = np.zeros_like(K)
        for i in range(K.shape[0]):
            if not soft:
                grad[i, :] = infnormgrad(K[i, :])
            else:
                grad[i, :] = infnormgrad(K[i, :], thresh1)
    if regstr == 'mc':  # Column norm
        grad = np.zeros_like(K)
        for j in range(K.shape[1]):
            if not soft:
                grad[:, j] = infnormgrad(K[:, j])
            else:
                grad[:, j] = infnormgrad(K[:, j], thresh1)
    if regstr == 'glr':  # Group lasso on rows
        grad = np.zeros_like(K)
        for i in range(K.shape[0]):
            grad[i, :] = K[i, :] / la.norm(K[i, :], 2)
    if regstr == 'glc':  # Group lasso on columns
        grad = np.zeros_like(K)
        for j in range(K.shape[1]):
            grad[:, j] = K[:, j] / la.norm(K[:, j], 2)
    if regstr == 'sglr':  # Sparse group lasso on rows
        grad = (1 - mu) * regularizer_grad(K, 'vec1') + mu * regularizer_grad(
            K, 'glr')
    if regstr == 'sglc':  # Sparse group lasso on columns
        grad = (1 - mu) * regularizer_grad(K, 'vec1') + mu * regularizer_grad(
            K, 'glc')

    # HUBER MATRIX NORMS
    if regstr == 'mr_huber':
        grad = np.zeros_like(K)
        for i in range(K.shape[0]):
            a = la.norm(K[i, :], np.inf)
            if a > thresh2:
                grad[i, :] = infnormgrad(K[i, :])
            else:
                grad[i, :] = (0.5 / thresh2) * infnormgradsq(K[i, :])

    if regstr == 'glr_huber':
        grad = np.zeros_like(K)
        for i in range(K.shape[0]):
            a = la.norm(K[i, :], 2)
            if a > thresh2:
                grad[i, :] = K[i, :] / a
            else:
                grad[i, :] = (1 / thresh2) * K[i, :]
    if regstr == 'glc_huber':
        grad = np.zeros_like(K)
        for j in range(K.shape[1]):
            a = la.norm(K[:, j], 2)
            if a > thresh2:
                grad[:, j] = K[:, j] / a
            else:
                grad[:, j] = K[:, j] / thresh2

    # SQUARED MATRIX NORMS
    if regstr == 'mrsq':  # Squared row norm
        grad = np.zeros_like(K)
        subs = []
        subsmax = []
        for i, im in enumerate(np.argmax(K, axis=1)):
            subsmax.append((i, im))
        if not soft:
            subs = subsmax.copy()
        else:
            for i, row in enumerate(K):
                for j, colj in enumerate(row):
                    if colj >= (1 - thresh1) * K[subsmax[i]]:
                        subs.append((i, j))
        for sb in subs:
            if not soft:
                grad[sb] = 2 * regularizer_fun(K, 'mr') * softsignscalar(K[sb])
            else:
                grad[sb] = 2 * regularizer_fun(K, 'mr') * softsignscalar(
                    K[sb], thresh2)

    return grad
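
regularizer_fun (used in the 'mrsq' branch above) is defined elsewhere in the project. A minimal sketch covering a few representative cases, consistent with the gradients above (an assumption, not the original implementation; la is numpy.linalg):

def regularizer_fun(K, regstr, mu=0, thresh2=0):
    if regstr == 'vec1':   # Vector 1-norm: sum of absolute values
        return np.abs(K).sum()
    if regstr == 'vec2':   # Vector 2-norm (Frobenius norm)
        return la.norm(K, 'fro')
    if regstr == 'mr':     # Row norm: sum of row-wise infinity norms
        return np.abs(K).max(axis=1).sum()
    if regstr == 'glr':    # Group lasso on rows: sum of row-wise 2-norms
        return la.norm(K, ord=2, axis=1).sum()
    raise NotImplementedError(regstr)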
# Reprocess system parameters to match format used in sys-id code
n = np.copy(SS.n)
m = np.copy(SS.m)
A = np.copy(SS.A)
B = np.copy(SS.B)
varAi = np.copy(SS.a)
varBj = np.copy(SS.b)
Aa = SS.Aa
Bb = SS.Bb



Ai = np.moveaxis(SS.Aa, 2, 0)
Bj = np.moveaxis(SS.Bb, 2, 0)
SigmaA = np.sum([varAi[i]*np.outer(vec(Ai[i]), vec(Ai[i])) for i in range(SS.p)], axis=0)
SigmaB = np.sum([varBj[j]*np.outer(vec(Bj[j]), vec(Bj[j])) for j in range(SS.q)], axis=0)
Q = np.copy(SS.Q)
R = np.copy(SS.R)

# Rollout length (same as pol-grad)
ell = 20

# Number of rollouts
# Max number of rollouts = nr*num_iters from pol-grad experiment
nr = 1000
num_iters = 200
ns = 10

# Number of experiments/trials
ne = 20
    def run(self, T):
        """
        Perform adaptive Kalman filter iterations
        Input Parameters:
        T: Integer of maximum filter iterations
        """
        F = self.F
        H = self.H
        Q = self.Q
        R = self.R
        n = self.n
        m = self.m
        p = self.p
        Mopi = self.Mopi
        x_mean0 = self.x_mean0
        x_covr0 = self.x_covr0
        Q_est0 = self.Q_est0
        R_est0 = self.R_est0
        L0 = self.L0

        # Preallocate history data arrays
        x_hist = np.full((T + 1, n), np.nan)
        y_hist = np.full((T + 1, m), np.nan)
        x_pre_hist = np.full((T + 1, n), np.nan)
        x_post_hist = np.full((T + 1, n), np.nan)
        P_pre_hist = np.full((T + 1, n, n), np.nan)
        P_post_hist = np.full((T + 1, n, n), np.nan)
        K_hist = np.full((T + 1, n, m), np.nan)
        Q_est_hist = np.full((T + 1, n, n), np.nan)
        R_est_hist = np.full((T + 1, m, m), np.nan)

        # Initialize the iterates
        x_post = x_mean0
        P_post = x_covr0
        Q_est = Q_est0
        R_est = R_est0
        L = L0
        x = npr.multivariate_normal(x_mean0, x_covr0)
        x_hist[0] = x
        x_post_hist[0] = x_post
        P_post_hist[0] = P_post

        # Perform dynamic adaptive Kalman filter updates
        for k in range(T):
            # Print the iteration number
            print("k = %9d / %d" % (k + 1, T))
            # Generate a new multivariate Gaussian measurement noise
            v = npr.multivariate_normal(np.zeros(m), R)
            # Collect and store a new measurement
            y = mdot(H, x) + v
            y_hist[k] = y
            # Noise covariance estimation
            if k > p - 1:
                # Stack the p most recent measurements, ending at time step k-1
                Yold = vec(y_hist[np.arange(k - 1, k - p - 1, -1)].T)

                # Stack the p most recent measurements, ending at time step k
                Ynew = vec(y_hist[np.arange(k, k - p, -1)].T)

                # Formulate a linear stationary time series
                Z = mdot(Mopi, Ynew) - mdot(F, Mopi, Yold)

                # Recursive covariance unbiased estimator
                L = ((k - 1) / k) * L + (1 / k) * np.outer(Z, Z)

                # Get the least squares estimate of selected covariances
                Q_est_new, R_est_new = self.lse(L)

                # Check positive semidefiniteness of estimated covariances
                if is_pos_def(Q_est_new):
                    Q_est = np.copy(Q_est_new)
                if is_pos_def(R_est_new):
                    R_est = np.copy(R_est_new)

            # Update the covariance estimate history
            Q_est_hist[k] = Q_est
            R_est_hist[k] = R_est

            ## Update state estimates using standard Kalman filter equations
            # Calculate the a priori state estimate
            x_pre = mdot(F, x_post)

            # Calculate the a priori error covariance estimate
            P_pre = mdot(F, P_post, F.T) + Q_est

            # Calculate the Kalman gain
            K = solveb(mdot(P_pre, H.T), mdot(H, P_pre, H.T) + R_est)

            # Calculate the a posteriori state estimate
            x_post = x_pre + mdot(K, y - mdot(H, x_pre))

            # Calculate the a posteriori error covariance estimate
            IKH = np.eye(n) - mdot(K, H)
            P_post = mdot(IKH, P_pre, IKH.T) + mdot(K, R, K.T)

            # Store the histories
            x_pre_hist[k + 1] = x_pre
            x_post_hist[k + 1] = x_post
            P_pre_hist[k + 1] = P_pre
            P_post_hist[k + 1] = P_post
            K_hist[k + 1] = K

            # True system updates (true state transition and measurement)
            # Generate process noise
            w = npr.multivariate_normal(np.zeros(n), Q)

            # Update and store the state
            x = mdot(F, x) + w
            x_hist[k + 1] = x

        # Tie up loose ends
        y_hist[-1] = y
        x_pre_hist[0] = x_post_hist[0]
        P_pre_hist[0] = P_post_hist[0]
        K_hist[0] = K_hist[1]
        Q_est_hist[-1] = Q_est
        R_est_hist[-1] = R_est

        # Return data history
        data_hist = DataHist(T, Q_est_hist, R_est_hist, x_hist, x_pre_hist,
                             x_post_hist, P_pre_hist, P_post_hist)
        return data_hist
def estimate_model(n, m, A, B, SigmaA, SigmaB, nr, ell, x_hist, u_mean_hist, u_covr_hist,
                   display_estimates=False, AB_known=False):
    muhat_hist = np.zeros([ell+1, n])
    Xhat_hist = np.zeros([ell+1, n*n])
    What_hist = np.zeros([ell+1, n*m])

    # First stage: mean dynamics parameter estimation
    if AB_known:
        Ahat = np.copy(A)
        Bhat = np.copy(B)
    else:
        # Form data matrices for least-squares estimation
        for t in range(ell+1):
            muhat_hist[t] = (1/nr)*np.sum(x_hist[t], axis=0)
            Xhat_hist[t] = (1/nr)*vec(np.sum(np.einsum('...i,...j', x_hist[t], x_hist[t]), axis=0))
            if t < ell:
                # What_hist[t] = (1/nr)*vec(np.sum(np.einsum('...i,...j',x_hist[t],u_mean_hist[t]),axis=0))
                What_hist[t] = vec(np.outer(muhat_hist[t], u_mean_hist[t]))
        Y = muhat_hist[1:].T
        Z = np.vstack([muhat_hist[0:-1].T, u_mean_hist.T])
        # Solve least-squares problem
        # Thetahat = mdot(Y, Z.T, la.pinv(mdot(Z, Z.T)))
        Thetahat = la.lstsq(Z.T, Y.T, rcond=None)[0].T
        # Split learned model parameters
        Ahat = Thetahat[:,0:n]
        Bhat = Thetahat[:,n:n+m]

    AAhat = np.kron(Ahat, Ahat)
    ABhat = np.kron(Ahat, Bhat)
    BAhat = np.kron(Bhat, Ahat)
    BBhat = np.kron(Bhat, Bhat)

    # Second stage: covariance dynamics parameter estimation
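    # The second-moment trajectory is assumed to follow
    #   Xhat[t+1] ≈ AAhat Xhat[t] + BAhat What[t] + ABhat What[t] + BBhat Uhat[t]
    #               + SigmaA' Xhat[t] + SigmaB' Uhat[t]
    # so subtracting the known mean-dynamics terms (Cminus) leaves a residual C
    # that is linear in the unknown noise covariance parameters SigmaA', SigmaB'.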
    # Form data matrices for least-squares estimation
    C = np.zeros([ell, n*n]).T
    Uhat_hist = np.zeros([ell, m*m])
    for t in range(ell):
        Uhat_hist[t] = vec(u_covr_hist[t] + np.outer(u_mean_hist[t], u_mean_hist[t]))
        Cminus = mdot(AAhat,Xhat_hist[t])+mdot(BAhat,What_hist[t])+mdot(ABhat,What_hist[t].T)+mdot(BBhat,Uhat_hist[t])
        C[:,t] = Xhat_hist[t+1] - Cminus
    D = np.vstack([Xhat_hist[0:-1].T, Uhat_hist.T])
    # Solve least-squares problem
    # SigmaThetahat_prime = mdot(C, D.T, la.pinv(mdot(D,D.T)))
    SigmaThetahat_prime = la.lstsq(D.T, C.T, rcond=None)[0].T
    # Split learned model parameters
    SigmaAhat_prime = SigmaThetahat_prime[:, 0:n*n]
    SigmaBhat_prime = SigmaThetahat_prime[:, n*n:n*(n+m)]

    # Reshape and project the noise covariance estimates onto the semidefinite cone
    SigmaAhat = reshaper(SigmaAhat_prime, n, n, n, n)
    SigmaBhat = reshaper(SigmaBhat_prime, n, m, n, m)
    SigmaAhat = positive_semidefinite_part(SigmaAhat)
    SigmaBhat = positive_semidefinite_part(SigmaBhat)

    if display_estimates:
        prettyprint(Ahat, "Ahat")
        prettyprint(A, "A   ")
        prettyprint(Bhat, "Bhat")
        prettyprint(B, "B   ")
        prettyprint(SigmaAhat, "SigmaAhat")
        prettyprint(SigmaA, "SigmaA   ")
        prettyprint(SigmaBhat, "SigmaBhat")
        prettyprint(SigmaB, "SigmaB   ")

    return Ahat, Bhat, SigmaAhat, SigmaBhat
def estimate_model_var_only(n, m, A, B, SigmaA, SigmaB, varAi, varBj, Ai, Bj, nr, ell, x_hist, u_mean_hist, u_covr_hist,
                            display_estimates=False, AB_known=False, detailed_outputs=True):
    muhat_hist = np.zeros([ell+1, n])
    Xhat_hist = np.zeros([ell+1, n*n])
    What_hist = np.zeros([ell+1, n*m])

    # First stage: mean dynamics parameter estimation
    if AB_known:
        Ahat = np.copy(A)
        Bhat = np.copy(B)
    else:
        # Form data matrices for least-squares estimation
        for t in range(ell+1):
            muhat_hist[t] = (1/nr)*np.sum(x_hist[t], axis=0)
            Xhat_hist[t] = (1/nr)*vec(np.sum(np.einsum('...i,...j', x_hist[t], x_hist[t]), axis=0))
            if t < ell:
                # What_hist[t] = (1/nr)*vec(np.sum(np.einsum('...i,...j',x_hist[t],u_mean_hist[t]),axis=0))
                What_hist[t] = vec(np.outer(muhat_hist[t], u_mean_hist[t]))
        Y = muhat_hist[1:].T
        Z = np.vstack([muhat_hist[0:-1].T, u_mean_hist.T])
        # Solve least-squares problem
        # Thetahat = mdot(Y, Z.T, la.pinv(mdot(Z, Z.T)))
        Thetahat = la.lstsq(Z.T, Y.T, rcond=None)[0].T
        # Split learned model parameters
        Ahat = Thetahat[:,0:n]
        Bhat = Thetahat[:,n:n+m]

    AAhat = np.kron(Ahat, Ahat)
    ABhat = np.kron(Ahat, Bhat)
    BAhat = np.kron(Bhat, Ahat)
    BBhat = np.kron(Bhat, Bhat)

    # Second stage: covariance dynamics parameter estimation
    # Form data matrices for least-squares estimation
    C = np.zeros([ell, n*n]).T
    Uhat_hist = np.zeros([ell, m*m])
    for t in range(ell):
        Uhat_hist[t] = vec(u_covr_hist[t] + np.outer(u_mean_hist[t], u_mean_hist[t]))
        Cminus = mdot(AAhat,Xhat_hist[t])+mdot(BAhat,What_hist[t])+mdot(ABhat,What_hist[t].T)+mdot(BBhat,Uhat_hist[t])
        C[:,t] = Xhat_hist[t+1] - Cminus

    C = vec(C)

    X1 = Xhat_hist[0:-1].T
    U1 = Uhat_hist.T

    D1 = np.vstack([vec(np.dot(np.kron(Ai[i], Ai[i]), X1)) for i in range(np.size(varAi))])
    D2 = np.vstack([vec(np.dot(np.kron(Bj[j], Bj[j]), U1)) for j in range(np.size(varBj))])

    D = np.vstack([D1, D2])
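    # With the variance-only parameterization, the residual model is
    #   C ≈ sum_i varAi[i] * kron(Ai[i], Ai[i]) X1 + sum_j varBj[j] * kron(Bj[j], Bj[j]) U1
    # which is linear in the scalar variances, so each row of D corresponds to one variance.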

    # Solve least-squares problem
    # var_hat = mdot(C, D.T, la.pinv(mdot(D,D.T)))
    # var_hat = mdot(la.pinv(mdot(D, D.T)), D, C)
    var_hat = la.lstsq(D.T, C, rcond=None)[0]

    varAi_hat = np.maximum(var_hat[0:np.size(varAi)], 0)
    varBj_hat = np.maximum(var_hat[np.size(varAi):], 0)

    SigmaAhat = np.sum([varAi_hat[i]*np.outer(vec(Ai[i]), vec(Ai[i])) for i in range(np.size(varAi))], axis=0)
    SigmaBhat = np.sum([varBj_hat[j]*np.outer(vec(Bj[j]), vec(Bj[j])) for j in range(np.size(varBj))], axis=0)

    if display_estimates:
        prettyprint(Ahat, "Ahat")
        prettyprint(A, "A   ")
        prettyprint(Bhat, "Bhat")
        prettyprint(B, "B   ")
        prettyprint(SigmaAhat, "SigmaAhat")
        prettyprint(SigmaA, "SigmaA   ")
        prettyprint(SigmaBhat, "SigmaBhat")
        prettyprint(SigmaB, "SigmaB   ")

    if detailed_outputs:
        outputs = Ahat, Bhat, SigmaAhat, SigmaBhat, varAi_hat, varBj_hat
    else:
        outputs = Ahat, Bhat, SigmaAhat, SigmaBhat
    return outputs
Example #12
def dlyap_mult(A,
               B,
               K,
               a,
               Aa,
               b,
               Bb,
               Q,
               R,
               S0,
               matrixtype='P',
               algo='iterative',
               show_warn=False,
               check_pd=False,
               P00=None,
               S00=None):
    n = A.shape[1]
    n2 = n * n
    p = len(a)
    q = len(b)
    AK = A + np.dot(B, K)
    stable = True
    stable2 = True
    if algo == 'linsolve':
        if matrixtype == 'P':
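            # Vectorized form of the generalized Lyapunov equation
            #   P = AK' P AK + sum_i a_i Aa_i' P Aa_i + sum_j b_j K' Bb_j' P Bb_j K + Q + K' R K
            # rearranged below into a single linear system in vec(P).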
            # Intermediate terms
            Aunc_P = np.zeros([n2, n2])
            for i in range(p):
                Aunc_P = Aunc_P + a[i] * kron(Aa[:, :, i].T)
            BKunc_P = np.zeros([n2, n2])
            for j in range(q):
                BKunc_P = BKunc_P + b[j] * kron(np.dot(K.T, Bb[:, :, j].T))
            # Compute matrix and vector for the linear equation solver
            Alin_P = np.eye(n2) - kron(AK.T) - Aunc_P - BKunc_P
            blin_P = vec(Q) + np.dot(kron(K.T), vec(R))
            # Solve linear equations
            xlin_P = la.solve(Alin_P, blin_P)
            # Reshape
            P = np.reshape(xlin_P, [n, n])
            if check_pd:
                stable = is_pos_def(P)
        elif matrixtype == 'S':
            # Intermediate terms
            Aunc_S = np.zeros([n2, n2])
            for i in range(p):
                Aunc_S = Aunc_S + a[i] * kron(Aa[:, :, i])
            BKunc_S = np.zeros([n2, n2])
            for j in range(q):
                BKunc_S = BKunc_S + b[j] * kron(np.dot(Bb[:, :, j], K))
            # Compute matrix and vector for the linear equation solver
            Alin_S = np.eye(n2) - kron(AK) - Aunc_S - BKunc_S
            blin_S = vec(S0)
            # Solve linear equations
            xlin_S = la.solve(Alin_S, blin_S)
            # Reshape
            S = np.reshape(xlin_S, [n, n])
            if check_pd:
                stable = is_pos_def(S)
        elif matrixtype == 'PS':
            P = dlyap_mult(A,
                           B,
                           K,
                           a,
                           Aa,
                           b,
                           Bb,
                           Q,
                           R,
                           S0,
                           matrixtype='P',
                           algo='linsolve')
            S = dlyap_mult(A,
                           B,
                           K,
                           a,
                           Aa,
                           b,
                           Bb,
                           Q,
                           R,
                           S0,
                           matrixtype='S',
                           algo='linsolve')

    elif algo == 'iterative':
        # Implicit iterative solution to generalized discrete Lyapunov equation
        # Inspired by https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7553367
        # In turn inspired by https://www.sciencedirect.com/science/article/pii/089812219500119J
        # Faster for large systems i.e. >50 states
        # Options
        max_iters = 1000
        epsilon_P = 1e-5
        epsilon_S = 1e-5
        # Initialize
        if matrixtype == 'P' or matrixtype == 'PS':
            if P00 is None:
                P = np.copy(Q)
            else:
                P = P00
        if matrixtype == 'S' or matrixtype == 'PS':
            if S00 is None:
                S = np.copy(S0)
            else:
                S = S00
        iterc = 0
        converged = False
        stop = False
        while not stop:
            if matrixtype == 'P' or matrixtype == 'PS':
                P_prev = P
                APAunc = np.zeros([n, n])
                for i in range(p):
                    APAunc += a[i] * mdot(Aa[:, :, i].T, P, Aa[:, :, i])
                BPBunc = np.zeros([n, n])
                for j in range(q):
                    BPBunc += b[j] * mdot(K.T, Bb[:, :, j].T, P, Bb[:, :, j],
                                          K)
                AAP = AK.T
                QQP = sympart(Q + mdot(K.T, R, K) + APAunc + BPBunc)
                P = dlyap(AAP, QQP)
                if np.any(np.isnan(P)) or np.any(
                        np.isinf(P)) or not is_pos_def(P):
                    stable = False
                try:
                    converged_P = la.norm(P - P_prev, 2) / la.norm(
                        P_prev, 2) < epsilon_P
                    stable2 = True
                except:
                    # print(P)
                    # print(P_prev)
                    # print(P-P_prev)
                    # print(la.norm())
                    stable2 = False
                    # print('')
            if matrixtype == 'S' or matrixtype == 'PS':
                S_prev = S
                ASAunc = np.zeros([n, n])
                for i in range(p):
                    ASAunc += a[i] * mdot(Aa[:, :, i], S, Aa[:, :, i].T)
                BSBunc = np.zeros([n, n])
                for j in range(q):
                    BSBunc += b[j] * mdot(Bb[:, :, j], K, S, K.T, Bb[:, :, j].T)
                AAS = AK
                QQS = sympart(S0 + ASAunc + BSBunc)
                S = dlyap(AAS, QQS)
                if np.any(np.isnan(S)) or not is_pos_def(S):
                    stable = False
                converged_S = la.norm(S - S_prev, 2) / la.norm(S,
                                                               2) < epsilon_S
            # Check for stopping condition
            if matrixtype == 'P':
                converged = converged_P
            elif matrixtype == 'S':
                converged = converged_S
            elif matrixtype == 'PS':
                converged = converged_P and converged_S
            if iterc >= max_iters:
                stable = False
            else:
                iterc += 1
            stop = converged or not stable or not stable2
    #        print('\ndlyap iters = %s' % str(iterc))

    elif algo == 'finite_horizon':
        P = np.copy(Q)
        Pt = np.copy(Q)
        S = np.copy(Q)
        St = np.copy(Q)
        converged = False
        stop = False
        while not stop:
            if matrixtype == 'P' or matrixtype == 'PS':
                APAunc = np.zeros([n, n])
                for i in range(p):
                    APAunc += a[i] * mdot(Aa[:, :, i].T, Pt, Aa[:, :, i])
                BPBunc = np.zeros([n, n])
                for j in range(q):
                    BPBunc += b[j] * mdot(K.T, Bb[:, :, j].T, Pt, Bb[:, :, j],
                                          K)
                Pt = mdot(AK.T, Pt, AK) + APAunc + BPBunc
                P += Pt
                converged_P = np.abs(Pt).sum() < 1e-15
                stable = np.abs(P).sum() < 1e10
            if matrixtype == 'S' or matrixtype == 'PS':
                ASAunc = np.zeros([n, n])
                for i in range(p):
                    ASAunc += a[i] * mdot(Aa[:, :, i], St, Aa[:, :, i].T)
                BSBunc = np.zeros([n, n])
                for j in range(q):
                    BSBunc += b[j] * mdot(Bb[:, :, j], K, St, K.T, Bb[:, :, j].T)
                St = mdot(AK, St, AK.T) + ASAunc + BSBunc
                S += St
                converged_S = np.abs(St).sum() < 1e-15
                stable = np.abs(S).sum() < 1e10
            if matrixtype == 'P':
                converged = converged_P
            elif matrixtype == 'S':
                converged = converged_S
            elif matrixtype == 'PS':
                converged = converged_P and converged_S
            stop = converged or not stable
    if not stable:
        P = None
        S = None
        if show_warn:
            warnings.simplefilter('always', UserWarning)
            warn('System is possibly not mean-square stable')
    if matrixtype == 'P':
        return P
    elif matrixtype == 'S':
        return S
    elif matrixtype == 'PS':
        return P, S