Esempio n. 1
0
def FRAHST_V6_4(data,
                r=1,
                alpha=0.96,
                L=1,
                holdOffTime=0,
                evalMetrics='F',
                EW_mean_alpha=0.1,
                EWMA_filter_alpha=0.3,
                residual_thresh=0.1,
                F_min=0.9,
                epsilon=0.05,
                static_r=0,
                r_upper_bound=None,
                fix_init_Q=0,
                ignoreUp2=0):
    """
    Fast Rank Adaptive Householder Subspace Tracking Algorithm (FRAHST).

    Processes ``data`` one time step at a time, tracking an orthonormal basis
    ``Q`` (first ``r`` columns active) with a Householder-style update, and
    optionally adapting ``r`` so that the energy captured by the hidden
    variables stays around ``F_min`` of the energy of the data.

    Parameters
    ----------
    data : 2-D array, indexed as (time step, stream)
        ``data.shape[0]`` is the number of time steps, ``data.shape[1]`` the
        number of streams N.  It is handed to ``lag_inputs(data, L)``, which
        is expected to yield one column vector of length N * L per time step
        (assumption based on how ``zt`` is used below - confirm against
        ``lag_inputs``).
    r : int
        Initial number of tracked dimensions.
    alpha : float
        Forgetting factor used for S, the energy sums and the covariance
        matrix of the evaluation metrics.
    L : int
        Lag window length passed to ``lag_inputs``; use 1 for no lagging.
    holdOffTime : int
        A rank change is only allowed when ``lastChangeAt < t - holdOffTime``.
    evalMetrics : str
        ``'T'`` additionally records subspace/orthogonality errors and the
        explicitly computed eigenvalues; anything else skips them.
    EW_mean_alpha : float
        Passed to ``EW_mean_var`` for the running mean/variance of ``zt``.
    EWMA_filter_alpha : float
        Smoothing factor of the EWMA prediction of the energy ratio.
    residual_thresh : float
        A time step is recorded as anomalous when the absolute EWMA residual
        exceeds this value (and ``t > ignoreUp2``).
    F_min : float
        Target lower bound of the energy ratio sumYSq / sumXSq.
    epsilon : float
        Tolerance added to ``F_min`` before a dimension may be removed.
    static_r : int
        0 enables rank adaptation; any other value keeps ``r`` fixed.
    r_upper_bound : int or None
        Maximum value of ``r``; defaults to the length of the input vector.
    fix_init_Q : int
        Non-zero starts from the identity; 0 starts from a random
        orthonormal N x r block obtained via ``qr(rand(...))``.
    ignoreUp2 : int
        Anomaly flagging and rank adaptation are suppressed while
        ``t <= ignoreUp2``.

    Returns
    -------
    dict
        Per-time-step histories: ``'hidden'``, ``'E_t'``, ``'E_dash_t'``,
        ``'e_ratio'``, ``'RSRE'``, ``'recon'``, ``'r_hist'``, ``'eig_val'``,
        ``'zt_mean'``, ``'zt_var'``, ``'zt_var2'``, ``'S_trace'``,
        ``'skips'`` (0 = normal update, 1 = skipped with zero-norm zt or ht,
        2 = skipped with non-zero zt and ht), ``'EWMA_res'``; lists ``'S'``,
        ``'Q'``, ``'Phi'`` (only filled when ``evalMetrics == 'T'``) and
        ``'anomalies'`` (0-based time indices).  With ``evalMetrics == 'T'``
        the keys ``'subspace_error'``, ``'orthog_error'``, ``'angle_error'``
        (allocated but not populated) and ``'true_eig_val'`` are added.

    Version history
    ---------------
    Version 6.4 - Problem with skips if Z < 0; happens when zt < ht.  Only a
                  problem when r --> N.  Eigenvalues were not updated.
                - Fixed by using Strobach's alternative eigenvalue
                  approximation method in this case.  Still relies on
                  alpha ~ 1.
                - Introduced Z normalisation as preprocessing method.
                  MA/EWMA removes correlations.

    Version 6.3 - Now uses only a single threshold F_min and the tolerance
                  parameter epsilon.
                - Fixed error in rank adaptation (keeper deleted row and col
                  of Q, instead of just col).

    Version 6.2 - In light of 6.1.5, EWMA incrementally incorporated, and
                  cleaned up a bit.
                - Now uses an extra parameter epsilon to buffer the
                  thresholding condition.

    Version 6.1.5 - Tried using CUSUM on the energy ratio to detect anomalous
                  points.
                - Also have the option to fix r or allow it to adapt, though
                  the right parameters for adaptation require some
                  experimentation.
                - Not yet incorporated; tried to run just as a batch on
                  res['e_ratio'], but it was a lot slower than previously
                  thought (< 300 seconds, W^2 time with window length W).
                  A quick test with EWMA_filter was much quicker
                  (< 1 second).  Will likely use EWMA instead of CUSUM.
                  To do: add EWMA filter to algorithm output.

    Version 6.1 - Basically 6.0 but without the junk func, plus the actual
                  new eigen (energy) tracking.
                - Turns out E_dash_t ~ S_trace or sum(eig_val),
                  E_t ~ EW_var2(zt) discounted by alpha a la covariance
                  matrix.
                - No need to calculate incremental mean and var anymore.
                - Thresholding mechanism now uses two thresholds:
                    - if below the lowest --> increment r
                    - if above the higher --> test if
                      (E_dash_t - eig_i) / E_t is above e_high; if so
                      remove dimensions.
                    - the difference between e_low and e_high acts as a
                      'safety' buffer, as removing an eig can result in too
                      much variance being subtracted because eigs are only
                      smoothed estimates of the true values.  It takes time
                      for the estimates to reach the true eigs.
                - NEXT (maybe): normalisation of data as optional
                  preprocessing.

    Version 6.0 - Aim: different rank adjusting mechanism; compares the sum
                  of r eigenvalues to the variance of the entire data.
                - Performs incremental calculation of data mean and variance
                  (no longer in later versions).

    Version 5.0 - No changes of 5.0 incorporated in this version.

    Version 4.0 - Now also approximates eigenvalues for the approximated
                  tracked basis for the eigenvectors.
                - Approach uses an orthogonal iteration around X.T.
                - Note: only a good approximation if alpha ~< 1.  Used as it
                  is the fastest method, as X.T b --> b must be solved
                  anyway.
                - New entries in res:
                    ['eig_val'] - estimated eigenvalues
                    ['true_eig_val'] - explicitly calculated eigenvalues
                                       (only if evalMetrics = 'T')

    Version 3.4 - Input data z is a time-lagged series up to length L.
                - Algorithm is essentially the same as 3.3, just adds
                  preprocessing to the data vector.
                - Input vector z_t is now of length (N times L) where L is
                  the window length.
                - Use L = 1 for the same results as 3.3.
                - Q is increased accordingly.

    Version 3.3 - Add decay of S in the event of vanishing inputs.
                - Make sure the rank of S does not drop - stops S going
                  singular.

    Version 3.2 - Added ability to fix r to a static value, and also give it
                  an upper bound.  If undefined, defaults to the number of
                  data streams.

    Version 3.1 - Combines the good bits of Pedro's version with corrections
                  of the bugs.  Changed how the algorithm deals with sci;
                  the only difference, but it has a biggish effect on the
                  output.
    """

    # Initialise variables and data structures
    #########################################

    # Derived variables: the (lagged) input vector z_t has N x L entries.
    numStreams = data.shape[1] * L
    timeSteps = data.shape[0]

    if r_upper_bound is None:
        r_upper_bound = numStreams

    # State of the Z <= 0 eigenvalue fallback.
    last_Z_pos = False  # True when the previous step took the Z > 0 branch
    U2t = None  # basis of the previous fallback iteration (None = none yet)

    # State of the energy test / rank adaptation.
    lastChangeAt = 1
    sumYSq = 0.
    sumXSq = 0.

    # Running sums for RSRE and the EWMA prediction of the energy ratio.
    top = 0.0
    bot = 0.0
    pred_data = 0.0

    # Data Stores
    res = {
        'hidden': zeros(
            (timeSteps, numStreams)) * nan,  # Array for hidden variables
        'E_t': zeros([timeSteps, 1]),  # total energy of data
        'E_dash_t': zeros([timeSteps, 1]),  # hidden var energy
        'e_ratio': zeros([timeSteps, 1]),  # Energy ratio
        'RSRE': zeros([timeSteps,
                       1]),  # Relative squared reconstruction error
        'recon': zeros([timeSteps, numStreams]),  # reconstructed data
        'r_hist': zeros([timeSteps, 1]),  # history of r values
        'eig_val': zeros(
            (timeSteps, numStreams)) * nan,  # Estimated eigenvalues
        'zt_mean': zeros((timeSteps, numStreams)),  # history of data mean
        'zt_var': zeros((timeSteps, numStreams)),  # history of data var
        'zt_var2': zeros((timeSteps, numStreams)),  # history of data var
        'S_trace': zeros((timeSteps, 1)),  # history of S trace
        'skips': zeros((timeSteps, 1)),  # tracks time steps where Z <= 0
        'EWMA_res': zeros(
            (timeSteps,
             1)),  # residual of energy ratio not accounted for by EWMA
        'Phi': [],
        'S': [],
        'Q': [],
        'anomalies': []
    }

    # Initialisations
    # Q_0: identity at maximum size; unless the caller asked for a fixed
    # initial Q, the first r columns are replaced by a random orthonormal
    # block.
    Q = eye(numStreams)
    if fix_init_Q == 0:
        Q_0, _ = qr(rand(numStreams, r))
        Q[:, :r] = Q_0

    # S_0
    small_value = 0.0001
    S = eye(numStreams) * small_value  # Avoids singularity
    # v-1
    v = zeros((numStreams, 1))
    # U(t-1) for eigenvalue estimation
    U = eye(numStreams)
    # zt mean and var
    zt_mean = zeros((numStreams, 1))
    zt_var = zeros((numStreams, 1))
    zt_var2 = zeros((numStreams, 1))

    # NOTE the algorithm's state (constant memory) S, Q, v and U is kept at
    # maximum size; only the leading r rows/columns are active.

    # Generator that produces the lagged input vector z_t for each step.
    iter_data = lag_inputs(data, L)

    # Main Loop #
    #############
    for t in range(1, timeSteps + 1):

        # Alias to matrices for current r
        Qt = Q[:, :r]
        vt = v[:r, :]
        St = S[:r, :r]
        Ut = U[:r, :r]

        # Built-in next() works on Python 2.6+ and Python 3, unlike .next().
        zt = next(iter_data)

        # --- Data preprocessing ---
        # Update zt mean and var
        zt_var, zt_mean = EW_mean_var(zt, EW_mean_alpha, zt_var, zt_mean)
        zt_var2 = alpha_var(zt, alpha, zt_var2)

        # Check S remains non-singular (St is a view, so it sees the change)
        for idx in range(r):
            if S[idx, idx] < small_value:
                S[idx, idx] = small_value

        # --- Main algorithm ---
        ht = dot(Qt.T, zt)

        # Energy of zt that lies outside the tracked subspace
        Z = dot(zt.T, zt) - dot(ht.T, ht)

        if Z > 0:

            last_Z_pos = True

            # Refined version, use of extra terms
            u_vec = dot(St, vt)
            X = (alpha * St) + (2 * alpha * dot(u_vec, vt.T)) + dot(ht, ht.T)

            # Estimate eigenvalues + solve Ax = b using QR decomposition
            b_vec, e_values, Ut = QRsolve_eigV(X.T, Z, ht, Ut)

            beta = 4 * (dot(b_vec.T, b_vec) + 1)

            phi_sq = 0.5 + (1.0 / sqrt(beta))

            phi = sqrt(phi_sq)

            gamma = (1.0 - 2 * phi_sq) / (2 * phi)

            delta = phi / sqrt(Z)

            vt = gamma * b_vec

            St = X - ((1 / delta) * dot(vt, ht.T))

            w = (delta * ht) - (vt)

            ee = delta * zt - dot(Qt, w)

            Qt = Qt - 2 * dot(ee, vt.T)

        else:  # Z <= 0: the update above is undefined, so skip it

            St = alpha * St  # Continue decay of St

            if norm(zt) > 0 and norm(ht) > 0:  # May be due to zt <= ht
                res['skips'][t - 1] = 2
            else:  # or may be due to zt and ht = 0
                res['skips'][t - 1] = 1

            # Recalculate eigenvalues using the other method (less fast, but
            # does not need Z to be positive).  The iteration restarts from
            # the identity after a regular update, when no previous basis
            # exists yet, or when r changed since the last skip; otherwise it
            # continues from the basis of the previous skipped step.
            if last_Z_pos or U2t is None or U2t.shape[0] != r:
                U2t_min1 = np.eye(r)
            else:
                U2t_min1 = U2t
            Wt = np.dot(St, U2t_min1)
            U2t, R2 = qr(Wt)  # Decomposition
            PHI_U = np.dot(U2t_min1.T, U2t)
            e_values = np.diag(np.dot(R2, PHI_U))

            # Remember that this step was skipped so that a directly
            # following skip continues the iteration instead of restarting.
            last_Z_pos = False

        # Restore data structures
        Q[:, :r] = Qt
        v[:r, :] = vt
        S[:r, :r] = St
        U[:r, :r] = Ut

        # --- Evaluation ---
        # Deviations from true dominant subspace
        if evalMetrics == 'T':
            if t == 1:
                res['subspace_error'] = zeros((timeSteps, 1))
                res['orthog_error'] = zeros((timeSteps, 1))
                res['angle_error'] = zeros((timeSteps, 1))
                # np.nan instead of the np.NAN alias removed in NumPy 2.0
                res['true_eig_val'] = ones((timeSteps, numStreams)) * np.nan
                Cov_mat = zeros([numStreams, numStreams])

            # Calculate covariance matrix of data up to time t
            Cov_mat = alpha * Cov_mat + dot(zt, zt.T)
            res['Phi'].append(Cov_mat)

            # Get eigenvalues and eigenvectors
            W, V = eig(Cov_mat)
            # Sort eigenvectors according to descending eigenvalue
            eig_idx = W.argsort()  # Get sort index
            eig_idx = eig_idx[::-1]  # Reverse order (default is ascending)
            # V_r = eigenvectors of the r largest eigenvalues
            V_r = V[:, eig_idx[:r]]
            # Calculate subspace error
            C = dot(V_r, V_r.T) - dot(Qt, Qt.T)
            res['subspace_error'][t - 1, 0] = 10 * log10(trace(dot(
                C.T, C)))  # Frobenius norm in dB

            # Store true r dominant eigenvalues
            res['true_eig_val'][t - 1, :r] = W[eig_idx[:r]]

            # Calculate deviation from orthonormality
            F = dot(Qt.T, Qt) - eye(r)
            res['orthog_error'][t - 1, 0] = 10 * log10(trace(dot(
                F.T, F)))  # Frobenius norm in dB

        # --- Store values ---
        # Record data mean and var
        res['zt_mean'][t - 1, :] = zt_mean.T[0, :]
        res['zt_var'][t - 1, :] = zt_var.T[0, :]
        res['zt_var2'][t - 1, :] = zt_var2.T[0, :]

        # Record S & Q.  Copies are stored because on skipped steps Qt is a
        # view into Q, which later iterations overwrite in place.
        res['S'].append(St.copy())
        res['Q'].append(Qt.copy())

        # Record S trace
        res['S_trace'][t - 1] = np.trace(St)

        # Store eigenvalues (both branches above set e_values)
        res['eig_val'][t - 1, :r] = e_values[:r]

        # Record reconstructed z
        z_hat = dot(Qt, ht)
        res['recon'][t - 1, :] = z_hat.T[0, :]

        # Record hidden variables
        res['hidden'][t - 1, :r] = ht.T[0, :]

        # Record RSRE
        top = top + (norm(zt - z_hat)**2)
        bot = bot + (norm(zt)**2)
        res['RSRE'][t - 1, 0] = top / bot

        # Record r
        res['r_hist'][t - 1, 0] = r

        # --- Rank estimation ---
        # Calculate energies
        sumXSq = alpha * sumXSq + np.sum(zt**2)  # Energy of data
        sumYSq = alpha * sumYSq + np.sum(ht**2)  # Energy of hidden variables

        res['E_t'][t - 1, 0] = sumXSq
        res['E_dash_t'][t - 1, 0] = sumYSq

        if sumXSq == 0:  # Catch NaNs
            e_ratio = 0.0
        else:
            e_ratio = sumYSq / sumXSq

        res['e_ratio'][t - 1, 0] = e_ratio

        # Run EWMA on e_ratio:
        # residual against the prediction made at the previous time step
        residual = np.abs(e_ratio - pred_data)
        res['EWMA_res'][t - 1, 0] = residual
        # Update prediction for next time step
        pred_data = EWMA_filter_alpha * e_ratio + (
            1 - EWMA_filter_alpha) * pred_data

        # Threshold residual for anomaly
        if residual > residual_thresh and t > ignoreUp2:
            # Record time step of anomaly
            res['anomalies'].append(t - 1)

        if static_r == 0:  # optional parameter to keep r unchanged
            may_adapt = lastChangeAt < (t - holdOffTime) and t > ignoreUp2

            # Adjust Q_t, St and Ut for change in r
            if sumYSq < (F_min * sumXSq) and may_adapt and r < r_upper_bound:
                # Note: indexing with r addresses the (r + 1)-th row/column
                # because indices start at 0.

                # Extend Q by z_bar, the normalised part of zt outside the
                # current subspace.
                h_dash = dot(Q[:, :r].T, zt)
                z_bar = zt - dot(Q[:, :r], h_dash)
                z_bar_norm = norm(z_bar)

                # A zero residual gives no direction to add; dividing by it
                # would fill Q with NaN, so the rank is left unchanged.
                if z_bar_norm > 0:
                    z_bar = z_bar / z_bar_norm
                    Q[:numStreams, r] = z_bar.T[0, :]

                    s_end = z_bar_norm**2

                    # Set next row and column to zero
                    S[r, :] = 0.0
                    S[:, r] = 0.0
                    S[r, r] = s_end  # change last element

                    # Update Ut_1
                    # Set next row and column to zero
                    U[r, :] = 0.0
                    U[:, r] = 0.0
                    U[r, r] = 1.0  # change last element

                    # Update eigenvalues.  This is where the estimate may be
                    # off; there is nothing better available.
                    e_values = np.r_[e_values, z_bar_norm**2]

                    # NOTE(review): v is not touched here or in the rank
                    # reduction below, so v[r] may hold a stale value -
                    # confirm that this is intended.

                    # New r, increment
                    r = r + 1

                    # Reset lastChange
                    lastChangeAt = t

            elif sumYSq > (F_min * sumXSq) and may_adapt and r > 1:

                keeper = ones(r, dtype=bool)
                # Sorting the eigenvalues caused problems, so simply cycle
                # from the last added eigenvalue back to the first.
                acounted_var = sumYSq
                for idx in reversed(range(r)):

                    if ((acounted_var - e_values[idx]) /
                            sumXSq) > F_min + epsilon:
                        keeper[idx] = 0
                        acounted_var = acounted_var - e_values[idx]

                # Use keeper as a logical selector for S and Q and U
                if not keeper.all():

                    # Delete cols in Q, and rows/cols in S and U.
                    newQ = Q[:, :r].copy()
                    newQ = newQ[:, keeper]  # cols eliminated
                    Q[:newQ.shape[0], :newQ.shape[1]] = newQ

                    newS = S[:r, :r].copy()
                    newS = newS[keeper, :][:, keeper]  # rows/cols eliminated
                    S[:newS.shape[0], :newS.shape[1]] = newS

                    newU = U[:r, :r].copy()
                    newU = newU[keeper, :][:, keeper]  # rows/cols eliminated
                    U[:newU.shape[0], :newU.shape[1]] = newU

                    # Never drop below one tracked dimension
                    r = max(int(keeper.sum()), 1)

                    # Reset lastChange
                    lastChangeAt = t

    return res
Esempio n. 2
0
# -*- coding: utf-8 -*-
Esempio n. 3
0
def FHST(data,
         init_Q,
         init_S,
         init_U,
         init_v,
         r=4,
         alpha=0.96,
         evalMetrics='F',
         ignoreUp2=0):
    """
    Fast Householder Subspace Tracking (FHST) with a static rank.

    This is the plain FHST component of FRAHST version 6.3: no rank
    adaptation, ``r`` stays fixed for the whole run.

    Parameters
    ----------
    data : 2-D array, indexed as (time step, stream)
        Iterated row by row; each row is reshaped to a column vector.
    init_Q, init_S, init_U, init_v : arrays
        Initial algorithm state.  Only ``[:, :r]`` of Q, ``[:r, :r]`` of S
        and U and ``[:r, :]`` of v are used.  The arrays are updated IN
        PLACE, so after the call they hold the final state.
    r : int
        Number of tracked dimensions.
    alpha : float
        Forgetting factor.
    evalMetrics : str
        ``'T'`` additionally records subspace/orthogonality errors and the
        explicitly computed eigenvalues; anything else skips them.
    ignoreUp2 : int
        Not used by this function.

    Returns
    -------
    dict
        Per-time-step histories ``'hidden'``, ``'RSRE'``, ``'recon'``,
        ``'r_hist'``, ``'eig_val'`` and ``'skips'`` (0 = normal update,
        1 = skipped with zero-norm zt or ht, 2 = skipped with non-zero zt
        and ht), plus the lists ``'S'`` and ``'Q'`` holding the active
        blocks after every step.  ``'zt_mean'`` and ``'EWMA_res'`` are
        allocated but never written here.  With ``evalMetrics == 'T'`` the
        keys ``'subspace_error'``, ``'orthog_error'``, ``'angle_error'``
        (allocated but not populated) and ``'true_eig_val'`` are added.
    """

    # Initialise variables and data structures
    #########################################
    # Derived variables
    numStreams = data.shape[1]
    timeSteps = data.shape[0]

    # Data Stores
    res = {
        'hidden': zeros(
            (timeSteps, numStreams)) * nan,  # Array for hidden variables
        'RSRE': zeros([timeSteps,
                       1]),  # Relative squared reconstruction error
        'recon': zeros([timeSteps, numStreams]),  # reconstructed data
        'r_hist': zeros([timeSteps, 1]),  # history of r values
        'eig_val': zeros(
            (timeSteps, numStreams)) * nan,  # Estimated eigenvalues
        'zt_mean': zeros((timeSteps, numStreams)),  # history of data mean
        'skips': zeros((timeSteps, 1)),  # tracks time steps where Z <= 0
        'EWMA_res': zeros(
            (timeSteps,
             1)),  # residual of energy ratio not accounted for by EWMA
        'S': [],
        'Q': []
    }

    # The state arrays are aliased, not copied: updates are written back
    # into the caller's arrays.
    Q = init_Q
    S = init_S
    v = init_v
    U = init_U

    iter_data = iter(data)

    # Loop invariants and running sums
    small_value = 0.0001  # floor for the diagonal of S
    e_values = None  # stays None until the first regular (Z > 0) update
    top = 0.0  # accumulated squared reconstruction error
    bot = 0.0  # accumulated squared norm of the data

    # NOTE the algorithm's state (constant memory) S, Q, v and U is kept at
    # maximum size; only the leading r rows/columns are active.
    # Main Loop #
    #############
    for t in range(1, timeSteps + 1):

        # Alias to matrices for current r
        Qt = Q[:, :r]
        vt = v[:r, :]
        St = S[:r, :r]
        Ut = U[:r, :r]

        # Built-in next() works on Python 2.6+ and Python 3, unlike .next().
        zt = next(iter_data)

        # --- Data preprocessing ---
        # Convert to a column vector
        zt = zt.reshape(zt.shape[0], 1)

        # Check S remains non-singular (St is a view, so it sees the change)
        for idx in range(r):
            if S[idx, idx] < small_value:
                S[idx, idx] = small_value

        # --- Main algorithm ---
        ht = dot(Qt.T, zt)

        # Energy of zt that lies outside the tracked subspace
        Z = dot(zt.T, zt) - dot(ht.T, ht)

        if Z > 0:

            # Refined version, use of extra terms
            u_vec = dot(St, vt)
            X = (alpha * St) + (2 * alpha * dot(u_vec, vt.T)) + dot(ht, ht.T)

            # Estimate eigenvalues + solve Ax = b using QR decomposition
            b_vec, e_values, Ut = QRsolve_eigV(X.T, Z, ht, Ut)

            beta = 4 * (dot(b_vec.T, b_vec) + 1)

            phi_sq = 0.5 + (1.0 / sqrt(beta))

            phi = sqrt(phi_sq)

            gamma = (1.0 - 2 * phi_sq) / (2 * phi)

            delta = phi / sqrt(Z)

            vt = gamma * b_vec

            St = X - ((1 / delta) * dot(vt, ht.T))

            w = (delta * ht) - (vt)

            ee = delta * zt - dot(Qt, w)

            Qt = Qt - 2 * dot(ee, vt.T)

        else:  # Z <= 0: the update above is undefined, so skip it

            if norm(zt) > 0 and norm(ht) > 0:  # May be due to zt <= ht
                # NOTE(review): St is not decayed in this case, only in the
                # zero-norm case below - confirm that this is intended.
                res['skips'][t - 1] = 2  # record skip
            else:  # or may be due to zt and ht = 0
                St = alpha * St  # Continue decay of St
                res['skips'][t - 1] = 1  # record skip

        # Restore data structures
        Q[:, :r] = Qt
        v[:r, :] = vt
        S[:r, :r] = St
        U[:r, :r] = Ut

        # --- Evaluation ---
        # Deviations from true dominant subspace
        if evalMetrics == 'T':
            if t == 1:
                res['subspace_error'] = zeros((timeSteps, 1))
                res['orthog_error'] = zeros((timeSteps, 1))
                res['angle_error'] = zeros((timeSteps, 1))
                # np.nan instead of the np.NAN alias removed in NumPy 2.0
                res['true_eig_val'] = ones((timeSteps, numStreams)) * np.nan
                Cov_mat = zeros([numStreams, numStreams])

            # Calculate covariance matrix of data up to time t
            Cov_mat = alpha * Cov_mat + dot(zt, zt.T)

            # Get eigenvalues and eigenvectors
            W, V = eig(Cov_mat)
            # Sort eigenvectors according to descending eigenvalue
            eig_idx = W.argsort()  # Get sort index
            eig_idx = eig_idx[::-1]  # Reverse order (default is ascending)
            # V_r = eigenvectors of the r largest eigenvalues
            V_r = V[:, eig_idx[:r]]
            # Calculate subspace error
            C = dot(V_r, V_r.T) - dot(Qt, Qt.T)
            res['subspace_error'][t - 1, 0] = 10 * log10(trace(dot(
                C.T, C)))  # Frobenius norm in dB

            # Store true r dominant eigenvalues
            res['true_eig_val'][t - 1, :r] = W[eig_idx[:r]]

            # Calculate deviation from orthonormality
            F = dot(Qt.T, Qt) - eye(r)
            res['orthog_error'][t - 1, 0] = 10 * log10(trace(dot(
                F.T, F)))  # Frobenius norm in dB

        # --- Store values ---
        # Record S and Q.  Copies are stored because on skipped steps St and
        # Qt are views into S and Q, which later iterations overwrite.
        res['S'].append(St.copy())
        res['Q'].append(Qt.copy())

        # Store eigenvalues; zero until a first estimate exists
        if e_values is None:
            res['eig_val'][t - 1, :r] = 0.0
        else:
            res['eig_val'][t - 1, :r] = e_values[:r]

        # Record reconstructed z
        z_hat = dot(Qt, ht)
        res['recon'][t - 1, :] = z_hat.T[0, :]

        # Record hidden variables
        res['hidden'][t - 1, :r] = ht.T[0, :]

        # Record RSRE
        top = top + (norm(zt - z_hat)**2)
        bot = bot + (norm(zt)**2)
        res['RSRE'][t - 1, 0] = top / bot

        # Record r
        res['r_hist'][t - 1, 0] = r

    return res
Esempio n. 4
0
# -*- coding: utf-8 -*-
Esempio n. 5
0
    '''Begin main algorithm'''        
    ht = dot(Qt.T, zt) 
    Z = dot(zt.T, zt) - dot(ht.T , ht)

    if Z > 0 :
        
        # Flag for whether Z(t-1) > 0
        # Used for alternative eigenvalue calculation if Z < 0
        last_Z_positive = 1
            
        # Refined version, use of extra terms
        u_vec = dot(St , vt)
        X = (alpha * St) + (2 * alpha * dot(u_vec, vt.T)) + dot(ht, ht.T)
    
        # Estimate eigenValues + Solve Ax = b using QR decomposition 
        b_vec, e_values, Ut = QRsolve_eigV(X.T, Z, ht, Ut)
            
        beta  = 4 * (dot(b_vec.T , b_vec) + 1)
        
        phi_sq = 0.5 + (1.0 / sqrt(beta))
        
        phi = sqrt(phi_sq)
    
        gamma = (1.0 - 2 * phi_sq) / (2 * phi)
            
        delta = phi / sqrt(Z)
            
        vt = gamma * b_vec 
            
        St = X - ((1 /delta) * dot(vt , ht.T))