def anomaly_AR_Qstat(st, p):
    """
    Use Auto Regressive prediction and Q statistic to calculate anomalies.

    Step 1: Calculate prediction of next data point z_t+1 by fitting an AR
            model to each hidden variable, predicting each hi_t+1 and then
            projecting h_t+1 using the basis Q at current time t.

    Step 2: Track a sample of the residual
            res_t,l = [(z_t-l - predicted_z_t-l) + ... + (z_t - predicted_z_t)]

    Step 3: Calculate Q statistic of residual sample and test
            H0 = No correlation in residuals, all iid, just noise.
            Ha = Not all iid.

    h_window is ht_AR_win x numStreams.

    Parameters
    ----------
    st : dict
        Algorithm state, updated in place.  Keys read: 'ht', 't', 'r', 'Q'
        and 'pred_zt'.  Keys created/updated: 'Q_stat', 'pred_h', 'coeffs',
        'h_res', 'h_res_aa', 'h_res_norm', 'h_window', 'h_res_win',
        'zt_res', 'zt_res_aa', 'zt_res_norm' and 'pred_zt'.
    p : dict
        Parameters.  Keys read: 'AR_order', 'ht_AR_win' and 'Q_lag'.

    Returns
    -------
    dict
        The same ``st`` object that was passed in.

    NOTE(review): `zt` and `numStreams` are neither parameters nor locals
    here, so they have to resolve as module-level names -- confirm they
    exist, or pass them in through `st`.
    NOTE(review): st['pred_zt'] is read before this function ever writes it;
    presumably the caller seeds it -- confirm.
    """
    # First call: create the per-hidden-variable stores (NaN = "no value yet").
    if 'Q_stat' not in st:
        st['Q_stat'] = np.zeros(st['ht'].size) * np.nan
        st['pred_h'] = np.zeros(st['ht'].size) * np.nan
        st['coeffs'] = np.zeros((st['ht'].size, p['AR_order'])) * np.nan
        st['h_res'] = np.zeros(st['ht'].size)
        st['h_res_aa'] = np.zeros(1)
        st['h_res_norm'] = np.zeros(1)

    # Build/Slide h_window: oldest row drops out, newest ht goes in last.
    if 'h_window' in st:
        st['h_window'][:-1, :] = st['h_window'][1:, :]  # Shift window
    else:
        st['h_window'] = np.zeros((p['ht_AR_win'], st['ht'].size))
    st['h_window'][-1, :] = np.nan_to_num(st['ht'])

    # Forecasting only starts once t exceeds the window length.
    if st['t'] > p['ht_AR_win']:

        # Calculate error in last h prediction
        if 'pred_h' in st:
            st['h_res'] = np.nan_to_num(st['pred_h']) - np.nan_to_num(st['ht'])
            st['h_res_aa'] = np.abs(st['h_res']).sum() / st['ht'].size
            st['h_res_norm'] = npl.norm(st['h_res'])

            # Build/Slide h_residual_window
            if 'h_res_win' in st:
                st['h_res_win'][:-1, :] = st['h_res_win'][1:, :]  # Shift window
            else:
                st['h_res_win'] = np.zeros((p['ht_AR_win'], st['h_res'].size))
            st['h_res_win'][-1, :] = np.nan_to_num(st['h_res'])

            # Calculate Q statistic per hidden variable.
            # coeffs.shape[0] corresponds to r at the last time step.
            for i in range(st['coeffs'].shape[0]):
                st['Q_stat'][i] = Q_stat(st['h_res_win'][:, i],
                                         st['coeffs'][i, :],
                                         p['Q_lag'])

        # Get coefficients for ht+1
        st['coeffs'] = np.zeros((st['ht'].size, p['AR_order'])) * np.nan

        # Reset the forecast before accumulating.  The store is seeded with
        # NaN and the loop below only ever does `-=`, so without this reset
        # every forecast (and hence pred_zt) would stay NaN forever.
        st['pred_h'] = np.zeros(st['ht'].size) * np.nan
        st['pred_h'][:st['r']] = 0.0

        for i in range(st['r']):
            st['coeffs'][i, :] = burg_AR(p['AR_order'], st['h_window'][:, i])
            for j in range(p['AR_order']):
                st['pred_h'][i] -= st['coeffs'][i, j] * st['h_window'][-1 - j, i]

        # Calculate prediction error based on last time step prediction
        st['zt_res'] = np.abs(st['pred_zt'] - zt.T)
        st['zt_res_aa'] = np.abs(st['pred_zt'] - zt.T).sum() / numStreams
        st['zt_res_norm'] = npl.norm(st['pred_zt'] - zt.T)

        # Update prediction for next time step
        st['pred_zt'] = dot(st['Q'][:, :st['r']], st['pred_h'][:st['r']]).T

        # Anomaly test (disabled in the original):
        # if st['pred_err_norm'] > p['err_thresh']:
        #     st['anomaly'] = True

    return st
def anomaly_AR_forcasting(st, p):
    """
    Use Auto Regressive prediction to calculate anomalies.

    h_window is ht_AR_win x numStreams.

    Each hidden variable is forecast one step ahead with an AR model fitted
    by ``burg_AR``; the forecast is projected through Q to predict the next
    input vector.  The squared norm of the prediction error is kept in a
    sliding window, differenced, and the latest difference is scaled by the
    root mean square of an older sample of differences to form ``t_stat``.
    ``st['anomaly']`` is set to True when ``|t_stat| > p['x_thresh']``.

    Parameters
    ----------
    st : dict
        Algorithm state, updated in place.  Keys read: 'ht', 't', 'r', 'Q'
        and 'pred_zt'.  Keys created/updated: 't_stat', 'pred_dsn',
        'x_sample', 'h_window', 'pred_err', 'pred_err_ave', 'pred_err_norm',
        'pred_zt', 'pred_err_win', 'pred_diff_sq_norm' and 'anomaly'.
    p : dict
        Parameters.  Keys read: 'ht_AR_win', 'AR_order', 'sample_N',
        'dependency_lag' and 'x_thresh'.

    Returns
    -------
    dict
        The same ``st`` object that was passed in.

    NOTE(review): `zt` and `numStreams` are neither parameters nor locals
    here, so they have to resolve as module-level names -- confirm they
    exist, or pass them in through `st`.
    NOTE(review): st['pred_zt'] is read before this function ever writes it;
    presumably the caller seeds it -- confirm.
    """
    # Initialise variables that are not yet present
    if 't_stat' not in st:
        st['t_stat'] = 0
        st['pred_dsn'] = 0
        st['x_sample'] = 0

    # Build/Slide h_window: oldest row drops out, newest ht goes in last.
    if 'h_window' in st:
        st['h_window'][:-1, :] = st['h_window'][1:, :]  # Shift window
    else:
        st['h_window'] = np.zeros((p['ht_AR_win'], st['ht'].size))
    st['h_window'][-1, :] = np.nan_to_num(st['ht'])

    # Forecasting only starts once t exceeds the window length.
    if st['t'] > p['ht_AR_win']:
        # Get coefficients for ht+1 and forecast each active hidden variable.
        pred_h = np.zeros((st['r'], 1))
        for i in range(st['r']):
            coeffs = burg_AR(p['AR_order'], st['h_window'][:, i])
            for j in range(p['AR_order']):
                pred_h[i, 0] -= coeffs[j] * st['h_window'][-1 - j, i]

        # Calculate prediction error based on last time step prediction
        st['pred_err'] = np.abs(st['pred_zt'] - zt.T)
        st['pred_err_ave'] = np.abs(st['pred_zt'] - zt.T).sum() / numStreams
        st['pred_err_norm'] = npl.norm(st['pred_zt'] - zt.T)

        # Update prediction for next time step
        st['pred_zt'] = dot(st['Q'][:, :st['r']], pred_h).T

        # --- Anomaly test ---
        # NOTE(review): nesting rebuilt from a flattened source; this section
        # is kept inside the forecasting branch because it needs
        # st['pred_err_norm'], which is only written above.

        # Build/Slide pred_err_window (stores squared error norms)
        if 'pred_err_win' in st:
            st['pred_err_win'][:-1] = st['pred_err_win'][1:]  # Shift window
        else:
            st['pred_err_win'] = np.zeros(p['sample_N'] + p['dependency_lag'])
        st['pred_err_win'][-1] = st['pred_err_norm'] ** 2

        if st['t'] >= (p['sample_N'] + p['dependency_lag']):
            # Differenced squared norms of the residuals.
            diffs = np.diff(st['pred_err_win'], axis=0)
            st['pred_diff_sq_norm'] = diffs
            st['pred_dsn'] = diffs[-1]

            # Sample = differences older than the last `dependency_lag`
            # entries.  Explicit bounds are used because the negative-index
            # form `[-(N + lag):-lag]` collapses to an empty slice when
            # dependency_lag is 0, which zeroes x_sample and blows up t_stat.
            n_diffs = diffs.shape[0]
            start = max(n_diffs - (p['sample_N'] + p['dependency_lag']), 0)
            stop = max(n_diffs - p['dependency_lag'], 0)
            st['x_sample'] = (diffs[start:stop] ** 2).sum()

            st['t_stat'] = diffs[-1] / np.sqrt(st['x_sample'] / p['sample_N'])

            if np.abs(st['t_stat']) > p['x_thresh']:
                st['anomaly'] = True

    return st
def anomaly_AR_Qstat(st, p):
    """
    Use Auto Regressive prediction and Q statistic to calculate anomalies.

    Step 1: Calculate prediction of next data point z_t+1 by fitting an AR
            model to each hidden variable, predicting each hi_t+1 and then
            projecting h_t+1 using the basis Q at current time t.

    Step 2: Track a sample of the residual
            res_t,l = [(z_t-l - predicted_z_t-l) + ... + (z_t - predicted_z_t)]

    Step 3: Calculate Q statistic of residual sample and test
            H0 = No correlation in residuals, all iid, just noise.
            Ha = Not all iid.

    h_window is ht_AR_win x numStreams.

    Parameters
    ----------
    st : dict
        Algorithm state, updated in place.  Keys read: 'ht', 't', 'r', 'Q'
        and 'pred_zt'.  Keys created/updated: 'Q_stat', 'pred_h', 'coeffs',
        'h_res', 'h_res_aa', 'h_res_norm', 'h_window', 'h_res_win',
        'zt_res', 'zt_res_aa', 'zt_res_norm' and 'pred_zt'.
    p : dict
        Parameters.  Keys read: 'AR_order', 'ht_AR_win' and 'Q_lag'.

    Returns
    -------
    dict
        The same ``st`` object that was passed in.

    NOTE(review): `zt` and `numStreams` are neither parameters nor locals
    here, so they have to resolve as module-level names -- confirm they
    exist, or pass them in through `st`.
    NOTE(review): st['pred_zt'] is read before this function ever writes it;
    presumably the caller seeds it -- confirm.
    """
    # First call: create the per-hidden-variable stores (NaN = "no value yet").
    if 'Q_stat' not in st:
        n_hidden = st['ht'].size
        st['Q_stat'] = np.zeros(n_hidden) * np.nan
        st['pred_h'] = np.zeros(n_hidden) * np.nan
        st['coeffs'] = np.zeros((n_hidden, p['AR_order'])) * np.nan
        st['h_res'] = np.zeros(n_hidden)
        st['h_res_aa'] = np.zeros(1)
        st['h_res_norm'] = np.zeros(1)

    # Build/Slide h_window: oldest row drops out, newest ht goes in last.
    if 'h_window' in st:
        st['h_window'][:-1, :] = st['h_window'][1:, :]  # Shift window
    else:
        st['h_window'] = np.zeros((p['ht_AR_win'], st['ht'].size))
    st['h_window'][-1, :] = np.nan_to_num(st['ht'])

    # Forecasting only starts once t exceeds the window length.
    if st['t'] > p['ht_AR_win']:

        # Calculate error in last h prediction
        if 'pred_h' in st:
            st['h_res'] = np.nan_to_num(st['pred_h']) - np.nan_to_num(st['ht'])
            st['h_res_aa'] = np.abs(st['h_res']).sum() / st['ht'].size
            st['h_res_norm'] = npl.norm(st['h_res'])

            # Build/Slide h_residual_window
            if 'h_res_win' in st:
                st['h_res_win'][:-1, :] = st['h_res_win'][1:, :]  # Shift window
            else:
                st['h_res_win'] = np.zeros((p['ht_AR_win'], st['h_res'].size))
            st['h_res_win'][-1, :] = np.nan_to_num(st['h_res'])

            # Calculate Q statistic per hidden variable.
            # coeffs.shape[0] corresponds to r at the last time step.
            for i in range(st['coeffs'].shape[0]):
                st['Q_stat'][i] = Q_stat(st['h_res_win'][:, i],
                                         st['coeffs'][i, :],
                                         p['Q_lag'])

        # Get coefficients for ht+1
        st['coeffs'] = np.zeros((st['ht'].size, p['AR_order'])) * np.nan

        # Reset the forecast before accumulating.  The store is seeded with
        # NaN and the loop below only ever does `-=`, so without this reset
        # every forecast (and hence pred_zt) would stay NaN forever.
        st['pred_h'] = np.zeros(st['ht'].size) * np.nan
        st['pred_h'][:st['r']] = 0.0

        for i in range(st['r']):
            st['coeffs'][i, :] = burg_AR(p['AR_order'], st['h_window'][:, i])
            for j in range(p['AR_order']):
                st['pred_h'][i] -= st['coeffs'][i, j] * st['h_window'][-1 - j, i]

        # Calculate prediction error based on last time step prediction
        st['zt_res'] = np.abs(st['pred_zt'] - zt.T)
        st['zt_res_aa'] = np.abs(st['pred_zt'] - zt.T).sum() / numStreams
        st['zt_res_norm'] = npl.norm(st['pred_zt'] - zt.T)

        # Update prediction for next time step
        st['pred_zt'] = dot(st['Q'][:, :st['r']], st['pred_h'][:st['r']]).T

        # Anomaly test (disabled in the original):
        # if st['pred_err_norm'] > p['err_thresh']:
        #     st['anomaly'] = True

    return st
def FRAHST_V6_5(data, r=1, alpha=0.96, L=1, h_AR_buff=30, AR_order=5,
                holdOffTime=0, evalMetrics='F', EW_mean_alpha=0.1,
                EWMA_filter_alpha=0.3, residual_thresh=0.1, F_min=0.9,
                epsilon=0.05, static_r=0, r_upper_bound=None,
                fix_init_Q=0, ignoreUp2=0):
    """
    Fast Rank Adaptive Householder Subspace Tracking Algorithm (FRAHST)

    Parameters
    ----------
    data : 2-D array
        ``data.shape[0]`` is taken as the number of time steps and
        ``data.shape[1] * L`` as the length of each input vector z_t.
    r : int
        Initial number of hidden variables (tracked subspace dimension).
    alpha : float
        Exponential forgetting factor applied to S and to the energy sums.
    L : int
        Lag window length handed to ``lag_inputs``.
    h_AR_buff : int
        Number of most recent hidden-variable rows used for the AR fit;
        forecasting starts once ``t > h_AR_buff``.
    AR_order : int
        Order of the AR model fitted by ``burg_AR``.
    holdOffTime : int
        Minimum number of time steps between two changes of r.
    evalMetrics : str
        'T' additionally computes subspace/orthogonality errors and the
        explicit eigenvalues of the discounted covariance matrix.
    EW_mean_alpha : float
        Passed to ``EW_mean_var`` for the running mean/variance of z_t.
    EWMA_filter_alpha : float
        Smoothing factor of the EWMA run on the energy ratio.
    residual_thresh : float
        EWMA residual above which a time step is recorded as an anomaly.
    F_min, epsilon : float
        Energy-ratio threshold and tolerance used for rank adaptation.
    static_r : int
        Non-zero keeps r fixed.
    r_upper_bound : int or None
        Upper bound for r; None means the number of streams.
    fix_init_Q : int
        Non-zero starts from the identity instead of a random orthonormal Q.
    ignoreUp2 : int
        No anomalies are recorded and r is not adapted while
        ``t <= ignoreUp2``.

    Returns
    -------
    dict
        ``res``, holding the per-time-step histories created below
        ('hidden', 'E_t', 'E_dash_t', 'e_ratio', 'RSRE', 'recon', 'r_hist',
        'eig_val', 'anomalies', 'pred_zt', ...).

    Version 6.5
        - Investigate forecasting on hidden variables as anomaly measure.

    Version 6.4
        - Problem with skips if Z < 0. Happens when zt < ht. Only a problem
          when r --> N. Eigenvalues not updated.
        - Fixed by using Strobach's alternative eigenvalue approximation
          method in this case. Still relies on alpha ~ 1.
        - Introduced Z normalisation as preprocessing method. MA/EWMA
          removes correlations.

    Version 6.3
        - Now uses only a single threshold F_min and the tolerance
          parameter epsilon.
        - Fixed error in rank adaptation (keeper deleted row and col of Q,
          instead of just col).

    Version 6.2
        - In light of 6.1.5, EWMA incrementally incorporated, and cleaned
          up a bit.
        - Now uses an extra parameter epsilon to buffer thresholding
          condition.

    Version 6.1.5
        - Tried using CUSUM on energy ratio to detect anomalous points.
        - Also have the option to fix r or allow it to adapt. Though right
          parameters for adaptation require some experimentation.
        - Not yet incorporated, tried to run just as a batch on
          res['e_ratio'], but was a lot slower than previously thought
          < 300 seconds. W^2 time with window length W. A quick test with
          EWMA_filter was MUCH MUCH quicker < 1 second.
          Will likely use EWMA instead of CUSUM.
          To do: add EWMA filter to algorithm output....

    Version 6.1
        - Basically 6.0 but without the junk func + the actual new
          eigen(energy) tracking.
        - Turns out E_dash_t ~ S_trace or sum(eig_val)
                    E_t ~ EW_var2(zt) discounted by alpha a la covariance
                    matrix.
        - No need to calculate incremental mean and var anymore.
        - Thresholding mechanism now uses two thresholds.
            - if below the lowest --> increment r
            - if above the higher --> test if (E_dash_t - eig_i) / E_t is
              above e_high, if so remove dimensions.
            - difference between e_low and e_high acts as a 'safety'
              buffer, as removing an eig can result in too much variance
              being subtracted because eigs are only smoothed estimates of
              the true values. Takes time for est_eig to reach true eigs.
        - NEXT (maybe) Normalisation of data optional as a preprocessing of
          data.

    Version 6.0
        - Aim: Different rank adjusting mechanism compares sum of r
          eigenvalues to variance of entire data.
        - Performs incremental calculation of data mean and variance.
          (no longer in later version)

    Version 5.0
        - No changes of 5.0 incorporated in this version.

    Version 4.0
        - Now also approximates eigenvalues for the approximated tracked
          basis for the eigenvectors.
        - Approach uses an orthogonal iteration around X.T.
        - Note, only a good approximation if alpha ~< 1. Used as it's the
          fastest method as X.T b --> b must be solved anyway.
        - New entries in res
            ['eig_val'] - estimated eigenvalues
            ['true_eig_val'] - explicitly calculated eigenvalues
                               (only if evalMetrics = T)

    Version 3.4
        - Input data z is time lagged series up to length l.
        - Algorithm is essentially same as 3.3, just adds pre-processing to
          data vector.
        - Input vector z_t is now of length (N times L) where L is window
          length.
        - Use L = 1 for same results as 3.3.
        - Q is increased accordingly.

    Version 3.3
        - Add decay of S in the event of vanishing inputs.
        - Make sure rank of S does not drop (and work out what that means!)
          - stops S going singular.

    Version 3.2
        - Added ability to fix r to a static value, and also give it an
          upper bound. If undefined, defaults to num of data streams.

    Version 3.1
        - Combines good bits of Pedro's version, with my correction of the
          bugs.

    Changed how the algorithm deals with sci. Only difference, but somehow
    has a biggish effect on the output.
    """

    # Initialise variables and data structures
    #########################################

    # Derived Variables
    # Length of z or numStreams is now N x L
    numStreams = data.shape[1] * L
    timeSteps = data.shape[0]

    if r_upper_bound is None:
        r_upper_bound = numStreams

    # for energy test
    last_Z_pos = bool(1)  # bool flag
    lastChangeAt = 1
    sumYSq = 0.
    sumXSq = 0.

    # Data Stores
    res = {'hidden': zeros((timeSteps, numStreams)) * nan,   # Array for hidden Variables
           'E_t': zeros([timeSteps, 1]),                      # total energy of data
           'E_dash_t': zeros([timeSteps, 1]),                 # hidden var energy
           'e_ratio': zeros([timeSteps, 1]),                  # Energy ratio
           'RSRE': zeros([timeSteps, 1]),                     # Relative squared Reconstruction error
           'recon': zeros([timeSteps, numStreams]),           # reconstructed data
           'r_hist': zeros([timeSteps, 1]),                   # history of r values
           'eig_val': zeros((timeSteps, numStreams)) * nan,   # Estimated Eigenvalues
           'zt_mean': zeros((timeSteps, numStreams)),         # history of data mean
           'zt_var': zeros((timeSteps, numStreams)),          # history of data var
           'zt_var2': zeros((timeSteps, numStreams)),         # history of data var
           'S_trace': zeros((timeSteps, 1)),                  # history of S trace
           'skips': zeros((timeSteps, 1)),                    # tracks time steps where Z < 0
           'EWMA_res': zeros((timeSteps, 1)),                 # residual of energy ratio not accounted for by EWMA
           'Phi': [],
           'S': [],
           'Q': [],
           'w': zeros((timeSteps, numStreams)),
           'e': zeros((timeSteps, numStreams)),
           'anomalies': [],
           'forecast_err': zeros((timeSteps, numStreams)),
           'for_err_sum': zeros((timeSteps, 1)),
           'pred_zt': zeros((timeSteps, numStreams))}         # Error from forecasting with hidden vars

    # Initialisations
    # Q_0
    if fix_init_Q != 0:  # fix initial Q as identity
        q_0 = eye(numStreams)
        Q = q_0
        Qt_min1 = q_0
    else:  # generate random orthonormal matrix N x r
        Q = eye(numStreams)  # Max size of Q
        Qt_min1 = eye(numStreams)  # Max size of Q
        Q_0, R_0 = qr(rand(numStreams, r))
        Q[:, :r] = Q_0
        Qt_min1[:, :r] = Q_0

    # S_0
    small_value = 0.0001
    S = eye(numStreams) * small_value  # Avoids Singularity
    # v-1
    v = zeros((numStreams, 1))
    # U(t-1) for eigenvalue estimation
    U = eye(numStreams)
    # zt mean and var
    zt_mean = zeros((numStreams, 1))
    zt_var = zeros((numStreams, 1))
    zt_var2 = zeros((numStreams, 1))

    # NOTE algorithm's state (constant memory), S, Q and v and U are kept at max size

    # Use iterable for data
    # Now a generator to calculate z_tl
    iter_data = lag_inputs(data, L)

    # Main Loop #
    #############
    for t in range(1, timeSteps + 1):

        # alias to matrices for current r
        Qt = Q[:, :r]
        vt = v[:r, :]
        St = S[:r, :r]
        Ut = U[:r, :r]

        # Built-in next() works on both Python 2.6+ and Python 3 iterators.
        zt = next(iter_data)

        # --- Data Preprocessing ---
        # Update zt mean and var
        zt_var, zt_mean = EW_mean_var(zt, EW_mean_alpha, zt_var, zt_mean)
        zt_var2 = alpha_var(zt, alpha, zt_var2)

        # Convert to a column Vector
        # Already taken care of in this version
        # zt = zt.reshape(zt.shape[0],1)

        # Check S remains non-singular
        for idx in range(r):
            if S[idx, idx] < small_value:
                S[idx, idx] = small_value

        # --- Begin main algorithm ---
        ht = dot(Qt.T, zt)
        Z = dot(zt.T, zt) - dot(ht.T, ht)

        if Z > 0:

            last_Z_pos = 1

            # Refined version, use of extra terms
            u_vec = dot(St, vt)
            X = (alpha * St) + (2 * alpha * dot(u_vec, vt.T)) + dot(ht, ht.T)

            # Estimate eigenValues + Solve Ax = b using QR decomposition
            b_vec, e_values, Ut = QRsolve_eigV(X.T, Z, ht, Ut)

            beta = 4 * (dot(b_vec.T, b_vec) + 1)

            phi_sq = 0.5 + (1.0 / sqrt(beta))

            phi = sqrt(phi_sq)

            gamma = (1.0 - 2 * phi_sq) / (2 * phi)

            delta = phi / sqrt(Z)

            vt = gamma * b_vec

            St = X - ((1 / delta) * dot(vt, ht.T))

            w = (delta * ht) - (vt)

            ee = delta * zt - dot(Qt, w)

            Qt_min1 = Qt
            Qt = Qt - 2 * dot(ee, vt.T)

            # store e and w
            res['w'][t-1, :r] = w.T[0, :]
            res['e'][t-1, :] = ee.T[0, :]

        else:  # if Z is not > 0

            if norm(zt) > 0 and norm(ht) > 0:  # May be due to zt <= ht
                St = alpha * St  # Continue decay of St
                res['skips'][t-1] = 2  # record Skips
            else:  # or may be due to zt and ht = 0
                St = alpha * St  # Continue decay of St
                res['skips'][t-1] = 1  # record Skips

            # Recalculate Eigenvalues using other method
            # (less fast, but does not need Z to be positive)
            # NOTE(review): last_Z_pos is never assigned 0 anywhere in this
            # function, so the `elif` branch below is currently unreachable.
            if last_Z_pos == 1:
                # New U
                U2t_min1 = np.eye(r)
                # PHI = np.dot(Qt_min1.T, Qt)
                Wt = np.dot(St, U2t_min1)
                U2t, R2 = qr(Wt)  # Decomposition
                PHI_U = np.dot(U2t_min1.T, U2t)
                e_values = np.diag(np.dot(R2, PHI_U))
            elif last_Z_pos == 0:
                U2t_min1 = U2t
                # PHI = np.dot(Qt_min1.T, Qt)
                Wt = np.dot(St, U2t_min1)
                # Wt = np.dot(np.dot(St, PHI), U2t_min1)
                U2t, R2 = qr(Wt)  # Decomposition
                PHI_U = np.dot(U2t_min1.T, U2t)
                e_values = np.diag(np.dot(R2, PHI_U))

        # restore data structures
        Q[:, :r] = Qt
        v[:r, :] = vt
        S[:r, :r] = St
        U[:r, :r] = Ut

        # --- EVALUATION ---
        # Deviations from true dominant subspace
        if evalMetrics == 'T':
            if t == 1:
                res['subspace_error'] = zeros((timeSteps, 1))
                res['orthog_error'] = zeros((timeSteps, 1))
                res['angle_error'] = zeros((timeSteps, 1))
                # np.nan rather than np.NAN: the upper-case alias was
                # removed in NumPy 2.0.
                res['true_eig_val'] = ones((timeSteps, numStreams)) * np.nan
                Cov_mat = zeros([numStreams, numStreams])

            # Calculate Covariance Matrix of data up to time t
            Cov_mat = alpha * Cov_mat + dot(zt, zt.T)
            #
            res['Phi'].append(Cov_mat)
            #
            # Get eigenvalues and eigenvectors
            W, V = eig(Cov_mat)
            # Use this to sort eigenVectors according to descending eigenvalue
            eig_idx = W.argsort()  # Get sort index
            eig_idx = eig_idx[::-1]  # Reverse order (default is ascending)
            # v_r = highest r eigenvectors (according to their eigenvalue if sorted).
            V_r = V[:, eig_idx[:r]]
            # Calculate subspace error
            C = dot(V_r, V_r.T) - dot(Qt, Qt.T)
            res['subspace_error'][t-1, 0] = 10 * log10(trace(dot(C.T, C)))  # frobenius norm in dB

            # Store True r Dominant Eigenvalues
            res['true_eig_val'][t-1, :r] = W[eig_idx[:r]]

            # Calculate angle between projection matrixes
            # D = dot(dot(dot(V_r.T, Qt), Qt.T), V_r)
            # eigVal, eigVec = eig(D)
            # angle = arccos(sqrt(max(eigVal)))
            # res['angle_error'][t-1,0] = angle

            # Calculate deviation from orthonormality
            F = dot(Qt.T, Qt) - eye(r)
            res['orthog_error'][t-1, 0] = 10 * log10(trace(dot(F.T, F)))  # frobenius norm in dB

        # --- Store Values ---
        # Record data mean and Var
        res['zt_mean'][t-1, :] = zt_mean.T[0, :]
        res['zt_var'][t-1, :] = zt_var.T[0, :]
        res['zt_var2'][t-1, :] = zt_var2.T[0, :]

        # Record S & Q
        res['S'].append(St)
        res['Q'].append(Qt)

        # Record S trace
        res['S_trace'][t-1] = np.trace(St)

        # Store eigen values
        if 'e_values' not in locals():
            e_values = zt_var2  # Why this?
        else:
            res['eig_val'][t-1, :r] = e_values[:r]

        # Record reconstructed z
        z_hat = dot(Qt, ht)
        res['recon'][t-1, :] = z_hat.T[0, :]

        # Record hidden variables
        res['hidden'][t-1, :r] = ht.T[0, :]

        # Record RSRE
        if t == 1:
            top = 0.0
            bot = 0.0

        top = top + (norm(zt - z_hat) ** 2)

        bot = bot + (norm(zt) ** 2)

        res['RSRE'][t-1, 0] = top / bot

        # Record r
        res['r_hist'][t-1, 0] = r

        # --- Forecasting ---
        # NOTE(review): nesting rebuilt from a flattened source; the forecast
        # error lines are assumed to sit inside this branch -- confirm.
        if t > h_AR_buff:
            # Get Coefficients for ht+1
            # Get h-buffer window (can speed this up later)
            h_buffer = np.nan_to_num(res['hidden'][t-h_AR_buff:t, :])
            pred_h = np.zeros((r, 1))
            for i in range(r):
                coeffs = burg_AR(AR_order, h_buffer[:, i])
                for j in range(AR_order):
                    pred_h[i, 0] -= coeffs[j] * h_buffer[-1-j, i]

            res['pred_zt'][t-1, :] = dot(Qt, pred_h).T

            # Error of the previous step's forecast against the current zt
            res['forecast_err'][t-1, :] = np.abs(res['pred_zt'][t-2, :] - zt.T)
            res['for_err_sum'][t-1] = np.abs(res['pred_zt'][t-2, :] - zt.T).sum() / numStreams

        # --- Rank Estimation ---
        # Calculate energies
        sumXSq = alpha * sumXSq + np.sum(zt ** 2)  # Energy of Data
        sumYSq = alpha * sumYSq + np.sum(ht ** 2)  # Energy of hidden Variables

        res['E_t'][t-1, 0] = sumXSq
        res['E_dash_t'][t-1, 0] = sumYSq

        if sumXSq == 0:  # Catch NaNs
            e_ratio = 0.0
        else:
            e_ratio = sumYSq / sumXSq

        res['e_ratio'][t-1, 0] = e_ratio

        # Run EWMA on e_ratio
        if t == 1:
            pred_data = 0.0  # initialise value

        # Calculate residual using last time step's prediction
        residual = np.abs(e_ratio - pred_data)
        res['EWMA_res'][t-1, 0] = residual
        # Update prediction for next time step
        pred_data = EWMA_filter_alpha * e_ratio + (1 - EWMA_filter_alpha) * pred_data

        # Threshold residual for anomaly
        if residual > residual_thresh and t > ignoreUp2:
            # Record time step of anomaly
            res['anomalies'].append(t-1)

        if static_r == 0:  # optional parameter to keep r unchanged
            # Adjust Q_t, St and Ut for change in r
            if sumYSq < (F_min * sumXSq) and lastChangeAt < (t - holdOffTime) and r < r_upper_bound and t > ignoreUp2:

                # Note indexing with r works like r + 1 as index is from 0 in python

                # Extend Q by z_bar
                h_dash = dot(Q[:, :r].T, zt)
                z_bar = zt - dot(Q[:, :r], h_dash)
                z_bar_norm = norm(z_bar)
                z_bar = z_bar / z_bar_norm
                Q[:numStreams, r] = z_bar.T[0, :]

                s_end = z_bar_norm ** 2

                # Set next row and column to zero
                S[r, :] = 0.0
                S[:, r] = 0.0
                S[r, r] = s_end  # change last element

                # Update Ut_1
                # Set next row and column to zero
                U[r, :] = 0.0
                U[:, r] = 0.0
                U[r, r] = 1.0  # change last element

                # Update eigenvalue
                e_values = sp.r_[e_values, z_bar_norm ** 2]
                # This is the bit where the estimate is off? Don't really have anything better

                # new r, increment
                r = r + 1

                # Reset lastChange
                lastChangeAt = t

            elif sumYSq > (F_min * sumXSq) and lastChangeAt < t - holdOffTime and r > 1 and t > ignoreUp2:

                keeper = ones(r, dtype=bool)
                # Sorted in ascending order
                # Causing problems, skip sorting, (quicker/simpler), and just
                # cycle from the last added eigenvalue through to newest.
                # sorted_eigs = e_values[e_values.argsort()]

                acounted_var = sumYSq
                for idx in range(r)[::-1]:

                    if ((acounted_var - e_values[idx]) / sumXSq) > F_min + epsilon:
                        keeper[idx] = 0
                        acounted_var = acounted_var - e_values[idx]

                # use keeper as a logical selector for S and Q and U
                if not keeper.all():

                    # Delete rows/cols in Q, S, and U.
                    newQ = Q[:, :r].copy()
                    newQ = newQ[:, keeper]  # cols eliminated
                    Q[:newQ.shape[0], :newQ.shape[1]] = newQ

                    newS = S[:r, :r].copy()
                    newS = newS[keeper, :][:, keeper]  # rows/cols eliminated
                    S[:newS.shape[0], :newS.shape[1]] = newS

                    newU = U[:r, :r].copy()
                    newU = newU[keeper, :][:, keeper]  # rows/cols eliminated
                    U[:newU.shape[0], :newU.shape[1]] = newU

                    r = keeper.sum()
                    if r == 0:
                        r = 1

                    # Reset lastChange
                    lastChangeAt = t

    return res
def anomaly_AR_forcasting(st, p):
    """
    Use Auto Regressive prediction to calculate anomalies.

    h_window is ht_AR_win x numStreams.

    Each hidden variable is forecast one step ahead with an AR model fitted
    by ``burg_AR``; the forecast is projected through Q to predict the next
    input vector.  The squared norm of the prediction error is kept in a
    sliding window, differenced, and the latest difference is scaled by the
    root mean square of an older sample of differences to form ``t_stat``.
    ``st['anomaly']`` is set to True when ``|t_stat| > p['x_thresh']``.

    Parameters
    ----------
    st : dict
        Algorithm state, updated in place.  Keys read: 'ht', 't', 'r', 'Q'
        and 'pred_zt'.  Keys created/updated: 't_stat', 'pred_dsn',
        'x_sample', 'h_window', 'pred_err', 'pred_err_ave', 'pred_err_norm',
        'pred_zt', 'pred_err_win', 'pred_diff_sq_norm' and 'anomaly'.
    p : dict
        Parameters.  Keys read: 'ht_AR_win', 'AR_order', 'sample_N',
        'dependency_lag' and 'x_thresh'.

    Returns
    -------
    dict
        The same ``st`` object that was passed in.

    NOTE(review): `zt` and `numStreams` are neither parameters nor locals
    here, so they have to resolve as module-level names -- confirm they
    exist, or pass them in through `st`.
    NOTE(review): st['pred_zt'] is read before this function ever writes it;
    presumably the caller seeds it -- confirm.
    """
    # Initialise variables that are not yet present
    if 't_stat' not in st:
        st['t_stat'] = 0
        st['pred_dsn'] = 0
        st['x_sample'] = 0

    # Build/Slide h_window: oldest row drops out, newest ht goes in last.
    if 'h_window' in st:
        st['h_window'][:-1, :] = st['h_window'][1:, :]  # Shift window
    else:
        st['h_window'] = np.zeros((p['ht_AR_win'], st['ht'].size))
    st['h_window'][-1, :] = np.nan_to_num(st['ht'])

    # Forecasting only starts once t exceeds the window length.
    if st['t'] > p['ht_AR_win']:
        # Get coefficients for ht+1 and forecast each active hidden variable.
        pred_h = np.zeros((st['r'], 1))
        for i in range(st['r']):
            coeffs = burg_AR(p['AR_order'], st['h_window'][:, i])
            for j in range(p['AR_order']):
                pred_h[i, 0] -= coeffs[j] * st['h_window'][-1 - j, i]

        # Calculate prediction error based on last time step prediction
        st['pred_err'] = np.abs(st['pred_zt'] - zt.T)
        st['pred_err_ave'] = np.abs(st['pred_zt'] - zt.T).sum() / numStreams
        st['pred_err_norm'] = npl.norm(st['pred_zt'] - zt.T)

        # Update prediction for next time step
        st['pred_zt'] = dot(st['Q'][:, :st['r']], pred_h).T

        # --- Anomaly test ---
        # NOTE(review): nesting rebuilt from a flattened source; this section
        # is kept inside the forecasting branch because it needs
        # st['pred_err_norm'], which is only written above.

        # Build/Slide pred_err_window (stores squared error norms)
        if 'pred_err_win' in st:
            st['pred_err_win'][:-1] = st['pred_err_win'][1:]  # Shift window
        else:
            st['pred_err_win'] = np.zeros(p['sample_N'] + p['dependency_lag'])
        st['pred_err_win'][-1] = st['pred_err_norm'] ** 2

        if st['t'] >= (p['sample_N'] + p['dependency_lag']):
            # Differenced squared norms of the residuals.
            diffs = np.diff(st['pred_err_win'], axis=0)
            st['pred_diff_sq_norm'] = diffs
            st['pred_dsn'] = diffs[-1]

            # Sample = differences older than the last `dependency_lag`
            # entries.  Explicit bounds are used because the negative-index
            # form `[-(N + lag):-lag]` collapses to an empty slice when
            # dependency_lag is 0, which zeroes x_sample and blows up t_stat.
            n_diffs = diffs.shape[0]
            start = max(n_diffs - (p['sample_N'] + p['dependency_lag']), 0)
            stop = max(n_diffs - p['dependency_lag'], 0)
            st['x_sample'] = (diffs[start:stop] ** 2).sum()

            st['t_stat'] = diffs[-1] / np.sqrt(st['x_sample'] / p['sample_N'])

            if np.abs(st['t_stat']) > p['x_thresh']:
                st['anomaly'] = True

    return st