def compareNormalRuns(normalFile, abnormalFile):
    """Collect the top correlation ``diss`` values between two runs.

    Both files are loaded with ``DataLoader.load``, passed through
    ``diff()`` and stripped of column 0 (presumably the ID column -- confirm
    against the data format).  For every window size a correlation matrix is
    built for each run and ``getAbnormalCorrelations`` is asked for the
    top-k entries; the ``diss`` attribute of each entry is collected.

    Args:
        normalFile: path of the data file recorded during a normal run.
        abnormalFile: path of the data file recorded during an abnormal run.

    Returns:
        A flat list holding up to ``k`` ``diss`` values per window size, in
        window-size order.
    """
    # Parameters
    winSize = [100, 125, 150, 175, 200]
    k = 5  # top-k abnormal correlations
    d = 5  # top-d abnormal dimensions

    printMessage("comparing normal file : " + normalFile)
    printMessage("with abnormal file : " + abnormalFile)

    normM = DataLoader.load(normalFile)
    normM.diff()
    normM.removeColumns([0])

    abnormM = DataLoader.load(abnormalFile)
    abnormM.diff()
    abnormM.removeColumns([0])

    # this will store the top correlations between normal and abnormal runs
    top_corrs = []

    for w in winSize:
        print("win size = " + str(w))
        normalCorrMatrix = normM.getCorrelationMatrix(w)
        abnormalCorrMatrix = abnormM.getCorrelationMatrix(w)

        # Debug output; note rows come from the normal matrix and cols from
        # the abnormal one, exactly as before.
        print("rows = " + str(normalCorrMatrix.rows))
        print("cols = " + str(abnormalCorrMatrix.cols))

        corrList = getAbnormalCorrelations(normalCorrMatrix,
                                           abnormalCorrMatrix,
                                           k, d, values_only=True)

        # is k the optimal number here
        # Bound by the actual result length so a short result list does not
        # raise IndexError.
        for i in range(min(k, len(corrList))):
            top_corrs.append(corrList[i].diss)

    return top_corrs
def metricsAnalysis(normalFile, abnormalFile):
    """Tally how often each metric is reported as abnormal and print it.

    Both files are loaded with ``DataLoader.load``, passed through ``diff()``
    and stripped of column 0.  For every window size and every (k, d)
    setting, ``getAbnormalCorrelations`` and ``findAbnormalMetrics`` are
    called, and each metric they return has its counter incremented.  The
    final counts are handed to ``printResults``.

    Args:
        normalFile: path of the data file recorded during a normal run; also
            the source of the feature names.
        abnormalFile: path of the data file recorded during an abnormal run.
    """
    # Parameters
    window_sizes = [100, 125, 150, 175, 200]
    top_k_values = [3]  # top-k abnormal correlations
    top_d_values = [3]  # top-d abnormal dimensions

    printMessage('Loading data files...')
    normal_matrix = DataLoader.load(normalFile)
    normal_matrix.diff()
    normal_matrix.removeColumns([0])
    column_count = normal_matrix.cols

    abnormal_matrix = DataLoader.load(abnormalFile)
    abnormal_matrix.diff()
    abnormal_matrix.removeColumns([0])

    # Get features names
    feature_names = getFeaturesNames(normalFile)
    del feature_names[0]  # remove ID metric

    # Every (k, d) pair, in the order the nested loops would visit them.
    settings = [(k, d) for k in top_k_values for d in top_d_values]

    hit_counts = {}
    for window in window_sizes:
        printMessage('Calculating correlations for window-size: ' + str(window))
        normal_corr = normal_matrix.getCorrelationMatrix(window)
        abnormal_corr = abnormal_matrix.getCorrelationMatrix(window)

        for k, d in settings:
            printMessage('Finding abnormal correlations...')
            correlations = getAbnormalCorrelations(normal_corr, abnormal_corr, k, d)
            flagged = findAbnormalMetrics(correlations, feature_names, column_count)
            for name in flagged:
                hit_counts[name] = hit_counts.get(name, 0) + 1

    printResults(hit_counts)
def metricsAnalysis(normalFile, abnormalFile):
    """Rank metrics by how many times they are flagged as abnormal.

    Loads the normal and abnormal data files, then sweeps over the window
    sizes and the (k, d) settings.  Each sweep step asks
    ``getAbnormalCorrelations`` for a correlation list and
    ``findAbnormalMetrics`` for the metrics involved; every returned metric
    gets one more vote.  The vote table is passed to ``printResults``.

    Args:
        normalFile: path of the normal-run data file (feature names are read
            from this file as well).
        abnormalFile: path of the abnormal-run data file.
    """
    def _load_prepared(path):
        # Load one file, apply diff() and drop column 0.
        matrix = DataLoader.load(path)
        matrix.diff()
        matrix.removeColumns([0])
        return matrix

    # Parameters
    windows = [100, 125, 150, 175, 200]
    k_choices = [3]  # top-k abnormal correlations
    d_choices = [3]  # top-d abnormal dimensions

    printMessage('Loading data files...')
    normal = _load_prepared(normalFile)
    num_cols = normal.cols
    abnormal = _load_prepared(abnormalFile)

    # Get features names
    names = getFeaturesNames(normalFile)
    del names[0]  # remove ID metric

    votes = {}
    for size in windows:
        printMessage('Calculating correlations for window-size: ' + str(size))
        corr_normal = normal.getCorrelationMatrix(size)
        corr_abnormal = abnormal.getCorrelationMatrix(size)

        for top_k in k_choices:
            for top_d in d_choices:
                printMessage('Finding abnormal correlations...')
                found = getAbnormalCorrelations(corr_normal, corr_abnormal,
                                                top_k, top_d)
                for metric in findAbnormalMetrics(found, names, num_cols):
                    if metric in votes:
                        votes[metric] += 1
                    else:
                        votes[metric] = 1

    printResults(votes)
#!/usr/bin/env python
# Compare two data files column by column and report every pair of values
# that differs by more than TOLERANCE.
#
# Usage: <script> FILE_A FILE_B
#
# Column 0 is skipped.  Rows are walked using the length of file A's column.

from localization import DataLoader, Column, Matrix
import sys

###############################################################################
# Main script
###############################################################################

# Largest absolute difference that is still treated as "equal".
TOLERANCE = 0.001

if len(sys.argv) < 3:
    sys.exit("usage: %s FILE_A FILE_B" % sys.argv[0])

fileA = sys.argv[1]
fileB = sys.argv[2]

fileAMatrix = DataLoader.load(fileA)
fileBMatrix = DataLoader.load(fileB)

# Single-argument print calls produce the same text on Python 2 and 3.
print("File A: cols: %s rows: %s" % (fileAMatrix.cols, fileAMatrix.rows))
print("File B: cols: %s rows: %s" % (fileBMatrix.cols, fileBMatrix.rows))

# Start at 1: column 0 is not compared.
for i in range(1, fileAMatrix.cols):
    print("Comparing col %s" % i)
    colA = fileAMatrix.getCol(i)
    colB = fileBMatrix.getCol(i)
    for j in range(colA.size()):
        diff = float(colA.at(j)) - float(colB.at(j))
        # Use the magnitude so that values larger in file B are reported
        # too; the signed difference is still what gets printed.
        if abs(diff) > TOLERANCE:
            print("\tDiff: %s" % diff)