def findAnomalousWindows(normalFileName, abnormalFileName, metric, manyWins):
    """Collect the leading outlier windows of one metric over several window sizes.

    Both input files are loaded through ``DataLoader.load``, passed through
    ``diff()``, and reduced to columns 0 and ``int(metric)``. For every window
    size a hyper-sphere is built from the normal data with
    ``createHyperSphere`` and handed to ``findOutlierWindows`` together with
    the abnormal data.

    Args:
        normalFileName: File passed to ``DataLoader.load`` for the normal run.
        abnormalFileName: File passed to ``DataLoader.load`` for the abnormal
            run.
        metric: Column selector; converted with ``int()`` before use.
        manyWins: When equal to ``True`` the window sizes are
            ``range(100, 250, 10)``; otherwise ``[150, 200, 250]``.

    Returns:
        A flat list with element ``[0]`` of at most five leading entries of
        each ``findOutlierWindows`` result, in window-size order.
        NOTE(review): presumably the outliers come back ranked most anomalous
        first -- confirm against ``findOutlierWindows``.
    """
    topCount = 5

    # The explicit comparison with True is kept so that truthy non-boolean
    # values still select the short list, as before.
    if manyWins == True:
        windowSizes = range(100, 250, 10)
    else:
        windowSizes = [150, 200, 250]
    distanceKinds = ['AGGR']
    normalizeFlags = [True]

    normalMatrix = DataLoader.load(normalFileName)
    normalMatrix.diff()
    normalMatrix.removeColumnsButKeep([0, int(metric)])

    abnormalMatrix = DataLoader.load(abnormalFileName)
    abnormalMatrix.diff()
    abnormalMatrix.removeColumnsButKeep([0, int(metric)])

    printMessage("Finding outliers...")
    anomalous = []
    for windowSize in windowSizes:
        sphere = createHyperSphere(normalMatrix, windowSize)
        for kind in distanceKinds:
            for flag in normalizeFlags:
                outliers = findOutlierWindows(
                    sphere, abnormalMatrix, windowSize, kind, flag)
                # Keep the first five entries, or fewer when the result is
                # shorter than that.
                for rank in range(min(topCount, len(outliers))):
                    anomalous.append(outliers[rank][0])
    return anomalous
def compareNormalRuns(normalFile, abnormalFile):
    """Gather the top correlation dissimilarities between two runs.

    Both files are loaded through ``DataLoader.load``, passed through
    ``diff()``, and stripped of column 0. For each window size the correlation
    matrices of both runs are handed to ``getAbnormalCorrelations`` (called
    with ``values_only=True``), and the ``diss`` attribute of the first ``k``
    returned entries is recorded.

    Args:
        normalFile: File passed to ``DataLoader.load`` for the normal run.
        abnormalFile: File passed to ``DataLoader.load`` for the abnormal run.

    Returns:
        A flat list of ``diss`` values, at most ``k`` per window size, in
        window-size order.
    """
    # Parameters
    # Alternative sweep left in the original as a comment: range(200, 400, 20)
    winSize = [100, 125, 150, 175, 200]
    k = 5  # top-k abnormal correlations
    d = 5  # top-d abnormal dimensions

    printMessage("comparing normal file : " + normalFile)
    printMessage("with abnormal file : " + abnormalFile)

    normM = DataLoader.load(normalFile)
    normM.diff()
    normM.removeColumns([0])

    abnormM = DataLoader.load(abnormalFile)
    abnormM.diff()
    abnormM.removeColumns([0])

    # Top correlation distances between the normal and the abnormal run.
    top_corrs = []
    for w in winSize:
        print("win size = " + str(w))
        normalCorrMatrix = normM.getCorrelationMatrix(w)
        abnormalCorrMatrix = abnormM.getCorrelationMatrix(w)
        print("rows = " + str(normalCorrMatrix.rows))
        print("cols = " + str(abnormalCorrMatrix.cols))

        corrList = getAbnormalCorrelations(
            normalCorrMatrix, abnormalCorrMatrix, k, d, values_only=True)

        # Open question carried over from the original: is k the optimal
        # number of entries to keep here?
        # Bound by the actual length so a result shorter than k no longer
        # raises IndexError (findAnomalousWindows guards the same way).
        for i in range(min(k, len(corrList))):
            top_corrs.append(corrList[i].diss)
    return top_corrs
def processFiles(normalFiles, abnormalFile):
    """Run the metrics analysis, or pick the normal run closest to the abnormal one.

    ``normalFiles`` is split on commas. With a single entry the call is
    forwarded to ``metricsAnalysis``. With several entries, every normal file
    is compared to the abnormal file through ``compareNormalRuns``, the
    per-file distances are plotted with ``plotCorrelations``, and the normal
    file that occurs most often among the 25 smallest distances is reported
    through ``printMessage``.

    Args:
        normalFiles: Comma-separated string of normal-run file names.
        abnormalFile: File name of the abnormal run.

    Returns:
        None. Results are reported through ``printMessage``.
    """
    closest_count = 25  # number of smallest distances that get a vote

    nFiles = normalFiles.split(",")
    if len(nFiles) == 1:
        metricsAnalysis(normalFiles, abnormalFile)
        return

    printMessage("Comparing against multiple normal files")
    top_corr_list = []  # [file, distance] pairs over all normal files
    corr_per_file = []  # one list of distances per file, for plotting
    for f in nFiles:
        top_corrs = compareNormalRuns(f, abnormalFile)
        corr_per_file.append(top_corrs)
        for distance in top_corrs:
            top_corr_list.append([f, distance])
    plotCorrelations(corr_per_file)

    # Now sort the list w.r.t. the distance.
    sorted_corr_list = sorted(top_corr_list, key=operator.itemgetter(1))

    # Which normal run dominates the closest distances? Slicing keeps this
    # safe when fewer than `closest_count` distances were collected.
    run_map = {}
    for entry in sorted_corr_list[:closest_count]:
        run = entry[0]
        run_map[run] = run_map.get(run, 0) + 1
    printMessage(
        "Frequcny map for normal runs for closest correlation distances :"
        + str(run_map))

    # The first file is the starting candidate and wins ties, as before. It
    # may have no entry among the closest distances, so its count defaults
    # to 0 instead of raising KeyError.
    closest_run = nFiles[0]
    max_num = run_map.get(closest_run, 0)
    for key in run_map:
        if run_map[key] > max_num:
            max_num = run_map[key]
            closest_run = str(key)
    printMessage("Closest normal run to given abnormal run is : " + closest_run)
def metricsAnalysis(normalFile, abnormalFile):
    """Count how often each metric is flagged as abnormal and print the tally.

    Both files are loaded through ``DataLoader.load``, passed through
    ``diff()``, and stripped of column 0. For every window size the
    correlation matrices of the two runs are handed to
    ``getAbnormalCorrelations``; the metrics that ``findAbnormalMetrics``
    derives from the result each receive one vote. The final vote table is
    passed to ``printResults``.

    Args:
        normalFile: File passed to ``DataLoader.load`` and
            ``getFeaturesNames`` for the normal run.
        abnormalFile: File passed to ``DataLoader.load`` for the abnormal run.

    Returns:
        None. The tally is handed to ``printResults``.
    """
    # Parameters
    # Alternative sweep left in the original as a comment: range(200, 400, 20)
    windowSizes = [100, 125, 150, 175, 200]
    topK = [3]  # top-k abnormal correlations
    topD = [3]  # top-d abnormal dimensions

    printMessage('Loading data files...')
    normalData = DataLoader.load(normalFile)
    normalData.diff()
    normalData.removeColumns([0])
    columnCount = normalData.cols  # read after column 0 has been removed

    abnormalData = DataLoader.load(abnormalFile)
    abnormalData.diff()
    abnormalData.removeColumns([0])

    # Feature names, without the leading ID metric.
    featureNames = getFeaturesNames(normalFile)
    del featureNames[0]

    votes = {}
    for size in windowSizes:
        printMessage('Calculating correlations for window-size: ' + str(size))
        normalCorr = normalData.getCorrelationMatrix(size)
        abnormalCorr = abnormalData.getCorrelationMatrix(size)

        for k in topK:
            for d in topD:
                printMessage('Finding abnormal correlations...')
                corrList = getAbnormalCorrelations(
                    normalCorr, abnormalCorr, k, d)
                flagged = findAbnormalMetrics(
                    corrList, featureNames, columnCount)
                for name in flagged:
                    votes[name] = votes.get(name, 0) + 1

    printResults(votes)
def metricsAnalysis(normalFile, abnormalFile):
    """Count how often each metric is flagged as abnormal and print the tally.

    Both files are loaded through ``DataLoader.load``, passed through
    ``diff()``, and stripped of column 0. For every window size the
    correlation matrices of the two runs are handed to
    ``getAbnormalCorrelations``; the metrics that ``findAbnormalMetrics``
    derives from the result each receive one vote. The final vote table is
    passed to ``printResults``.

    Args:
        normalFile: File passed to ``DataLoader.load`` and
            ``getFeaturesNames`` for the normal run.
        abnormalFile: File passed to ``DataLoader.load`` for the abnormal run.

    Returns:
        None. The tally is handed to ``printResults``.
    """
    # Parameters
    # Alternative sweep left in the original as a comment: range(200, 400, 20)
    windowSizes = [100, 125, 150, 175, 200]
    topK = [3]  # top-k abnormal correlations
    topD = [3]  # top-d abnormal dimensions

    printMessage('Loading data files...')
    normalData = DataLoader.load(normalFile)
    normalData.diff()
    normalData.removeColumns([0])
    columnCount = normalData.cols  # read after column 0 has been removed

    abnormalData = DataLoader.load(abnormalFile)
    abnormalData.diff()
    abnormalData.removeColumns([0])

    # Feature names, without the leading ID metric.
    featureNames = getFeaturesNames(normalFile)
    del featureNames[0]

    votes = {}
    for size in windowSizes:
        printMessage('Calculating correlations for window-size: ' + str(size))
        normalCorr = normalData.getCorrelationMatrix(size)
        abnormalCorr = abnormalData.getCorrelationMatrix(size)

        for k in topK:
            for d in topD:
                printMessage('Finding abnormal correlations...')
                corrList = getAbnormalCorrelations(
                    normalCorr, abnormalCorr, k, d)
                flagged = findAbnormalMetrics(
                    corrList, featureNames, columnCount)
                for name in flagged:
                    votes[name] = votes.get(name, 0) + 1

    printResults(votes)
elif select_regions is True: mode = 'SELECT_REGIONS' elif select_classname is True: mode = 'SELECT_CLASSNAME' return mode ############################################################################# # Main script ############################################################################# # Parse options (options, args) = parseOptions() mode = getMode(options) abnormalFile = getAbnormalFile(options) normalFile = getNormalFile(options) manyWins = useManyWindows(options) printMessage('Normal File: ' + normalFile) printMessage('Abnormal File: ' + abnormalFile) if mode == 'SELECT_METRICS': processFiles(normalFile, abnormalFile) elif mode == 'SELECT_REGIONS': metric = getMetric(options) localizationAnalysis(normalFile, abnormalFile, metric, manyWins) elif mode == 'SELECT_CLASSNAME': className = getClassName(options) abnormal = getPrintAbnormal(options) findAnomalousFunctionWithStackTrace(normalFile, abnormalFile, abnormal, className)
if select_metrics is False and select_regions is False: HandleError.exit('Please use one of these options:\n--select-metrics OR --select-regions. \nUse -h option for help.') if select_metrics is True: mode = 'SELECT_METRICS' elif select_regions is True: mode = 'SELECT_REGIONS' return mode ############################################################################# # Main script ############################################################################# # Parse options (options, args) = parseOptions() mode = getMode(options) abnormalFile = getAbnormalFile(options) normalFile = getNormalFile(options) manyWins = useManyWindows(options) printMessage('Normal File: ' + normalFile) printMessage('Abnormal File: ' + abnormalFile) if mode == 'SELECT_METRICS': metricsAnalysis(normalFile, abnormalFile) elif mode == 'SELECT_REGIONS': metric = getMetric(options) localizationAnalysis(normalFile, abnormalFile, metric, manyWins)