def hypothesisTesting():
    """Run the pairwise anomaly tests and return an HTML-formatted report.

    Walks every ordered pair (i, j) with both indices in
    ``range(CSVcount)``, fetches the two series from ``ResList`` under the
    keys ``"CSV<i+1>"`` and ``"CSV<j+1>"`` and reads their relation code
    from ``CsvRelations[i][j]``.

    Only relation code 1 (labelled "Hypothesis 2") triggers any work: a
    windowed-correlation test, a slope based test and a linear regression
    test are run, the correlation anomalies are passed to ``plotGraph``
    and the combined report is returned immediately, so at most one pair
    is ever reported per call.

    Returns:
        A string whose lines are separated by ``<br>``, or ``None`` when
        no pair has relation code 1.
    """
    for i in range(CSVcount):
        for j in range(CSVcount):
            # Both series are looked up before the relation is inspected,
            # so a missing key fails regardless of the relation code.
            csv1 = ResList["CSV" + str(i + 1)]
            csv2 = ResList["CSV" + str(j + 1)]
            relation = CsvRelations[i][j]

            # Relation codes 0, -1 (Hypothesis 1), 2 and -2 are
            # placeholders without any processing; only code 1
            # (Hypothesis 2) is implemented.
            if relation != 1:
                continue

            # Correlation results
            correlation_hits = anomaliesFromWindowCorrelationWithConstantlag(
                csv1, csv2, window_size=15, maxlag=15,
                positive_correlation=True, pos=1, neg=1)

            # Slope based detection works on the element at index 1 of
            # every row of the series.
            values1 = [row[1] for row in csv1]
            values2 = [row[1] for row in csv2]
            slope_raw = slopeBasedDetection(values1, False, values2, False)
            slope_hits = anomalyDatesSlopeBaseddetetion(slope_raw, csv1)

            # Linear regression; the second element of the returned pair
            # is not used here.
            lr_raw, _lr_object = linear_regression(values1, values2, 1)
            lr_hits = anomalies_from_linear_regression(lr_raw, csv1)

            # Converting results to string
            report = [
                "Anomalies from Correlation test <br>",
                "Start Date End Date Correlation Value<br>",
            ]
            for hit in correlation_hits:
                report.append(
                    " ".join((str(hit[0]), str(hit[1]), str(hit[2])))
                    + "<br>")

            report.append("Anomalies from Slope Based test <br>")
            report.append("Start Date End Date Slope Value <br>")
            for hit in slope_hits:
                report.append(
                    " ".join((str(hit[0]), str(hit[1]), str(hit[2])))
                    + " <br>")

            report.append("Anomalies from Linear Regression test<br>")
            report.append("Date X Val Y Val Expected Y Val Difference <br>")
            for hit in lr_hits:
                report.append(
                    " ".join((str(hit[0]), str(hit[1]), str(hit[2]),
                              str(hit[3]), str(hit[4])))
                    + "<br>")

            resultString = "".join(report)

            plotGraph(csv1, csv2, correlation_hits)
            return resultString

    # Hypothesis 1 Methods
    # Correlation
    # (placeholder headings; there is no code behind them)
def hypothesis4Testing(numOfFiles, *timeSeriesFileNames):
    """Compare every time series with the average series (hypothesis 4).

    Each file is read as CSV. The column selected by
    ``getColumnFromListOfTuples(rows, 2)`` is converted with
    ``convertListToFloat`` and an average series is built from all of them
    with ``findAverageTimeSeries``. Every series is then passed together
    with the average to ``slopeCurveBasedDetection`` and the detections
    are mapped back onto the rows of the originating file with
    ``anomalyDatesSlopeBaseddetetion``.

    Args:
        numOfFiles: Number of file names the caller intends to pass.
        *timeSeriesFileNames: Paths of the CSV files to analyse.

    Returns:
        None. The total number of detections is printed to stdout; when
        ``numOfFiles`` does not match the number of names given, a
        message is printed and nothing else is done.
    """
    if len(timeSeriesFileNames) != numOfFiles:
        # Parenthesised single-argument print emits the same text under
        # Python 2 and is also valid Python 3 syntax.
        print("Number of files mentioned do not match the specified files provided")
        return

    csvDataList = []
    for fileName in timeSeriesFileNames:
        # NOTE(review): 'rb' is the mode the Python 2 csv module expects;
        # Python 3's csv.reader needs a text-mode file -- confirm the
        # target interpreter before changing it.
        with open(fileName, 'rb') as f:
            # Materialise the rows while the file is still open.
            csvDataList.append([tuple(row) for row in csv.reader(f)])

    testData = [
        convertListToFloat(getColumnFromListOfTuples(rows, 2))
        for rows in csvDataList
    ]

    avgTimeSeries = findAverageTimeSeries(testData)

    # Finding anomaly dates for every time series with average time series
    tcases = 0
    h4res = []
    for index, (series, rows) in enumerate(zip(testData, csvDataList)):
        # Alternative detector tried earlier:
        #   slopeBasedDetection(series, True, avgTimeSeries, True, 7, True, 0, 0)
        detections = slopeCurveBasedDetection(series, avgTimeSeries, 1)
        tcases += len(detections)
        h4res.append((index, anomalyDatesSlopeBaseddetetion(detections, rows)))

    # NOTE(review): only the first series' dates are merged and the return
    # value of mergeDates is discarded -- confirm this is intended.
    if h4res:  # nothing to merge when no series were processed
        mergeDates(h4res[0][1])

    print("Final Reported Anomalies ::::::::::::::::::: " + str(tcases))