def hypothesis4Testing(numOfFiles, *timeSeriesFileNames):
    """Run slope-curve detection on every series against the average series.

    Each CSV file is read fully into a list of row tuples. Column 2 of every
    file (as returned by ``getColumnFromListOfTuples(rows, 2)``) is converted
    to floats, the average of those series is computed, and each series is
    then passed to ``slopeCurveBasedDetection`` together with that average.

    NOTE(review): a second ``hypothesis4Testing`` is defined later in this
    file; being the later definition, it is the one bound to the name once
    the module has loaded. Confirm whether this version is still needed.

    Args:
        numOfFiles: Expected number of file names; must equal the number of
            names actually passed.
        *timeSeriesFileNames: Paths of the CSV files, one series per file.

    Returns:
        None. On a count mismatch a message is printed and the function
        returns early. Otherwise the summed length of all detection results
        is printed.
    """
    if len(timeSeriesFileNames) != numOfFiles:
        print("Number of files mentioned do not match the specified files provided")
        return

    # One list of row tuples per input file, in argument order.
    csvDataList = []
    for fileName in timeSeriesFileNames:
        # 'rb' is the mode the Python 2 csv module expects for its input file.
        with open(fileName, 'rb') as f:
            csvDataList.append([tuple(row) for row in csv.reader(f)])

    testData = [
        convertListToFloat(getColumnFromListOfTuples(rows, 2))
        for rows in csvDataList
    ]
    avgTimeSeries = findAverageTimeSeries(testData)

    # Find anomaly dates for every series with respect to the average series.
    tcases = 0
    h4res = []
    for count, series in enumerate(testData):
        detected = slopeCurveBasedDetection(series, avgTimeSeries, 1)
        tcases += len(detected)
        res = anomalyDatesSlopeBaseddetetion(detected, csvDataList[count])
        h4res.append((count, res))

    # Only the first series' result is merged, and the return value is not
    # used, exactly as before. The guard avoids an IndexError when no files
    # were supplied.
    # NOTE(review): confirm whether mergeDates is called here for a side
    # effect or whether this is a leftover.
    if h4res:
        mergeDates(h4res[0][1])

    print("Final Reported Anomalies ::::::::::::::::::: " + str(tcases))
def hypothesis4Testing(numOfFiles, *timeSeriesFileNames):
    """Print, per series, the combined result of three anomaly detectors.

    Each CSV file is read fully into a list of row tuples. For every file,
    column 0 is paired with column 2 to form that file's series, and column 2
    is also converted to floats to compute the average series across files.
    Each series is then compared with the average by ``slopeBased``,
    ``anomaliesFromWindowCorrelationWithConstantlag`` and
    ``linear_regression``. Every detector's output goes through
    ``mergeDates``, the three merged results are combined by
    ``intersection``, and the resulting triples are printed.

    NOTE(review): an earlier function with the same name exists above in this
    file; this later definition is the one that stays bound to the name.

    Args:
        numOfFiles: Expected number of file names; must equal the number of
            names actually passed.
        *timeSeriesFileNames: Paths of the CSV files, one series per file.

    Returns:
        None. On a count mismatch a message is printed and the function
        returns early. Otherwise results are written to standard output.
    """
    if len(timeSeriesFileNames) != numOfFiles:
        print("Number of files mentioned do not match the specified files provided")
        return

    # One list of row tuples per input file, in argument order.
    csvDataList = []
    for fileName in timeSeriesFileNames:
        # "rb" is the mode the Python 2 csv module expects for its input file.
        with open(fileName, "rb") as f:
            csvDataList.append([tuple(row) for row in csv.reader(f)])

    centresList = []
    testData = []
    firstColumn = []  # stays empty when no files are supplied
    for rows in csvDataList:
        # Column 2 is the wholesale price according to the original author's
        # comment ("indexing starts from 1").
        # NOTE(review): verify the index base used by
        # getColumnFromListOfTuples.
        firstColumn = getColumnFromListOfTuples(rows, 0)
        prices = getColumnFromListOfTuples(rows, 2)
        # Pair before converting, so the pairs keep the raw CSV values
        # whatever convertListToFloat does with its argument.
        centresList.append(list(zip(firstColumn, prices)))
        testData.append(convertListToFloat(prices))

    # The average series is labelled with column 0 of the LAST file read.
    # NOTE(review): this presumes all files share the same column-0 values
    # (presumably dates) - confirm.
    avgTimeSeries = list(zip(firstColumn, findAverageTimeSeries(testData)))

    for i, c_list in enumerate(centresList):
        # Slope based
        slopeBasedResult = mergeDates(
            slopeBased(c_list, False, avgTimeSeries, False)
        )

        # Correlation
        correlationResult = mergeDates(
            anomaliesFromWindowCorrelationWithConstantlag(c_list, avgTimeSeries)
        )

        # Linear regression
        lrResult = mergeDates(linear_regression(avgTimeSeries, c_list, 1))

        # The leading 3 is presumably the number of (result, label) pairs
        # that follow - confirm against intersection().
        result = intersection(
            3,
            slopeBasedResult, "slope_based",
            correlationResult, "correlation",
            lrResult, "linear_regression",
        )

        print("Anomalies for time-series " + str(i) + " are:")
        for (a, b, c) in result:
            print(str(a) + "," + str(b) + "," + str(c))