def hypothesisTesting():
    """Build the anomaly report for the first CSV pair whose relation code is 1.

    Every (row, col) index pair below the module-level ``CSVcount`` is
    visited.  For each pair both series are fetched from ``ResList`` under
    the keys ``"CSV<row+1>"`` / ``"CSV<col+1>"`` and the relation code is read
    from ``CsvRelations[row][col]``.  Only code 1 triggers any work: the
    window-correlation, slope-based and linear-regression detectors are run,
    their results are rendered into one string, the pair is plotted and the
    function returns straight away, so later pairs are never examined.

    Returns:
        The report string (rows terminated by ``<br>``) for the first pair
        with relation code 1, or ``None`` when no pair has that code.
    """
    for row in range(CSVcount):
        for col in range(CSVcount):
            first_series = ResList["CSV" + str(row + 1)]
            second_series = ResList["CSV" + str(col + 1)]
            relation = CsvRelations[row][col]

            # Codes 0, -1 (labelled "Hypothesis 1"), 2 and -2 have no handling
            # yet; only code 1 (labelled "Hypothesis 2") does anything.
            if relation != 1:
                continue

            # Correlation results
            correlation_hits = anomaliesFromWindowCorrelationWithConstantlag(
                first_series,
                second_series,
                window_size=15,
                maxlag=15,
                positive_correlation=True,
                pos=1,
                neg=1
            )

            # Slope based detection works on the value column only
            # (element 1 of every entry).
            first_values = [entry[1] for entry in first_series]
            second_values = [entry[1] for entry in second_series]
            slope_output = slopeBasedDetection(
                first_values, False, second_values, False
            )
            slope_hits = anomalyDatesSlopeBaseddetetion(slope_output, first_series)

            # Linear regression; the second element of the returned pair is
            # not used here.
            lr_output, _lr_model = linear_regression(first_values, second_values, 1)
            lr_hits = anomalies_from_linear_regression(lr_output, first_series)

            # Render the three result sets as text, collecting the pieces and
            # joining once at the end.
            pieces = [
                "Anomalies from Correlation test <br>",
                "Start Date End Date Correlation Value<br>",
            ]
            for hit in correlation_hits:
                pieces.append(
                    " ".join([str(hit[0]), str(hit[1]), str(hit[2])]) + "<br>"
                )

            pieces.append("Anomalies from Slope Based test <br>")
            pieces.append("Start Date End Date Slope Value <br>")
            for hit in slope_hits:
                pieces.append(
                    " ".join([str(hit[0]), str(hit[1]), str(hit[2])]) + " <br>"
                )

            pieces.append("Anomalies from Linear Regression test<br>")
            pieces.append("Date X Val Y Val Expected Y Val Difference <br>")
            for hit in lr_hits:
                pieces.append(
                    " ".join(
                        [
                            str(hit[0]),
                            str(hit[1]),
                            str(hit[2]),
                            str(hit[3]),
                            str(hit[4]),
                        ]
                    )
                    + "<br>"
                )

            plotGraph(first_series, second_series, correlation_hits)
            return "".join(pieces)

    # Hypothesis 1 Methods / Correlation: placeholder only, nothing to run.
def hypothesis4Testing(numOfFiles, *timeSeriesFileNames):
    """Print the anomalies each time series shares across three detectors.

    Every file is read with ``csv.reader``.  For each file, column 0 and
    column 2 (as addressed by ``getColumnFromListOfTuples``) are paired into a
    series, and the float-converted column 2 values of all files are averaged
    with ``findAverageTimeSeries``.  Each series is then compared with the
    average series by the slope-based, window-correlation and
    linear-regression detectors; the three merged results are combined with
    ``intersection`` and printed to stdout.

    Args:
        numOfFiles: Number of file names the caller intends to pass.
        *timeSeriesFileNames: Paths of the CSV files to analyse.

    Returns:
        None.  If ``numOfFiles`` does not equal the number of names given, a
        message is printed and the function returns before touching any file.
    """
    # Single-argument parenthesised prints behave the same as the Python 2
    # print statement and also parse under Python 3.
    if len(timeSeriesFileNames) != numOfFiles:
        print("Number of files mentioned do not match the specified files provided")
        return

    # One list of row tuples per input file.  Rows are materialised while the
    # file is still open so nothing lazy outlives the ``with`` block.
    csvDataList = []
    for fileName in timeSeriesFileNames:
        # Binary mode is what the Python 2 csv module expects.
        with open(fileName, "rb") as f:
            csvDataList.append([tuple(row) for row in csv.reader(f)])

    centresList = []
    testData = []
    dates = []  # stays empty when no files were given
    for rows in csvDataList:
        # Original note: "wholesale price, indexing starts from 1".
        # TODO confirm the index base used by getColumnFromListOfTuples.
        prices = getColumnFromListOfTuples(rows, 2)
        testData.append(convertListToFloat(prices))
        # Column 2 is fetched a second time on purpose: it is not visible here
        # whether convertListToFloat modifies its argument in place.
        dates = getColumnFromListOfTuples(rows, 0)
        values = getColumnFromListOfTuples(rows, 2)
        centresList.append(list(zip(dates, values)))

    # The average series is keyed by the date column of the LAST file read.
    # It is built as a real list because it is reused on every iteration below.
    avgTimeSeries = list(zip(dates, findAverageTimeSeries(testData)))

    for index, series in enumerate(centresList):
        # Slope based
        slopeBasedResult = mergeDates(
            slopeBased(series, False, avgTimeSeries, False)
        )

        # Correlation
        correlationResult = mergeDates(
            anomaliesFromWindowCorrelationWithConstantlag(series, avgTimeSeries)
        )

        # Linear regression
        # NOTE(review): hypothesisTesting unpacks linear_regression's return
        # value as a 2-tuple, while here the raw return value goes straight to
        # mergeDates -- confirm this is intended.
        lrResult = mergeDates(linear_regression(avgTimeSeries, series, 1))

        result = intersection(
            3,
            slopeBasedResult, "slope_based",
            correlationResult, "correlation",
            lrResult, "linear_regression"
        )

        print("Anomalies for time-series " + str(index) + " are:")
        for (a, b, c) in result:
            print(",".join([str(a), str(b), str(c)]))