def hypothesisTesting():
    """Run the pairwise hypothesis tests over every ordered pair of CSV series.

    For each ``(i, j)`` in ``range(CSVcount)`` the series stored in
    ``ResList`` under ``"CSV<i+1>"`` and ``"CSV<j+1>"`` are looked up together
    with ``CsvRelations[i][j]``.  Only relation ``1`` (hypothesis 2) has an
    implementation; every other relation value is skipped.

    For the first pair whose relation is ``1`` the function runs the
    window-correlation, slope-based and linear-regression detectors, plots
    the two series with the correlation anomalies via ``plotGraph`` and
    returns an HTML-formatted report of all three anomaly lists.

    Returns:
        The report string for the first pair with relation ``1``, or ``None``
        when no pair has that relation.
    """
    # Column separators used between the fields of a report row.
    gap4 = "&nbsp;&nbsp;&nbsp;&nbsp;"
    gap5 = "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"

    for first in range(0, CSVcount):
        for second in range(0, CSVcount):
            # Lookups happen for every pair, before the relation is inspected.
            series_a = ResList["CSV" + str(first + 1)]
            series_b = ResList["CSV" + str(second + 1)]
            relation = CsvRelations[first][second]

            # Relations 0, -1 (hypothesis 1), 2 and -2 have no implementation.
            if relation != 1:
                continue

            # Hypothesis 2 -- correlation results.
            correlation_hits = anomaliesFromWindowCorrelationWithConstantlag(
                series_a, series_b, window_size=15, maxlag=15,
                positive_correlation=True, pos=1, neg=1)

            # Slope-based detection works on the bare values (element 1 of
            # each entry).
            values_a = [point[1] for point in series_a]
            values_b = [point[1] for point in series_b]
            slope_hits = anomalyDatesSlopeBaseddetetion(
                slopeBasedDetection(values_a, False, values_b, False),
                series_a)

            # Linear regression returns a pair; only the first item is used.
            lr_based, _lr_object = linear_regression(values_a, values_b, 1)
            lr_hits = anomalies_from_linear_regression(lr_based, series_a)

            # Assemble the report piece by piece and join once at the end.
            parts = [
                "Anomalies from Correlation test <br>",
                "Start Date &nbsp;&nbsp;&nbsp;&nbsp; End Date &nbsp;&nbsp;&nbsp;&nbsp; Correlation Value<br>",
            ]
            for hit in correlation_hits:
                parts.append(
                    str(hit[0]) + gap5 + str(hit[1]) + gap5 + str(hit[2]) + "<br>")

            parts.append("Anomalies from Slope Based test <br>")
            parts.append("Start Date &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; End Date &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Slope Value <br>")
            for hit in slope_hits:
                parts.append(
                    str(hit[0]) + gap5 + str(hit[1]) + gap4 + str(hit[2])
                    + "&nbsp;&nbsp;&nbsp;&nbsp; <br>")

            parts.append("Anomalies from Linear Regression test<br>")
            parts.append("Date &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; X Val &nbsp;&nbsp;&nbsp;&nbsp; Y Val &nbsp;&nbsp;&nbsp;&nbsp; Expected Y Val &nbsp;&nbsp;&nbsp;&nbsp; Difference <br>")
            for hit in lr_hits:
                parts.append(
                    str(hit[0]) + gap4 + str(hit[1]) + gap4 + str(hit[2])
                    + gap4 + str(hit[3]) + gap4 + str(hit[4]) + "<br>")

            report = "".join(parts)
            plotGraph(series_a, series_b, correlation_hits)
            # The report of the first matching pair ends the whole scan.
            return report

    # Hypothesis 1 methods (correlation) are not implemented yet, so the
    # function falls through and returns None when no pair has relation 1.
# Example #2
# 0
def hypothesis4Testing(numOfFiles, *timeSeriesFileNames):
    """Print the anomalies of each time series relative to the average series.

    Every file is read as CSV.  For each file, column 2 is converted to
    floats and collected for averaging, and columns 0 and 2 are paired into
    that file's series (the original comment calls column 2 the wholesale
    price; column 0 is presumably the date -- confirm against the data).
    The average series from ``findAverageTimeSeries`` is paired with the
    column-0 values of the LAST file read.

    Each file's series is then compared with the average series by the
    slope-based, window-correlation and linear-regression detectors; each
    result is passed through ``mergeDates`` and the three are combined with
    ``intersection``.  The combined entries are printed to stdout.

    The body is written so that it behaves the same on Python 2 and also
    runs on Python 3 (print calls, materialised ``zip`` results, csv file
    mode chosen per interpreter).

    Args:
        numOfFiles: Number of file names the caller intends to pass; must
            equal ``len(timeSeriesFileNames)``.
        *timeSeriesFileNames: Paths of the CSV files to analyse.

    Returns:
        None.  On a count mismatch a message is printed and nothing else
        is done.
    """
    import sys  # local import: only needed to pick the csv open mode

    if len(timeSeriesFileNames) != numOfFiles:
        print("Number of files mentioned do not match the specified files provided")
        return

    # The csv module wants binary files on Python 2 but text files opened
    # with newline="" on Python 3.
    if sys.version_info[0] >= 3:
        open_kwargs = {"mode": "r", "newline": ""}
    else:
        open_kwargs = {"mode": "rb"}

    csvDataList = []  # 2D list storing data of each file
    for fileName in timeSeriesFileNames:
        with open(fileName, **open_kwargs) as f:
            # Materialise the rows while the file is still open; a lazy
            # map() would be consumed after the file has been closed.
            csvData = [tuple(row) for row in csv.reader(f)]
        csvDataList.append(csvData)

    centresList = []
    testData = []
    temp1 = []  # column 0 of the most recently processed file
    for rows in csvDataList:
        td = getColumnFromListOfTuples(rows, 2)  # wholesale price
        testData.append(convertListToFloat(td))
        temp1 = getColumnFromListOfTuples(rows, 0)
        # Column 2 is fetched again on purpose: the helpers are opaque, so
        # the un-converted values are re-read rather than reusing ``td``.
        temp2 = getColumnFromListOfTuples(rows, 2)
        centresList.append(list(zip(temp1, temp2)))

    # list(...) keeps the series reusable: it is handed to three detectors
    # on every iteration below, which a one-shot zip iterator cannot survive.
    avgTimeSeries = findAverageTimeSeries(testData)
    avgTimeSeries = list(zip(temp1, avgTimeSeries))

    for i, c_list in enumerate(centresList):
        # Slope based
        slopeBasedResult = mergeDates(
            slopeBased(c_list, False, avgTimeSeries, False))
        # Correlation
        correlationResult = mergeDates(
            anomaliesFromWindowCorrelationWithConstantlag(c_list, avgTimeSeries))
        # Linear regression
        lrResult = mergeDates(linear_regression(avgTimeSeries, c_list, 1))

        result = intersection(
            3,
            slopeBasedResult, "slope_based",
            correlationResult, "correlation",
            lrResult, "linear_regression",
        )
        # NOTE(review): "fior" looks like a typo for "for"; the text is kept
        # verbatim in case anything consumes this output.
        print("Anomalies fior time-series " + str(i) + " are:")
        for (a, b, c) in result:
            print(str(a) + "," + str(b) + "," + str(c))