예제 #1
0
def hypothesis4Testing(numOfFiles, *timeSeriesFileNames):
    """Compare every time series with the average series and report anomalies.

    Each CSV file is read completely, column 2 of every row is extracted
    with ``getColumnFromListOfTuples`` and converted with
    ``convertListToFloat``.  The converted series are averaged by
    ``findAverageTimeSeries`` and each one is then checked against that
    average with ``slopeCurveBasedDetection``.

    Args:
        numOfFiles: Number of file names the caller claims to pass.
        *timeSeriesFileNames: Paths of the CSV files to load.

    Returns:
        None.  A message is printed when the number of file names differs
        from ``numOfFiles``; otherwise the total number of detections
        across all series is printed.
    """
    if numOfFiles != len(timeSeriesFileNames):
        print("Number of files mentioned do not match the specified files provided")
        return

    # One list of row tuples per input file, in the order the files were given.
    csvDataList = []
    for path in timeSeriesFileNames:
        with open(path, 'rb') as handle:
            rows = [tuple(record) for record in csv.reader(handle)]
        csvDataList.append(rows)

    # Column 2 of every file, converted to floats.
    testData = [
        convertListToFloat(getColumnFromListOfTuples(rows, 2))
        for rows in csvDataList
    ]

    avgTimeSeries = findAverageTimeSeries(testData)

    # Detect anomalies of each series relative to the average series.
    # An alternative detector call was tried here previously:
    #   slopeBasedDetection(series, True, avgTimeSeries, True, 7, True, 0, 0)
    tcases = 0
    h4res = []
    for index, series in enumerate(testData):
        detections = slopeCurveBasedDetection(series, avgTimeSeries, 1)
        tcases += len(detections)
        # Map the detections back onto the rows of the file they came from.
        anomalyDates = anomalyDatesSlopeBaseddetetion(detections, csvDataList[index])
        h4res.append((index, anomalyDates))

    # NOTE(review): only the first series' result is handed to mergeDates and
    # the return value is not used -- confirm whether this call is intentional.
    mergeDates(h4res[0][1])
    print("Final Reported Anomalies ::::::::::::::::::: " + str(tcases))
예제 #2
0
def hypothesis4Testing(numOfFiles, *timeSeriesFileNames):
    """Report anomalies that three detectors agree on, for every time series.

    Each CSV file is read completely.  For every file, column 0 and column 2
    are extracted with ``getColumnFromListOfTuples`` (the original author's
    note calls column 2 the wholesale price; column 0 is presumably the date
    -- confirm against the input files).  The column-2 values are converted
    with ``convertListToFloat`` and averaged across files by
    ``findAverageTimeSeries``.

    Every series is then compared with the average series by ``slopeBased``,
    ``anomaliesFromWindowCorrelationWithConstantlag`` and
    ``linear_regression``; each result goes through ``mergeDates`` and the
    three results are combined with ``intersection``.

    Args:
        numOfFiles: Number of file names the caller claims to pass.
        *timeSeriesFileNames: Paths of the CSV files to load.

    Returns:
        None.  A message is printed when the number of file names differs
        from ``numOfFiles``; otherwise, for each series, a header line is
        printed followed by one comma-separated line per 3-tuple returned by
        ``intersection``.
    """
    if len(timeSeriesFileNames) != numOfFiles:
        print("Number of files mentioned do not match the specified files provided")
        return

    # One list of row tuples per input file, in the order the files were given.
    csvDataList = []
    for fileName in timeSeriesFileNames:
        with open(fileName, "rb") as f:
            csvDataList.append([tuple(row) for row in csv.reader(f)])

    centresList = []  # per file: list of (column 0, raw column 2) pairs
    testData = []     # per file: column 2 converted to floats
    lastDates = []    # column 0 of the most recently processed file
    for rows in csvDataList:
        lastDates = getColumnFromListOfTuples(rows, 0)
        # Extract column 2 once and reuse it for both the pairs and the floats.
        prices = getColumnFromListOfTuples(rows, 2)
        # Build the pairs before converting so they always hold the raw
        # column values, even if the conversion works in place.
        centresList.append(list(zip(lastDates, prices)))
        testData.append(convertListToFloat(prices))

    # The average series is labelled with column 0 of the LAST file read;
    # zip stops at the shorter of the two sequences.  It is materialised as a
    # list because it is passed to three detectors for every series below.
    avgTimeSeries = list(zip(lastDates, findAverageTimeSeries(testData)))

    for i, c_list in enumerate(centresList):
        # Slope based detection
        slopeBasedResult = mergeDates(
            slopeBased(c_list, False, avgTimeSeries, False)
        )
        # Correlation
        correlationResult = mergeDates(
            anomaliesFromWindowCorrelationWithConstantlag(c_list, avgTimeSeries)
        )
        # Linear regression (note the average series is the first argument here)
        lrResult = mergeDates(linear_regression(avgTimeSeries, c_list, 1))

        result = intersection(
            3,
            slopeBasedResult, "slope_based",
            correlationResult, "correlation",
            lrResult, "linear_regression",
        )
        print("Anomalies for time-series " + str(i) + " are:")
        for (a, b, c) in result:
            print(str(a) + "," + str(b) + "," + str(c))