# Example #1
# Runtime
workDir = 'vibrationdata/'
record = 'viadata'

# Full parameter sweep. These three lists are immediately replaced by the
# single-value lists below, so only the reduced configuration actually runs.
numEigens = [6, 7, 8, 9, 10, 11, 12, 13]
winSizes = [500, 1000, 5000]
ssaWinSizes = [500, 1000, 5000, 10000, 50000]

# Active configuration (overrides the sweep above).
numEigens = [6]
winSizes = [1000]
ssaWinSizes = [5000, 10000]
minError = 1e-4

datamgr.processData(record, workDir, 1)
# NOTE(review): this call was cut off after its first argument. The remaining
# arguments are assumed to be the same as in the other copy of this script
# (workDir, resample=True, deltaTime='5T') -- confirm before relying on it.
origdf = datamgr.h5pyToDataFrame(record, workDir, resample=True, deltaTime='5T')

workDir = workDir + 'results/'  # save everything in results
for numEigen in numEigens:
    for winSize in winSizes:
        title = 'AverageInterpolation'
        # Work on a copy so every parameter combination starts from the
        # same untouched data.
        df = origdf.copy()
        df_filled = nanAvgFill(df)
        df_ssa = ssaInterpolation(df_filled, numEigen, winSize, minError)
        for col in df:
            # One figure per column with three stacked axes.
            # NOTE(review): the rest of this loop body is not present in this
            # excerpt, so nothing is drawn on or saved from the figure here.
            fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(15,15))
# Example #2
def fullAnalysis(recordList, workDir='', lowPassFilter=None, sampleNumCol=0, embedDim=20, N=50):
    """
    Perform the full ssa analysis, with graphs saved to workDir.

    Each column of each record is optionally low-pass filtered, plotted, and
    then scanned with 2*N - 1 half-overlapping windows of len(data)/N samples.
    Four indicator series are collected per window and fitted with slope();
    the fit results and the resulting scores are printed.

    Parameters:
        recordList    -- iterable of record names. The data file must be
                         contained in the working directory, ex. if
                         recordList[0] = '31' then the data file is '31.txt'
                         in the working directory.
        workDir       -- string prepended verbatim to every output file name
                         (so a directory needs its trailing separator); it is
                         also forwarded to datamgr. Its directory part is
                         created when it does not exist yet.
        lowPassFilter -- None, or a list/array of [order, cutoff], where
                         cutoff is between 0 and 1, as digital filter.
        sampleNumCol  -- set to 1 if the first column of the data file is
                         simply the data index (forwarded to
                         datamgr.processData).
        embedDim      -- embedding dimension handed to ssa().
        N             -- number of window lengths the data is divided into;
                         each window holds len(data)/N samples.

    Returns None; results are printed and written as PNG files.
    """
    # Ensure working directory exists. Only the directory part of the prefix
    # is created, and an empty prefix (the default) is skipped because
    # os.makedirs('') raises.
    outDir = os.path.dirname(workDir)
    if outDir and not os.path.exists(outDir):
        os.makedirs(outDir)

    for record in recordList:
        datamgr.processData(record, workDir, sampleNumCol) # Ensure hdf5 file for record exists
        datafp = datamgr.accessData(record, workDir)
        data = datafp[record] # numpy array to be used
        _, cols = data.shape
        for col in range(cols):
            # Low Pass Filter
            orderTag, cutoffTag = None, None
            if lowPassFilter is not None:
                order = lowPassFilter[0]
                cutoff = lowPassFilter[1]
                b, a = signal.butter(order, cutoff, btype='lowpass', analog=False, output='ba')
                # The filtered column is written back into the record's data.
                data[:, col] = signal.filtfilt(b, a, data[:, col])
                # For saving images with cutoff + order info
                orderTag = 'Order' + str(order)
                cutoffTag = 'Cutoff' + str(cutoff)
            # Common file-name stem for every image of this column.
            # presumably xstr() maps None to '' -- verify against its definition
            stem = workDir + record + str(col+1) + xstr(orderTag) + xstr(cutoffTag)

            # Graphs
            plt.figure(figsize=(12, 3))
            # Thin out very long records so the line plot stays manageable.
            steps = 10 if len(data) >= 100e3 else 1
            plt.plot(data[::steps, col], linewidth=0.5)
            plt.xlabel('Sample Number', fontsize=20)
            plt.ylabel('$x$', fontsize=26)
            plt.savefig(stem + '.png', dpi=150)
            plt.close() # Release the figure; clf() alone keeps it alive

            clist=[]; slist=[]; vlist=[]; aclist = []
            scores = [0,0,0]
            P = len(data) / N # window size
            r = embedDim # embedding dimension
            for k in range(2*N-1):
                # Window k covers [k*P/2, (k+2)*P/2): length P, 50% overlap.
                start = int(k*P/2)
                stop = int((k+2)*P/2)
                S = ssa(centre( data[start:stop, col] ), r)
                L = (S/S[0])**2
                idc = array(range(1,r+1)); m=sum(L[:r])
                # Index-weighted mean of the first r normalised squared values.
                clist.append( dot(L[:r],idc)/m )
                slist.append( S[0]**2 / P )
                variance = var(data[start:stop, col])
                vlist.append( variance )
                # Lag-10 product sum of the window, scaled by its variance.
                aclist.append(dot(data[start:stop-10, col],data[start+10:stop, col])/variance)

            # NOTE(review): this asks the data file for a len(data) x len(data)
            # int32 dataset named 'time', but the returned handle was
            # overwritten on the very next line and never used. The call is
            # kept for its effect on the file -- confirm whether it is needed.
            datafp.require_dataset('time', (len(data), len(data)), dtype=int32)
            alist = linspace(0, len(data), len(clist))

            pc = slope(alist,clist,0.05)
            ps = slope(alist,slist,0.05)
            pv = slope(alist,vlist,0.05)
            pac = slope(alist,aclist,0.05) # computed but not used below
            if pc[0]<0 and abs(pc[1]/pc[0])<1: scores[0]=scores[0]+1
            if ps[0]>0 and abs(ps[1]/ps[0])<1: scores[1]=scores[1]+1
            if pv[0]>0 and abs(pv[1]/pv[0])<1: scores[2]=scores[2]+1
            outstr = "{0:6.3f}+/-{1:6.3f} ({2:4.2f}), {3:6.3f}+/-{4:6.3f} ({5:4.2f}), {6:6.3f}+/-{7:6.3f} ({8:4.2f})"
            print(outstr.format(pc[0], pc[1], abs(pc[1]/pc[0]), ps[0], ps[1], abs(ps[1]/ps[0]), pv[0], pv[1], abs(pv[1]/pv[0]) ))
            print(scores)

            plt.figure(figsize=(22,6), dpi=150)
            # NOTE(review): no subplot/axes selection is visible, so the three
            # indicator series share one axes and only the last title/labels
            # remain in the saved image -- confirm the intended layout.
            # presumably element 0 of a slope() result is the gradient and
            # element 2 the intercept; verify against slope().
            plt.plot(alist, clist, "bo")
            plt.plot(alist, alist*pc[0] + pc[2], "g")
            plt.title("$\\nu$ vs $s$", fontsize=26)
            plt.xlabel("$s$", fontsize=26)
            plt.ylabel("$\\nu$", fontsize=26)

            plt.plot(alist, slist, "bo")
            plt.plot(alist, alist*ps[0] + ps[2], "g")
            plt.title("$\\lambda_1$ vs $s$", fontsize=26)
            plt.xlabel("$s$", fontsize=26)
            plt.ylabel("$\\lambda_1$", fontsize=26)

            plt.plot(alist, vlist, "bo")
            plt.plot(alist, alist*pv[0] + pv[2], "g")
            plt.title("$\\sigma^2$ vs $s$", fontsize=26)
            plt.xlabel("$s$", fontsize=26)
            plt.ylabel("$\\sigma^2$", fontsize=26)
            plt.savefig(stem + 'indicators500.png', dpi=150)

            # NOTE(review): these labels are set after the image was saved and
            # nothing is plotted or saved afterwards, so they have no visible
            # effect.
            plt.title("Autocorrelation vs $s$", fontsize=26)
            plt.xlabel("$s$", fontsize=26)
            plt.ylabel("Autocorrelation", fontsize=26)
            plt.close() # Release the figure; clf() alone keeps it alive
import datamanager as datamgr
import matplotlib.pyplot as plt

# Runtime
workDir = 'vibrationdata/'
record = 'viadata'

# Parameter grids; the loops below run over numEigens x winSizes.
numEigens = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
winSizes = [500, 1000, 5000]
ssaWinSizes = [500, 1000, 5000, 10000, 50000]
minError = 1e-4

datamgr.processData(record, workDir, 1)
origdf = datamgr.h5pyToDataFrame(record, workDir, resample=True, deltaTime='5T')

workDir = workDir + 'results/' # save everything in results
for numEigen in numEigens:
    for winSize in winSizes:
        title = 'AverageInterpolation'
        # Work on a copy so every parameter combination starts from the
        # same untouched data.
        df = origdf.copy()
        df_filled = nanAvgFill(df)
        df_ssa = ssaInterpolation(df_filled, numEigen, winSize, minError)
        for col in df:
            # One figure per column: original, average-filled and
            # SSA-interpolated series stacked top to bottom.
            fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(15,15))

            df[col].plot(ax=axes[0])
            axes[0].set_title('Original Data')

            df_filled[col].plot(ax=axes[1])
            axes[1].set_title('Missing Points Interpolation w/ Average')

            df_ssa[col].plot(ax=axes[2])
            # NOTE(review): the title expression was cut off after
            # str(numEigen); it is closed here with nothing appended. Any
            # further title text and the code that saved/closed the figure
            # are not present in this excerpt.
            axes[2].set_title('SSA Interpolation w/ eigens=' + str(numEigen))
# Example #5
def fullAnalysis(recordList,
                 workDir='',
                 lowPassFilter=None,
                 sampleNumCol=0,
                 embedDim=20,
                 N=50):
    """
    Perform the full ssa analysis, with graphs saved to workDir.

    Each column of each record is optionally low-pass filtered, plotted, and
    then scanned with 2*N - 1 half-overlapping windows of len(data)/N samples.
    Four indicator series are collected per window and fitted with slope();
    the fit results and the resulting scores are printed.

    Parameters:
        recordList    -- iterable of record names. The data file must be
                         contained in the working directory, ex. if
                         recordList[0] = '31' then the data file is '31.txt'
                         in the working directory.
        workDir       -- string prepended verbatim to every output file name
                         (so a directory needs its trailing separator); it is
                         also forwarded to datamgr. Its directory part is
                         created when it does not exist yet.
        lowPassFilter -- None, or a list/array of [order, cutoff], where
                         cutoff is between 0 and 1, as digital filter.
        sampleNumCol  -- set to 1 if the first column of the data file is
                         simply the data index (forwarded to
                         datamgr.processData).
        embedDim      -- embedding dimension handed to ssa().
        N             -- number of window lengths the data is divided into;
                         each window holds len(data)/N samples.

    Returns None; results are printed and written as PNG files.
    """
    # Ensure working directory exists. Only the directory part of the prefix
    # is created, and an empty prefix (the default) is skipped because
    # os.makedirs('') raises.
    outDir = os.path.dirname(workDir)
    if outDir and not os.path.exists(outDir):
        os.makedirs(outDir)

    for record in recordList:
        datamgr.processData(record, workDir,
                            sampleNumCol)  # Ensure hdf5 file for record exists
        datafp = datamgr.accessData(record, workDir)
        data = datafp[record]  # numpy array to be used
        _, cols = data.shape

        for col in range(cols):

            # Low Pass Filter
            orderTag, cutoffTag = None, None
            if lowPassFilter is not None:
                order = lowPassFilter[0]
                cutoff = lowPassFilter[1]
                b, a = signal.butter(order,
                                     cutoff,
                                     btype='lowpass',
                                     analog=False,
                                     output='ba')
                # The filtered column is written back into the record's data.
                data[:, col] = signal.filtfilt(b, a, data[:, col])

                # For saving images with cutoff + order info
                orderTag = 'Order' + str(order)
                cutoffTag = 'Cutoff' + str(cutoff)

            # Common file-name stem for every image of this column.
            # presumably xstr() maps None to '' -- verify against its definition
            stem = (workDir + record + str(col + 1) + xstr(orderTag) +
                    xstr(cutoffTag))

            # Graphs
            plt.figure(figsize=(12, 3))
            # Thin out very long records so the line plot stays manageable.
            steps = 10 if len(data) >= 100e3 else 1
            plt.plot(data[::steps, col], linewidth=0.5)
            plt.xlabel('Sample Number', fontsize=20)
            plt.ylabel('$x$', fontsize=26)
            plt.savefig(stem + '.png', dpi=150)
            plt.close()  # Release the figure; clf() alone keeps it alive

            clist = []
            slist = []
            vlist = []
            aclist = []
            scores = [0, 0, 0]
            P = len(data) / N  # window size
            r = embedDim  # embedding dimension
            for k in range(2 * N - 1):
                # Window k covers [k*P/2, (k+2)*P/2): length P, 50% overlap.
                start = int(k * P / 2)
                stop = int((k + 2) * P / 2)
                S = ssa(centre(data[start:stop, col]), r)
                L = (S / S[0])**2
                idc = array(range(1, r + 1))
                m = sum(L[:r])
                # Index-weighted mean of the first r normalised squared values.
                clist.append(dot(L[:r], idc) / m)
                slist.append(S[0]**2 / P)
                variance = var(data[start:stop, col])
                vlist.append(variance)
                # Lag-10 product sum of the window, scaled by its variance.
                aclist.append(
                    dot(data[start:stop - 10, col],
                        data[start + 10:stop, col]) / variance)

            # NOTE(review): this asks the data file for a len(data) x len(data)
            # int32 dataset named 'time', but the returned handle was
            # overwritten on the very next line and never used. The call is
            # kept for its effect on the file -- confirm whether it is needed.
            datafp.require_dataset('time', (len(data), len(data)),
                                   dtype=int32)
            alist = linspace(0, len(data), len(clist))

            pc = slope(alist, clist, 0.05)
            ps = slope(alist, slist, 0.05)
            pv = slope(alist, vlist, 0.05)
            pac = slope(alist, aclist, 0.05)  # computed but not used below
            if pc[0] < 0 and abs(pc[1] / pc[0]) < 1: scores[0] = scores[0] + 1
            if ps[0] > 0 and abs(ps[1] / ps[0]) < 1: scores[1] = scores[1] + 1
            if pv[0] > 0 and abs(pv[1] / pv[0]) < 1: scores[2] = scores[2] + 1
            outstr = "{0:6.3f}+/-{1:6.3f} ({2:4.2f}), {3:6.3f}+/-{4:6.3f} ({5:4.2f}), {6:6.3f}+/-{7:6.3f} ({8:4.2f})"
            print(
                outstr.format(pc[0], pc[1], abs(pc[1] / pc[0]), ps[0], ps[1],
                              abs(ps[1] / ps[0]), pv[0], pv[1],
                              abs(pv[1] / pv[0])))

            print(scores)

            plt.figure(figsize=(22, 6), dpi=150)
            # NOTE(review): no subplot/axes selection is visible, so the three
            # indicator series share one axes and only the last title/labels
            # remain in the saved image -- confirm the intended layout.
            # presumably element 0 of a slope() result is the gradient and
            # element 2 the intercept; verify against slope().

            plt.plot(alist, clist, "bo")
            plt.plot(alist, alist * pc[0] + pc[2], "g")
            plt.title("$\\nu$ vs $s$", fontsize=26)
            plt.xlabel("$s$", fontsize=26)
            plt.ylabel("$\\nu$", fontsize=26)

            plt.plot(alist, slist, "bo")
            plt.plot(alist, alist * ps[0] + ps[2], "g")
            plt.title("$\\lambda_1$ vs $s$", fontsize=26)
            plt.xlabel("$s$", fontsize=26)
            plt.ylabel("$\\lambda_1$", fontsize=26)

            plt.plot(alist, vlist, "bo")
            plt.plot(alist, alist * pv[0] + pv[2], "g")
            plt.title("$\\sigma^2$ vs $s$", fontsize=26)
            plt.xlabel("$s$", fontsize=26)
            plt.ylabel("$\\sigma^2$", fontsize=26)
            plt.savefig(stem + 'indicators500.png', dpi=150)

            # NOTE(review): these labels are set after the image was saved and
            # nothing is plotted or saved afterwards, so they have no visible
            # effect.
            plt.title("Autocorrelation vs $s$", fontsize=26)
            plt.xlabel("$s$", fontsize=26)
            plt.ylabel("Autocorrelation", fontsize=26)
            plt.close()  # Release the figure; clf() alone keeps it alive