Example #1
0
def fullAnalysis(recordList, workDir='', lowPassFilter=None, sampleNumCol=0, embedDim=20, N=50):
    """
    Compute the fitted indicator trend lines for every column of every record.

    Each column is (optionally) low-pass filtered and cut into 2*N-1 windows
    of length len(data)/N, successive windows overlapping by half.  With
    S = ssa(centre(window), embedDim), three indicators are stored per window:
        clist -- index-weighted mean of (S/S[0])**2 over the first embedDim terms
        slist -- S[0]**2 divided by the window length
        vlist -- variance of the window
    slope() is then fitted to each indicator series against the sample
    position.  slope() is assumed to return (gradient, uncertainty, intercept);
    only elements [0] and [2] are used here -- TODO confirm against slope().

    recordList    -- record names; ex. if recordList[0] = '31' then the data
                     file is '31.txt' in the working directory
    workDir       -- working directory, expected to end with a path separator;
                     its directory part is created if it does not exist.
                     '' means the current directory
    lowPassFilter -- None, or a sequence whose first two items are
                     [order, cutoff] for signal.butter
    sampleNumCol  -- passed through to datamgr.processData
    embedDim      -- embedding dimension handed to ssa()
    N             -- number of window lengths the record is divided into

    Returns a list with one entry per (record, column):
    [alist, clist fit, slist fit, vlist fit], where alist holds the sample
    positions and each fit is alist*gradient + intercept.
    """
    indSlope = []

    # Ensure working directory exists.  Only the directory part of workDir is
    # created; an empty directory part (e.g. the default workDir='') refers to
    # the current directory, which needs no creation.
    outDir = os.path.dirname(workDir)
    if outDir:
        try:
            os.makedirs(outDir)
        except OSError:
            # Already existing is fine; anything else is a real error
            if not os.path.isdir(outDir):
                raise

    for record in recordList:
        datamgr.processData(record, workDir, sampleNumCol) # Ensure hdf5 file for record exists
        datafp = datamgr.accessData(record, workDir)
        try:
            data = datafp[record] # numpy array to be used
            rows, cols = data.shape

            for col in range(cols):
                # Low Pass Filter
                if lowPassFilter is not None:
                    order, cutoff = lowPassFilter[0], lowPassFilter[1]
                    b, a = signal.butter(order, cutoff, btype='lowpass', analog=False, output='ba')
                    # The filtered column is written back into data.
                    # NOTE(review): if data is backed by the hdf5 file this
                    # overwrites the stored samples -- confirm this is intended.
                    data[:, col] = signal.filtfilt(b, a, data[:, col])

                clist = []; slist = []; vlist = []; aclist = []
                # NOTE(review): on Python 2 ints this is integer division unless
                # `from __future__ import division` is active; the int() casts
                # below keep the slice bounds integral either way.
                P = len(data) / N # window size
                r = embedDim # embedding dimension
                idc = array(range(1, r+1)) # weights 1..r, identical for every window
                for k in range(2*N-1):
                    # Window k covers [k*P/2, (k+2)*P/2): length P, step P/2
                    lo = int(k*P/2)
                    hi = int((k+2)*P/2)
                    S = ssa(centre( data[lo:hi, col] ), r)
                    L = (S/S[0])**2
                    m = sum(L[:r])
                    clist.append( dot(L[:r], idc)/m )
                    slist.append(S[0]**2/P)
                    variance = var(data[lo:hi, col])
                    vlist.append(variance)
                    # Lag-10 product of the window with itself, scaled by the variance
                    aclist.append(dot(data[lo:hi-10, col], data[lo+10:hi, col])/variance)

                # Sample position assigned to each window.  (The former
                # require_dataset('time', ...) call created an unused
                # len(data) x len(data) dataset in the file and its handle was
                # overwritten immediately, so it has been dropped.)
                alist = linspace(0, len(data), len(clist))

                pc = slope(alist, clist, 0.05)
                ps = slope(alist, slist, 0.05)
                pv = slope(alist, vlist, 0.05)
                # pac is not part of the return value; the call is kept so that
                # slope() is still invoked exactly as before.
                pac = slope(alist, aclist, 0.05)

                # NOTE(review): no figure is drawn in this function, so this only
                # clears whatever figure is current.
                plt.clf()
                indSlope.append([alist, alist*pc[0]+pc[2], alist*ps[0]+ps[2], alist*pv[0]+pv[2]])
        finally:
            # Close the file even when the analysis of this record fails
            datafp.close()
    return indSlope
Example #2
0
def fullAnalysis(recordList, workDir='', lowPassFilter=None, sampleNumCol=0, embedDim=20, N=50):
    """
    Perform the full ssa analysis, with graphs saved to workDir.

    Each column of each record is (optionally) low-pass filtered and plotted,
    then cut into 2*N-1 windows of length len(data)/N, successive windows
    overlapping by half.  With S = ssa(centre(window), embedDim), three
    indicators are computed per window:
        nu       -- index-weighted mean of (S/S[0])**2 over the first embedDim terms
        lambda_1 -- S[0]**2 divided by the window length
        sigma^2  -- variance of the window
    slope() is fitted to each indicator series against the sample position.
    For each column the function prints "gradient+/-error (ratio)" for the
    three fits, prints a [nu, lambda_1, sigma^2] score list (1 where the
    gradient has the tested sign and |error/gradient| < 1) and saves a
    three-panel figure.

    recordList is a list of record names
                  ex. if recordList[0] = '31' then data file is '31.txt' in working directory
    workDir is prepended as-is to every output file name, so it should end
                  with a path separator; its directory part is created if
                  missing ('' means the current directory)
    lowPassFilter is expected to be a list/array of [order, cutoff],
                  where cutoff is between 0 to 1, as digital filter
    Set sampleNumCol to 1 if the first column of the data file is simply the data index
    embedDim is the embedding dimension handed to ssa()
    N is the number of window lengths the record is divided into
                  (the window size itself is len(data)/N)
    Note that data file must be contained in the working directory

    Returns None.
    """
    # Ensure working directory exists.  Only the directory part of workDir is
    # created; an empty directory part (e.g. the default workDir='') refers to
    # the current directory, which needs no creation.
    outDir = os.path.dirname(workDir)
    if outDir:
        try:
            os.makedirs(outDir)
        except OSError:
            # Already existing is fine; anything else is a real error
            if not os.path.isdir(outDir):
                raise

    for record in recordList:
        datamgr.processData(record, workDir, sampleNumCol) # Ensure hdf5 file for record exists
        datafp = datamgr.accessData(record, workDir)
        try:
            data = datafp[record] # numpy array to be used
            rows, cols = data.shape

            for col in range(cols):

                # Low Pass Filter
                order, cutoff = None, None
                if lowPassFilter is not None:
                    order = lowPassFilter[0]
                    cutoff = lowPassFilter[1]
                    b, a = signal.butter(order, cutoff, btype='lowpass', analog=False, output='ba')
                    # The filtered column is written back into data.
                    # NOTE(review): if data is backed by the hdf5 file this
                    # overwrites the stored samples -- confirm this is intended.
                    data[:, col] = signal.filtfilt(b, a, data[:, col])

                    # For saving images with cutoff + order info
                    order = 'Order' + str(order)
                    cutoff = 'Cutoff' + str(cutoff)

                # Graph of the signal; long records are thinned for plotting only
                plt.figure(figsize=(12,3))
                steps = 1
                if len(data) >= 100e3:
                    steps = 10
                plt.plot(data[::steps, col], linewidth=0.5)
                plt.xlabel('Sample Number', fontsize=20)
                plt.ylabel('$x$', fontsize=26)
                plt.savefig(workDir + record + str(col+1) + xstr(order) + xstr(cutoff) + '.png', dpi=150)
                plt.show()
                # Free up memory: clf() would only blank the figure and leave it
                # registered with pyplot, so figures piled up column after column
                plt.close()

                clist = []; slist = []; vlist = []; aclist = []
                scores = [0, 0, 0]
                # NOTE(review): on Python 2 ints this is integer division unless
                # `from __future__ import division` is active; the int() casts
                # below keep the slice bounds integral either way.
                P = len(data) / N # window size
                r = embedDim # embedding dimension
                idc = array(range(1, r+1)) # weights 1..r, identical for every window
                for k in range(2*N-1):
                    # Window k covers [k*P/2, (k+2)*P/2): length P, step P/2
                    lo = int(k*P/2)
                    hi = int((k+2)*P/2)
                    S = ssa(centre( data[lo:hi, col] ), r)
                    L = (S/S[0])**2
                    m = sum(L[:r])
                    clist.append( dot(L[:r], idc)/m )
                    slist.append(S[0]**2/P)
                    variance = var(data[lo:hi, col])
                    vlist.append(variance)
                    # Lag-10 product of the window with itself, scaled by the variance
                    aclist.append(dot(data[lo:hi-10, col], data[lo+10:hi, col])/variance)

                # Sample position assigned to each window.  (The former
                # require_dataset('time', ...) call created an unused
                # len(data) x len(data) dataset in the file and its handle was
                # overwritten immediately, so it has been dropped.)
                alist = linspace(0, len(data), len(clist))

                pc = slope(alist, clist, 0.05)
                ps = slope(alist, slist, 0.05)
                pv = slope(alist, vlist, 0.05)
                # pac is neither printed nor plotted; the call is kept so that
                # slope() is still invoked exactly as before.
                pac = slope(alist, aclist, 0.05)
                if pc[0] < 0 and abs(pc[1]/pc[0]) < 1: scores[0] += 1
                if ps[0] > 0 and abs(ps[1]/ps[0]) < 1: scores[1] += 1
                if pv[0] > 0 and abs(pv[1]/pv[0]) < 1: scores[2] += 1
                outstr = "{0:6.3f}+/-{1:6.3f} ({2:4.2f}), {3:6.3f}+/-{4:6.3f} ({5:4.2f}), {6:6.3f}+/-{7:6.3f} ({8:4.2f})"
                print(outstr.format(pc[0], pc[1], abs(pc[1]/pc[0]), ps[0], ps[1], abs(ps[1]/ps[0]), pv[0], pv[1], abs(pv[1]/pv[0]) ))

                # Parenthesised form prints the same on Python 2 and matches the line above
                print(scores)

                # One panel per indicator: measured points plus fitted line
                plt.figure(figsize=(22,6), dpi=150)
                panels = ((131, clist, pc, "$\\nu$"),
                          (132, slist, ps, "$\\lambda_1$"),
                          (133, vlist, pv, "$\\sigma^2$"))
                for position, values, fit, symbol in panels:
                    plt.subplot(position)
                    plt.plot(alist, values, "bo")
                    plt.plot(alist, alist*fit[0]+fit[2], "g")
                    plt.title(symbol + " vs $s$", fontsize=26)
                    plt.xlabel("$s$", fontsize=26)
                    plt.ylabel(symbol, fontsize=26)
                plt.savefig(workDir + record + str(col+1) + xstr(order) + xstr(cutoff) + 'indicators500.png', dpi=150)
                plt.show()
                plt.close() # Free up memory
        finally:
            # Close the file even when the analysis of this record fails
            datafp.close()
Example #3
0
def fullAnalysis(recordList,
                 workDir='',
                 lowPassFilter=None,
                 sampleNumCol=0,
                 embedDim=20,
                 N=50):
    """
    Perform the full ssa analysis, with graphs saved to workDir.

    Each column of each record is (optionally) low-pass filtered and plotted,
    then cut into 2*N-1 windows of length len(data)/N, successive windows
    overlapping by half.  With S = ssa(centre(window), embedDim), three
    indicators are computed per window:
        nu       -- index-weighted mean of (S/S[0])**2 over the first embedDim terms
        lambda_1 -- S[0]**2 divided by the window length
        sigma^2  -- variance of the window
    slope() is fitted to each indicator series against the sample position.
    For each column the function prints "gradient+/-error (ratio)" for the
    three fits, prints a [nu, lambda_1, sigma^2] score list (1 where the
    gradient has the tested sign and |error/gradient| < 1) and saves a
    three-panel figure.

    recordList is a list of record names
                  ex. if recordList[0] = '31' then data file is '31.txt' in working directory
    workDir is prepended as-is to every output file name, so it should end
                  with a path separator; its directory part is created if
                  missing ('' means the current directory)
    lowPassFilter is expected to be a list/array of [order, cutoff],
                  where cutoff is between 0 to 1, as digital filter
    Set sampleNumCol to 1 if the first column of the data file is simply the data index
    embedDim is the embedding dimension handed to ssa()
    N is the number of window lengths the record is divided into
                  (the window size itself is len(data)/N)
    Note that data file must be contained in the working directory

    Returns None.
    """
    # Ensure working directory exists.  Only the directory part of workDir is
    # created; an empty directory part (e.g. the default workDir='') refers to
    # the current directory, which needs no creation.
    outDir = os.path.dirname(workDir)
    if outDir:
        try:
            os.makedirs(outDir)
        except OSError:
            # Already existing is fine; anything else is a real error
            if not os.path.isdir(outDir):
                raise

    for record in recordList:
        datamgr.processData(record, workDir,
                            sampleNumCol)  # Ensure hdf5 file for record exists
        datafp = datamgr.accessData(record, workDir)
        try:
            data = datafp[record]  # numpy array to be used
            rows, cols = data.shape

            for col in range(cols):

                # Low Pass Filter
                order, cutoff = None, None
                if lowPassFilter is not None:
                    order = lowPassFilter[0]
                    cutoff = lowPassFilter[1]
                    b, a = signal.butter(order,
                                         cutoff,
                                         btype='lowpass',
                                         analog=False,
                                         output='ba')
                    # The filtered column is written back into data.
                    # NOTE(review): if data is backed by the hdf5 file this
                    # overwrites the stored samples -- confirm this is intended.
                    data[:, col] = signal.filtfilt(b, a, data[:, col])

                    # For saving images with cutoff + order info
                    order = 'Order' + str(order)
                    cutoff = 'Cutoff' + str(cutoff)

                # Graph of the signal; long records are thinned for plotting only
                plt.figure(figsize=(12, 3))
                steps = 1
                if len(data) >= 100e3:
                    steps = 10
                plt.plot(data[::steps, col], linewidth=0.5)
                plt.xlabel('Sample Number', fontsize=20)
                plt.ylabel('$x$', fontsize=26)
                plt.savefig(workDir + record + str(col + 1) + xstr(order) +
                            xstr(cutoff) + '.png',
                            dpi=150)
                plt.show()
                # Free up memory: clf() would only blank the figure and leave it
                # registered with pyplot, so figures piled up column after column
                plt.close()

                clist = []
                slist = []
                vlist = []
                aclist = []
                scores = [0, 0, 0]
                # NOTE(review): on Python 2 ints this is integer division unless
                # `from __future__ import division` is active; the int() casts
                # below keep the slice bounds integral either way.
                P = len(data) / N  # window size
                r = embedDim  # embedding dimension
                idc = array(range(1, r + 1))  # weights 1..r, same for every window
                for k in range(2 * N - 1):
                    # Window k covers [k*P/2, (k+2)*P/2): length P, step P/2
                    lo = int(k * P / 2)
                    hi = int((k + 2) * P / 2)
                    S = ssa(centre(data[lo:hi, col]), r)
                    L = (S / S[0])**2
                    m = sum(L[:r])
                    clist.append(dot(L[:r], idc) / m)
                    slist.append(S[0]**2 / P)
                    variance = var(data[lo:hi, col])
                    vlist.append(variance)
                    # Lag-10 product of the window with itself, scaled by the variance
                    aclist.append(
                        dot(data[lo:hi - 10, col], data[lo + 10:hi, col]) /
                        variance)

                # Sample position assigned to each window.  (The former
                # require_dataset('time', ...) call created an unused
                # len(data) x len(data) dataset in the file and its handle was
                # overwritten immediately, so it has been dropped.)
                alist = linspace(0, len(data), len(clist))

                pc = slope(alist, clist, 0.05)
                ps = slope(alist, slist, 0.05)
                pv = slope(alist, vlist, 0.05)
                # pac is neither printed nor plotted; the call is kept so that
                # slope() is still invoked exactly as before.
                pac = slope(alist, aclist, 0.05)
                if pc[0] < 0 and abs(pc[1] / pc[0]) < 1:
                    scores[0] += 1
                if ps[0] > 0 and abs(ps[1] / ps[0]) < 1:
                    scores[1] += 1
                if pv[0] > 0 and abs(pv[1] / pv[0]) < 1:
                    scores[2] += 1
                outstr = "{0:6.3f}+/-{1:6.3f} ({2:4.2f}), {3:6.3f}+/-{4:6.3f} ({5:4.2f}), {6:6.3f}+/-{7:6.3f} ({8:4.2f})"
                print(
                    outstr.format(pc[0], pc[1], abs(pc[1] / pc[0]), ps[0], ps[1],
                                  abs(ps[1] / ps[0]), pv[0], pv[1],
                                  abs(pv[1] / pv[0])))

                # Parenthesised form prints the same on Python 2 and matches the call above
                print(scores)

                # One panel per indicator: measured points plus fitted line
                plt.figure(figsize=(22, 6), dpi=150)
                panels = (
                    (131, clist, pc, "$\\nu$"),
                    (132, slist, ps, "$\\lambda_1$"),
                    (133, vlist, pv, "$\\sigma^2$"),
                )
                for position, values, fit, symbol in panels:
                    plt.subplot(position)
                    plt.plot(alist, values, "bo")
                    plt.plot(alist, alist * fit[0] + fit[2], "g")
                    plt.title(symbol + " vs $s$", fontsize=26)
                    plt.xlabel("$s$", fontsize=26)
                    plt.ylabel(symbol, fontsize=26)
                plt.savefig(workDir + record + str(col + 1) + xstr(order) +
                            xstr(cutoff) + 'indicators500.png',
                            dpi=150)
                plt.show()
                plt.close()  # Free up memory
        finally:
            # Close the file even when the analysis of this record fails
            datafp.close()