import csv
import os
import pickle
import sys

import numpy as np
import matplotlib.pyplot as plt

import labviewloader
import zmun


def main(fileName):
    #Create a big array of all the data
    window_size = 100
    window_shift = 100
        
    # Load the data file
    ll = labviewloader.LabViewLoader()
    ll.load(fileName)
    
    #Get the first channel
    data = ll.getDataCol(0)
    data = [float(item) for item in data]
    
    #Break it into a list of lists, and that into a numpy array
    input_data = np.array(sliceList(data, window_size, window_shift))
    
    #Calculate the sample mean vector and the std dev vector
    sample_mean = np.mean(input_data, axis=0)
    sample_std_dev = np.std(input_data, axis=0)
    
    #Subtract the sample mean from the data and divide by the std dev.
    zero_mean = (input_data - sample_mean) / sample_std_dev
    
    #Compute the covariance matrix. Note that np.cov treats each row
    #(here, each window) as a variable, so this yields a window-by-window
    #covariance matrix; the SVD below then gives one coordinate per window.
    coVar = np.cov(zero_mean)
    
    #Do the SVD
    U, s, V = np.linalg.svd(coVar, full_matrices=True)
    #U, s, V = np.linalg.svd(input_data, full_matrices=True)
    plt.scatter(U[:, 0], U[:, 1])
    '''
    for label, x, y in zip(labels, U[:,0], U[:,1]):
        plt.annotate(
            label, 
            xy = (x, y), xytext = (-3, 3),
            textcoords = 'offset points', ha = 'right', va = 'bottom')
    '''
    outfile = "./PCA_" + os.path.basename(fileName).split(".")[0] + ".png"
    plt.savefig(outfile)
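

# sliceList is not defined in this snippet; a minimal sketch of the
# three-argument windowing helper it is assumed to be here: step through
# the channel in window_shift increments and take window_size samples at
# each step.
def sliceList(data, window_size, window_shift):
    windows = []
    for start in range(0, len(data) - window_size + 1, window_shift):
        windows.append(data[start:start + window_size])
    return windows
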
    def load(self, fName):
        # Handle CSV file
        if fName.endswith(".csv"):
            self.channels = 60
            self.samples = 0
            self.deltaT = 0.001
            self.data = [[] for _ in range(self.channels)]
            # Assemble the columns, each column is one channel
            with open(fName) as infile:
                reader = csv.reader(infile,
                                    delimiter=",",
                                    quoting=csv.QUOTE_NONE)
                for row in reader:
                    for ii in range(0, self.channels):
                        self.data[ii].append(float(row[ii]))
                    #Count rows
                    self.samples += 1

        else:
            # Load a labview file
            ll = labviewloader.LabViewLoader()
            ll.load(fName)

            # Load the parameters from the file
            self.channels = int(ll.getHeaderValue("Channels", 1))
            self.samples = int(ll.getHeaderValue("Samples", 1))
            self.deltaT = float(ll.getHeaderValue("Delta_X", 1))

            # collect all the data as floats
            self.data = []
            for channel in range(0, self.channels):
                temp = ll.getDataCol(channel)
                accumulator = []
                for value in temp:
                    accumulator.append(float(str(value)))
                self.data.append(accumulator)
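
    # Usage sketch for the load method above (its enclosing class is not
    # shown in this example; "DataLoader" is a hypothetical name for it):
    #   loader = DataLoader()
    #   loader.load("recording.lvm")   # .csv files are also accepted
    #   first_channel = loader.data[0]
    #   duration = loader.samples * loader.deltaT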


def indexToShift(window_size, window_shift, deltaT):
    #Return a function mapping a window index to its start time in
    #seconds: consecutive windows begin window_shift samples apart,
    #with deltaT seconds between samples.
    return lambda x: window_shift * x * deltaT
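

# Usage sketch for indexToShift, e.g. to label a plot's time axis
# (the parameter values here are illustrative only):
#   toTime = indexToShift(window_size=100, window_shift=20, deltaT=0.001)
#   start_times = [toTime(i) for i in range(4)]  # [0.0, 0.02, 0.04, 0.06]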


if __name__ == '__main__':

    #Get a labview lvm file off the command line and load it
    infile = sys.argv[1]
    ll = labviewloader.LabViewLoader()
    ll.load(infile)

    #Get the window size and shift between windows
    window_size = int(sys.argv[2])
    window_shift = int(sys.argv[3])

    #Get a channel, zero mean and unit norm it, and convert to floats
    channel = int(sys.argv[4])
    #     channel = random.randrange(60)
    data = ll.getDataCol(channel)
    data = zmun.zeroMeanUnitNorm(data)
    data = [float(item) for item in data]

    #Get the timestep between samples
    deltaT = float(ll.getHeaderValue("Delta_X", 0))
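
# zmun is not shown in this example; a minimal sketch of the
# zeroMeanUnitNorm helper it is assumed to provide: subtract the sample
# mean, then scale the result to unit Euclidean norm.
def zeroMeanUnitNorm(values):
    arr = np.array([float(v) for v in values])
    arr = arr - arr.mean()
    norm = np.linalg.norm(arr)
    #Guard against division by zero for an all-constant channel
    return arr / norm if norm > 0 else arr
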
def main(fileName, window_size, window_shift):
    #Create a big array of all the data
    #100/20 was pretty good
    #window_size = 10
    #window_shift = 5
    print "Processing {0} with window size {1} and shift {2}".format(fileName, window_size, window_shift)
    input_data = []
        
    #Work with labview files
    if fileName.endswith(".lvm"):    
        # Load the data file
        ll = labviewloader.LabViewLoader()
        ll.load(fileName)
        
        #Get the first channel
        data = ll.getDataCol(0)
        data = [float(item) for item in data]
        
        #Get the timestep between samples
        deltaT = float(ll.getHeaderValue("Delta_X",0))
        
        #Break it into a list of lists, and that into a numpy array
        input_data = sliceList(data, window_size, window_shift, deltaT)
    #Work with averaged lvm files
    elif fileName.endswith(".avg"):
        data = []
        with open(fileName, "r") as infile:
            for line in infile:
                data.append(float(line))
        deltaT = 0.001
        input_data = sliceList(data, window_size, window_shift, deltaT)
    else:
        print "This script only works on LVM files and averaged data (.avg files)"
        sys.exit()
          
    #Adapt cluster count based on data set size: one cluster per 50 windows
    clusterCount = len(input_data) // 50
    #Cluster the data 
    clusters = maxmincluster(input_data, clusterCount)
    
    #Scribble it out to a file
    with open("minmax{0}_{1}.p".format(window_size, window_shift), "w") as outfile:
        pickle.dump(clusters, outfile)
    
    index = 0
    data_to_id = {}
    for cluster in clusters:
        exemplar = cluster[0]
        members = cluster[1]
        
        #Plot the exemplar, then plot the members under it
        plt.figure(figsize=(20,25))
        #The head gets a separate plot
        plt.subplot(2,1,1)
        plt.plot(exemplar[0])
        #The members get plotted together
        plt.subplot(2,1,2)
        for member in members:
            plt.plot(member[0])
            
            #Turn the cluster data inside-out to make a lookup table for
            #converting each item of input data into its respective cluster
            #identifier. Only the start and end times are used as the key
            #because the window data itself is an unhashable type.
            data_to_id[(member[1], member[2])] = index

        #Write to a file
        plt.savefig("cluster_{0}.png".format(index))
        plt.close()
        index += 1
    
    #Use the data and the lookup table to build
    #a list of cluster IDs that represents the data
    with open("clusterList.csv", "w") as outfile:
        for sampleSet in input_data:
            outfile.write(str(data_to_id[(sampleSet[1],sampleSet[2])]) + "\n")
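

# Neither sliceList (the four-argument variant used above) nor
# maxmincluster is defined in this example. Below are minimal sketches
# under the assumptions the code implies: sliceList returns
# (window, start_time, end_time) tuples, matching the member[0..2]
# indexing above, and maxmincluster is a farthest-first (max-min)
# pass returning (exemplar, members) pairs.
def sliceList(data, window_size, window_shift, deltaT):
    windows = []
    for start in range(0, len(data) - window_size + 1, window_shift):
        window = data[start:start + window_size]
        #Tag each window with its start and end times in seconds
        windows.append((window,
                        start * deltaT,
                        (start + window_size) * deltaT))
    return windows


def maxmincluster(input_data, clusterCount):
    #Squared Euclidean distance between two windows (timestamps ignored)
    def dist(a, b):
        return sum((x - y) ** 2 for x, y in zip(a[0], b[0]))

    #Farthest-first exemplar selection: seed with the first window, then
    #repeatedly add the window farthest from all current exemplars
    exemplars = [input_data[0]]
    while len(exemplars) < clusterCount:
        farthest = max(input_data,
                       key=lambda item: min(dist(item, e) for e in exemplars))
        exemplars.append(farthest)

    #Assign every window (exemplars included) to its nearest exemplar so
    #the caller's data_to_id lookup covers all of input_data
    clusters = [(e, []) for e in exemplars]
    for item in input_data:
        nearest = min(range(len(exemplars)),
                      key=lambda i: dist(item, exemplars[i]))
        clusters[nearest][1].append(item)
    return clusters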