def main(fileName):
    """Save a scatter plot of the two leading singular vectors of one channel.

    The first data column of the LabVIEW file ``fileName`` is converted to
    floats and cut into windows by ``sliceList`` (window size 100, shift 100).
    Every window position is standardised (mean removed, divided by the
    standard deviation, both taken across windows), the covariance of the
    standardised windows is decomposed with an SVD, and columns 0 and 1 of U
    are scattered against each other.

    The figure is written to ``./PCA_<stem>.png`` where ``<stem>`` is the base
    name of ``fileName`` up to its first dot.
    """
    size = 100
    shift = 100

    # Read channel 0 of the LabVIEW file as floats
    loader = labviewloader.LabViewLoader()
    loader.load(fileName)
    samples = [float(value) for value in loader.getDataCol(0)]

    # One row per window
    windows = np.array(sliceList(samples, size, shift))

    # Per-position statistics taken across all windows
    column_mean = np.mean(windows, axis=0)
    column_std = np.std(windows, axis=0)

    # Standardise every window against those statistics
    standardised = np.array(
        [(window - column_mean) / column_std for window in windows]
    )

    # Covariance followed by its singular value decomposition
    covariance = np.cov(standardised)
    left_vectors, _singular_values, _right_vectors = np.linalg.svd(
        covariance, full_matrices=True)
    # Disabled alternative: np.linalg.svd(input_data, full_matrices=True)

    plt.scatter(left_vectors[:, 0], left_vectors[:, 1])

    # Disabled point labelling, kept for reference:
    # for label, x, y in zip(labels, U[:,0], U[:,1]):
    #     plt.annotate(
    #         label,
    #         xy = (x, y), xytext = (-3, 3),
    #         textcoords = 'offset points', ha = 'right', va = 'bottom')

    stem = os.path.basename(fileName).split(".")[0]
    plt.savefig("./PCA_" + stem + ".png")
def load(self, fName):
    """Load channel data from a CSV file or a LabVIEW file into this object.

    On return the following attributes are set:

    * ``self.channels`` -- number of channels
    * ``self.samples``  -- number of samples per channel
    * ``self.deltaT``   -- timestep between samples
    * ``self.data``     -- list with one list of floats per channel

    A name ending in ``.csv`` is read as comma-separated rows, one column per
    channel.  For CSV input the channel count is fixed at 60, the timestep at
    0.001, and the sample count is the number of non-empty rows read.  Any
    other name is loaded through ``labviewloader.LabViewLoader`` and the
    three parameters are taken from the file header.

    Raises:
        IndexError: a CSV row has fewer columns than ``self.channels``.
        ValueError: a CSV cell cannot be converted to float.
    """
    # Handle CSV file
    if fName.endswith(".csv"):
        self.channels = 60
        self.samples = 0
        self.deltaT = 0.001
        # One independent list per channel.  ([[]] * n would alias a single
        # list n times, and [[] * n] is just [[]].)
        self.data = [[] for _ in range(self.channels)]

        # Assemble the columns, each column is one channel
        with open(fName) as infile:
            reader = csv.reader(infile, delimiter=",", quoting=csv.QUOTE_NONE)
            for row in reader:
                # csv.reader yields [] for blank lines; they carry no sample
                if not row:
                    continue
                # Convert the whole row first so a malformed row cannot leave
                # the channel columns with different lengths.
                values = [float(row[ii]) for ii in range(self.channels)]
                for column, value in zip(self.data, values):
                    column.append(value)
                # Count rows
                self.samples += 1
    else:
        # Load a labview file
        ll = labviewloader.LabViewLoader()
        ll.load(fName)

        # Load the parameters from the file
        # NOTE(review): other code in this source reads "Delta_X" with index 0
        # rather than 1 -- confirm which index is intended.
        self.channels = int(ll.getHeaderValue("Channels", 1))
        self.samples = int(ll.getHeaderValue("Samples", 1))
        self.deltaT = float(ll.getHeaderValue("Delta_X", 1))

        # Collect all the data as floats.  Values are passed through str()
        # before float(), as before; presumably the loader returns a
        # non-float numeric type -- confirm before simplifying.
        self.data = [
            [float(str(value)) for value in ll.getDataCol(channel)]
            for channel in range(self.channels)
        ]
ec="none", fc=colors[label])
    # NOTE(review): the lines above and the two below are the tail of a
    # plotting routine whose beginning is outside this excerpt; their
    # indentation here is assumed.
    fig.savefig(name)
    plt.close()


def indexToShift(window_size, window_shift, deltaT):
    """Return a function mapping a window index x to ``window_shift * x * deltaT``.

    ``window_size`` is accepted but is not used by the returned function.
    """
    return lambda x: window_shift * x * deltaT


if __name__ == '__main__':
    # Command-line arguments, in order:
    #   1: LabVIEW file   2: window size   3: window shift   4: channel index

    #Get a labview lvm file off the command line and load it
    infile = sys.argv[1]
    ll = labviewloader.LabViewLoader()
    ll.load(infile)

    #Get the window size and shift between windows
    window_size = int(sys.argv[2])
    window_shift = int(sys.argv[3])

    #Get a channel, zero mean and unit norm it, and convert to floats
    channel = int(sys.argv[4])
    # channel = random.randrange(60)
    data = ll.getDataCol(channel)
    data = zmun.zeroMeanUnitNorm(data)
    data = [float(item) for item in data]

    #Get the timestep between samples
    deltaT = float(ll.getHeaderValue("Delta_X", 0))
    # NOTE(review): the script appears to continue past the end of this excerpt.
def main(fileName, window_size, window_shift):
    """Cluster windows of a signal and write the clusters and plots to disk.

    Input is either a LabVIEW ``.lvm`` file (first data column, timestep read
    from the ``Delta_X`` header) or an averaged ``.avg`` file with one float
    per line (timestep fixed at 0.001).  The samples are cut into windows by
    ``sliceList`` and grouped by ``maxmincluster`` into ``len(windows) // 50``
    clusters.

    Files written to the current directory:

    * ``minmax<window_size>_<window_shift>.p`` -- the pickled cluster list
    * ``cluster_<n>.png`` -- one figure per cluster: exemplar on top, all
      members overlaid below
    * ``clusterList.csv`` -- one cluster index per input window, in input
      order

    Args:
        fileName: path to a ``.lvm`` or ``.avg`` file.
        window_size: passed to ``sliceList`` as the window size.
        window_shift: passed to ``sliceList`` as the shift between windows.

    Raises:
        SystemExit: with status 1 when ``fileName`` has neither extension.
    """
    #100/20 was pretty good
    # The single-argument call form prints the same text on Python 2 and 3.
    print("Processing {0} with window size {1} and shift {2}".format(
        fileName, window_size, window_shift))

    #Work with labview files
    if fileName.endswith(".lvm"):
        ll = labviewloader.LabViewLoader()
        ll.load(fileName)

        #Get the first channel
        data = [float(item) for item in ll.getDataCol(0)]

        #Get the timestep between samples
        deltaT = float(ll.getHeaderValue("Delta_X", 0))

    #Work with averaged lvm files
    elif fileName.endswith(".avg"):
        with open(fileName, "r") as infile:
            # Blank lines (e.g. a trailing newline) carry no sample
            data = [float(line) for line in infile if line.strip()]
        deltaT = 0.001

    else:
        print("This script only works on LVM files and averaged data (.avg files)")
        # Non-zero status so calling scripts can detect the failure
        sys.exit(1)

    #Break it into a list of windows
    input_data = sliceList(data, window_size, window_shift, deltaT)

    #Adapt cluster count based on data set size.  Floor division keeps the
    #count an integer regardless of the interpreter's division rules.
    clusterCount = len(input_data) // 50

    #Cluster the data
    clusters = maxmincluster(input_data, clusterCount)

    #Scribble it out to a file; pickle streams belong in binary-mode files
    with open("minmax{0}_{1}.p".format(window_size, window_shift), "wb") as outfile:
        pickle.dump(clusters, outfile)

    data_to_id = {}
    for index, cluster in enumerate(clusters):
        exemplar = cluster[0]
        members = cluster[1]

        #Plot the exemplar, then plot the members under it
        plt.figure(figsize=(20,25))

        #The head gets a separate plot
        plt.subplot(2,1,1)
        plt.plot(exemplar[0])

        #The members get plotted together
        plt.subplot(2,1,2)
        for member in members:
            plt.plot(member[0])
            #Turn the cluster data inside-out to make a lookup table for
            #converting each item of input data into its respective cluster
            #identifier. This only uses the start and end times because
            #the data is in an unhashable data type.
            data_to_id[(member[1], member[2])] = index

        #Write to a file
        plt.savefig("cluster_{0}.png".format(index))
        plt.close()

    #Use the data and the lookup table to build
    #a list of cluster IDs that represents the data
    with open("clusterList.csv", "w") as outfile:
        for sampleSet in input_data:
            outfile.write(str(data_to_id[(sampleSet[1], sampleSet[2])]) + "\n")