def handlePCA(self, event=None):
    """Prompt the user for PCA options via PCADialog, run the analysis,
    and list the result in the PCA listbox.

    Fix: compare against None with `is`/`is not` instead of `==`/`!=`
    (identity, not equality, is the correct test for None).
    """
    print('handlePCA')
    self.PCAWindow = PCADialog(self.root, self.dataObj)
    # No data file has been opened yet: nothing to analyze.
    if self.PCAWindow.headers is None:
        tkMessageBox.showerror('No File Opened!', 'Please open a file first')
        return
    # User cancelled the dialog.
    if self.PCAWindow.result is None:
        return
    self.PCAObjects = []
    # result[0] == 1 means the user asked for normalization (the default
    # analysis.pca path); otherwise pass False to disable it.
    if self.PCAWindow.result[0] == 1:
        self.PCAObjects.append(analysis.pca(self.dataObj, self.PCAWindow.result[1]))
    else:
        self.PCAObjects.append(analysis.pca(self.dataObj, self.PCAWindow.result[1], False))
    # result[2] is the optional user-supplied analysis name; fall back to
    # an auto-numbered "PCA<n>" label.
    if self.PCAWindow.result[2] is None:
        PCAName = "PCA" + str(self.PCANum)
        self.PCANum += 1
    else:
        PCAName = self.PCAWindow.result[2]
    self.pcaBoxA.insert(tk.END, PCAName)
def handlePCA(self, event=None): self.PCAdialog = PCADialogBox(self.root, self.data) if len(self.PCAdialog.headers) == 0: print "Select a file" self.handleOpen() if self.PCAdialog.getDatacols() == None: return an.pca(self.data, self.PCAdialog.getDatacols())
def showPCA(self, event=None): if self.box != None and len(self.box.curselection()) > 0: selected = self.box.get(self.box.curselection()[0]) dataInfo = self.results[selected] newData = None if dataInfo[2] == 1: newData = analysis.pca(dataInfo[0], dataInfo[1]) else: newData = analysis.pca(dataInfo[0], dataInfo[1], normalize=False) print newData.get_eigenvalues() PCAShowDialog(self.root,newData) else: tkMessageBox.showwarning("Instructions", "Please run a PCA analysis, then select a data set to examine")
def get_signal_distribution(signal_list, pca_one): first_pc_normal_vector = [] second_pc_normal_vector = [] first_pc_one = pca_one.components_[0] second_pc_one = pca_one.components_[1] for i in range(len(signal_list)): pca = analysis.pca(signal_list[i], 1) first_pc = pca.components_[0] cos = np.dot(first_pc, first_pc_one) / (np.sqrt( (pow(first_pc[0], 2) + pow(first_pc[1], 2) + pow(first_pc[2], 2))) * np.sqrt( (pow(first_pc_one[0], 2) + pow(first_pc_one[1], 2) + pow(first_pc_one[2], 2)))) first_pc_normal_vector.append(np.rad2deg(np.arccos(cos))) second_pc = pca.components_[0] cos = np.dot(second_pc, second_pc_one) / ((abs( pow(second_pc[0], 2) + abs(pow(second_pc[1], 2) + abs(pow(second_pc[2], 2))))) + (abs( pow(second_pc_one[0], 2) + abs(pow(second_pc_one[1], 2) + abs(pow(second_pc_one[2], 2))))) ) second_pc_normal_vector.append(np.rad2deg(np.arccos(cos))) print first_pc_normal_vector return first_pc_normal_vector, second_pc_normal_vector
def handlePCA(self,event=None): self.PCAdialog = PCADialogBox(self.root, self.data) self.pcaList.append(an.pca(self.data,self.PCAdialog.getDatacols(),self.PCAdialog.getNormalized())) if self.PCAdialog.getName() == None: name="PCA #" + str(self.num_PCA) self.num_PCA +=1 else: name=self.PCAdialog.getName() print name self.AnalysisWindow.insert(tk.END,name) if self.PCAdialog.headers == None: print "Select a file" self.handleOpen() if self.PCAdialog.getDatacols() == None: return print "data: ", self.PCAdialog.getDatacols()
def handlePCA(self):
    """Ask for an analysis name, run PCA on the selected columns, print
    the PCA results, and add the analysis to the listbox.

    Fix: the original deleted 'None' entries from the selection while
    iterating `range(len(...))`, which skips the element after each
    deletion and can raise IndexError — and `curselection()` returns a
    tuple, which does not support `del` at all. Filter with a
    comprehension instead.
    """
    # getting the name of the analysis
    nd = NameDialog(self.root)
    name = nd.get_name()
    self.pca_analysis = True
    # keep only the selections we care about
    self.headers = [h for h in self.cols.curselection() if h != 'None']
    # making the pca object
    new_pca = analysis.pca(self.data, self.headers, prenormalize=True)
    self.PCAs.append(new_pca)
    # print out all of the values
    print("\n\nValues from PCA:")
    print("\neigenvalues:\n", new_pca.get_eigenvalues())
    print("\neigenvectors:\n", new_pca.get_eigenvectors())
    print("\nmeans:\n", new_pca.get_original_means())
    print("\nheaders:\n", new_pca.get_original_headers())
    # naming the pca and inserting it
    if name == "Default":
        self.PCAlistbox.insert(tk.END, "PCA" + str(self.PCAlistbox.size()))
    else:
        self.PCAlistbox.insert(tk.END, name)
def plotPCA(self,event=None): if len(self.box.curselection()) > 0: selected = self.box.get(self.box.curselection()[0]) dataInfo = self.results[selected] newData = None print dataInfo[1] if dataInfo[2] == 1: newData = analysis.pca(dataInfo[0], dataInfo[1]) else: newData = analysis.pca(dataInfo[0], dataInfo[1], normalize=False) temp = self.data self.data = newData self.buildPoints( self.handleChooseAxes() ) self.data = temp else: tkMessageBox.showwarning("Instructions", "Please select an analysis")
def pca_rmse(target_id, gesture):
    """Score each training signal of `gesture` against a target signal:
    rotate the training signal so its first principal component aligns
    with the target's, then compute the RMSE against the target.

    NOTE(review): `target_first_pc` and `target_data` are not defined in
    this function and are not parameters — they appear to be
    module-level globals set elsewhere in the file; confirm.
    """
    # data processing
    data_list = np.array(
        dp.db_extract_list_signal_downsampling(gesture, target_id))
    # data_list = np.array(dp.db_extract_list_signal_normalization(gesture , target_id))
    data_rms_score = []
    for i in range(data_list.shape[0]):
        train_data = data_list[i]
        # first principal component of this training signal (3-component fit)
        train_pca = analysis.pca(train_data, 3)
        train_first_pc = train_pca.components_[0]
        # rotate the signal so its first PC aligns with the target's
        train_new_data = sp.rotation_signal(target_first_pc, train_first_pc, train_data)
        train_new_data_pca = analysis.pca(train_new_data, 3)
        # NOTE(review): computed but never used below — dead code? confirm.
        train_new_data_first_pc = train_new_data_pca.components_[0]
        data_rms_score.append(sp.rmse(target_data, train_new_data))
    return data_rms_score
def handlePCA(self):
    """Prompt for columns and a name, run PCA on the chosen columns, and
    add the analysis to the PCA listbox. Opens a file first if no data
    is loaded.

    Fixes: None compared with `==`/`!=` (use identity checks), and the
    manual index-to-header loop replaced with a comprehension.
    """
    if self.data is None:
        self.handleOpen()
    else:
        col = PCAColDialog(self.root, self.data.get_headers())
        if len(col.result) > 0:
            name = PCANameDialog(self.root)
            if name.result is not None:
                # Map the selected column indices back to header names.
                headersNumeric = self.data.get_headers()
                headers = [headersNumeric[i] for i in col.result[0]]
                self.pcaList.append(analysis.pca(self.data, headers, normalize=col.result[1]))
                self.pcaBox.insert(tk.END, name.result)
def stander_pca():
    """Build the reference ("standard") signals and their first principal
    components for the three target recordings.

    Returns (tr_data, tl_data, tf_data, tr_first_pc, tl_first_pc,
    tf_first_pc) — same shape and order as before; the triplicated
    extract/fit code is collapsed into one loop.
    """
    target_list = [1, 100, 207]
    signals = []
    first_pcs = []
    for target_id in target_list:
        signal = dp.db_extract_one_signal_downsampling(target_id)
        # plt.plot(signal[:,0]); plt.plot(signal[:,1]); plt.plot(signal[:,2]); plt.show()
        pca = analysis.pca(signal, 3)
        signals.append(signal)
        first_pcs.append(pca.components_[0])
    tr_data, tl_data, tf_data = signals
    tr_first_pc, tl_first_pc, tf_first_pc = first_pcs
    return tr_data, tl_data, tf_data, tr_first_pc, tl_first_pc, tf_first_pc
def main(filename, numberOfComponents):
    """Load the data file, run PCA with `numberOfComponents`, and print
    the explained variance ratio."""
    frame = utilities.getDataWithTimeIndex(utilities.readDataFile(filename)).dropna()

    # The parent directory name selects which configuration applies.
    configKey = filename.split('/')[-2]
    _, relevantColumns, labelNames, _, _ = getConfig(configKey)

    if relevantColumns is not None:
        frame = utilities.dropIrrelevantColumns(frame, [relevantColumns, labelNames])

    prints.printEmptyLine()
    prints.printExplainedVarianceRatio(
        analysis.pca(frame, numberOfComponents, relevantColumns, labelNames))
def handlePCA(self): if self.data == None: self.handleOpen() else: self.pcacount += 1 d = ColDialog(self.root, self.data.getHeaders()) if len(d.result) > 0: w = NameDialog(self.root, self.pcacount) if w.result != None: headers = [] for i in d.result[0]: headers.append(self.data.headersNumeric[i]) print self.data.headersNumeric[i] self.pcaList.append(analysis.pca(self.data, headers, normalize = d.result[1])) self.pcaBox.insert(tk.END, w.result) self.pcaList[len(self.pcaList)-1].toFile()
def pl_data(pl_name, username, token=None):
    """Return a dict describing the named playlist: its songs (with audio
    features plus PCA and t-SNE coordinates merged in), per-feature
    means, and the PCA component weights. Returns "" if the playlist
    does not exist."""
    pl = existing_playlist(pl_name, username, token)
    if pl == "":
        return ""
    song_features = get_song_features(feature(pl, 'id'))
    tracks = clean_data(pl['songs'], song_features)
    stats = analysis.simple_stats(tracks)
    pca_result = analysis.pca(tracks)
    # t-SNE runs on the tracks BEFORE the PCA coordinates are merged in.
    tsne_coords = analysis.tSNE(tracks)  ## DEBUG
    tracks = analysis.merge_pca(tracks, pca_result['coords'])
    tracks = analysis.merge_tsne(tracks, tsne_coords)
    return {'songs': tracks, 'means': stats, 'pcaweights': pca_result['weights']}
def createPCA(self): if self.data == None: print "you don't have data" return variables = Dialogs.PCADialog(self.root, self.data.get_headers()) if variables.result == [] and not variables.all: print "you didn't pick anything" return headers = variables.result if variables.all: headers = self.data.get_headers() pca = analysis.pca(self.data, headers, variables.normalize) self.PCA = pca if pca not in self.PCAanalysis: self.PCAs +=1 self.PCAanalysis.append(pca) self.PCAListbox.insert(tk.END, "PCA%d"%(self.PCAs))
def pca_rotation(normalization_x, normalization_y, normalization_z, tr_first_pc, tl_first_pc, tf_first_pc):
    """Assemble the first 50 normalized samples into an (50, 3) array,
    fit a 3-component PCA, and rotate the signal onto each of the three
    reference first principal components.

    Returns the three rotated signals (tr, tl, tf)."""
    x, y, z = normalization_x[0], normalization_y[0], normalization_z[0]
    # Stack the three axes sample-by-sample into rows of [x, y, z].
    train_data = np.array([[x[i], y[i], z[i]] for i in range(50)])
    train_first_pc = analysis.pca(train_data, 3).components_[0]
    train_to_tr = sp.rotation_signal(tr_first_pc, train_first_pc, train_data)
    train_to_tl = sp.rotation_signal(tl_first_pc, train_first_pc, train_data)
    train_to_tf = sp.rotation_signal(tf_first_pc, train_first_pc, train_data)
    return train_to_tr, train_to_tl, train_to_tf
def pca_rotation(downsampling_x, downsampling_y, downsampling_z, tr_first_pc):
    """Assemble the first 50 downsampled samples into an (50, 3) array,
    fit a 3-component PCA, and rotate the signal onto the reference
    first principal component `tr_first_pc`."""
    # Stack the three axes sample-by-sample into rows of [x, y, z].
    samples = [[downsampling_x[i], downsampling_y[i], downsampling_z[i]]
               for i in range(50)]
    train_data = np.array(samples)
    train_pca = analysis.pca(train_data, 3)
    first_pc = train_pca.components_[0]
    return sp.rotation_signal(tr_first_pc, first_pc, train_data)
# Spring 2015 # CS 251 Project 6 # # PCA test function # import numpy as np import data import analysis import sys if __name__ == "__main__": if len(sys.argv) < 2: print 'Usage: python %s <data file>' % (sys.argv[0]) exit() data = data.Data(sys.argv[1]) pcadata = analysis.pca(data, data.get_headers(), False) print "\nOriginal Data Headers" print pcadata.get_data_headers() print "\nOriginal Data", print data.get_data(data.get_headers()) print "\nOriginal Data Means" print pcadata.get_data_means() print "\nEigenvalues" print pcadata.get_eigenvalues() print "\nEigenvectors" print pcadata.get_eigenvectors() print "\nProjected Data" print pcadata.get_data(pcadata.get_headers())
# Figure-generation script: visualize one signal at successive
# preprocessing stages (raw -> filtered -> downsampled) and its PCA
# first eigenvector.
# NOTE(review): `plt` here is a project plotting module (plt_raw,
# plt_pca_3d, ...), not matplotlib; `id` and the `dp`/`analysis`
# modules come from earlier in the file — confirm.
raw_data = dp.db_extract_one_signal(id)
plt.plt_raw(raw_data , "Single Dataset", "raw_data")

# Single dataset that after standardization and filter
filter_data = dp.db_extract_one_signal_filter(id)
plt.plt_raw(filter_data , "Single Dataset(after Filter)", "filter_data")

# Single dataset that after standardization ,filter ,resampling
downsampling_data = dp.db_extract_one_signal_downsampling(id)
plt.plt_raw(downsampling_data, "Single Dataset(after Filter and Resampling)","resampling_data")

# One dataset that after segmentation and downsampling in 3D (timeseries color)
plt.plt_data_3d_alpha(downsampling_data , "Single Dataset(after Filter and Resampling) in 3D","resampling_data_3D")
plt.plt_line_collection(downsampling_data ,"Single Dataset Exploded View","resampling_data_exploded")

# One dataset that after segmentation in 3D , and draw the first pc in figure
pca = analysis.pca(downsampling_data, 3)
first_pc = pca.components_[0]
transform_data = pca.transform(downsampling_data)
plt.plt_pca_3d(downsampling_data , transform_data , first_pc, "Ground Truth Dataset in 3D and PCA First Eigenvector","groundtruth_data_3D_pca")

#
#
# Repeat the pipeline for a second ("test") recording.
test_id = 3
# test_data = dp.db_extract_one_signal(test_id)
test_downsampling_data = dp.db_extract_one_signal_downsampling(test_id)
test_pca = analysis.pca(test_downsampling_data, 3)
test_first_pc = test_pca.components_[0]
test_transform_data = test_pca.transform(test_downsampling_data)
plt.plt_pca_3d(test_downsampling_data , test_transform_data , test_first_pc, "Single Dataset 3D and PCA First Eigenvector","test_data_3D_pca")
plt.plt_line_collection(test_downsampling_data ,"Single Dataset Exploded View","test_data_exploded")
# Spring 2015
# CS 251 Project 6
#
# PCA test function
#
# Test driver: loads the data file named on the command line, runs PCA
# over all of its headers (normalization disabled), and prints each
# stage of the analysis.

import numpy as np
import data
import analysis
import sys

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print('Usage: python %s <data file>' % (sys.argv[0]))
        exit()

    dataset = data.Data(sys.argv[1])
    # Third argument False disables normalization in this pca() variant.
    pcadata = analysis.pca(dataset, dataset.get_headers(), False)

    # Print each labelled section of the PCA results in order.
    sections = [
        ("\nOriginal Data Headers", pcadata.get_data_headers()),
        ("\nOriginal Data", dataset.get_data(dataset.get_headers())),
        ("\nOriginal Data Means", pcadata.get_data_means()),
        ("\nEigenvalues", pcadata.get_eigenvalues()),
        ("\nEigenvectors", pcadata.get_eigenvectors()),
        ("\nProjected Data", pcadata.get_data(pcadata.get_headers())),
    ]
    for label, value in sections:
        print(label)
        print(value)
# Read train dataset train = pd.read_csv(train_file) # Extract features for pda and lda train_feature_values = train[feature_columns].values # Extract target values (Class values) for pda and lda target_values = train['Class'].values # lda without pca lda1, lda_without_pca_df = analysis.lda(train_feature_values, target_values) print lda1.score(train_feature_values, target_values) # draw result draw(lda_without_pca_df, 'Wine Classification - LDA without PCA') # apply pca principal_components = analysis.pca(train_feature_values) # after pca apply lda lda2, lda_df = analysis.lda(principal_components, target_values) print lda2.score(principal_components, target_values) # draw result draw(lda_df, 'Wine Classification - LDA with PCA') exit(0)
def main(argv):
    """PCA + K-means pipeline: load the training features and their true
    categories, project through PCA, keep enough components for 90% of
    the variance, cluster into 6 clusters, and print a confusion matrix
    of cluster id versus true activity category.

    Fixes: bare `except:` narrowed to `except Exception:` (a bare except
    also swallows SystemExit/KeyboardInterrupt), and the confusion
    matrix literal replaced with a comprehension.
    """
    if len(argv) < 3:
        print('usage: python %s <Train CSV file> <Train categories CSV file>' % (argv[0]))
        exit(-1)

    # read the features and categories data sets
    print('Reading %s and %s' % (argv[1], argv[2]))
    try:
        d = data.Data(argv[1])
    except Exception:
        print('Unable to open %s' % (argv[1]))
        exit(-1)

    try:
        catdata = data.Data(argv[2])
    except Exception:
        print('Unable to open %s' % (argv[2]))
        exit(-1)

    # execute PCA analysis
    print('Executing PCA')
    pcadata = an.pca(d, d.get_headers())

    print('Evaluating eigenvalues')
    # identify how many dimensions it takes to represent 90% of the variation
    evals = pcadata.get_eigenvalues()
    esum = np.sum(evals)
    cum = evals[0, 0]
    cumper = cum / esum
    i = 1
    while cumper < 0.9:
        cum += evals[0, i]
        cumper = cum / esum
        i += 1
    print('Dimensions to reach 90% of variation:', i)
    cheaders = pcadata.get_headers()[:i]

    # cluster the data
    K = 6

    # Use the average of each category as the initial means
    truecats = catdata.get_data(catdata.get_headers()[0:1])
    # categories are 1-based in the file; kmeans wants 0-based labels
    tmpcats = truecats - 1

    print('Clustering to %d clusters' % (K))
    codebook, codes, errors = an.kmeans(pcadata, cheaders, K, categories=tmpcats)

    # build a confusion matrix: rows = cluster id, columns = true category
    confmtx = [[0] * 6 for _ in range(6)]
    for i in range(codes.shape[0]):
        confmtx[int(codes[i, 0])][int(truecats[i, 0]) - 1] += 1

    print("\nConfusion Matrix:\n")
    print('Actual-> Walking Walk-up Walk-dwn Sitting Standing Laying')
    for i in range(len(confmtx)):
        s = 'Cluster %d' % (i)
        for val in confmtx[i]:
            s += "%10d" % (val)
        print(s)
    print()
# Overlay the scaled density trace (white) and scaled mean (red) on the
# current figure. `dent`, `dat_mean`, `dbins`, `nbins`, and `dist` are
# defined earlier in the file (outside this view).
plt.plot(dent * (dbins - 1), 'w')
mm = dat_mean / 0.9 * (dbins - 1)
plt.plot(mm, 'r')
plt.xlim(0, len(dat_mean) - 1)
plt.ylim(0, dbins - 1)
plt.tight_layout()

# plot all distribution / cluster distributions !!
plt.figure(8)
for b in range(nbins):
    plt.plot(dist[:, b])

# PCA of the per-bin distributions.
# NOTE(review): the C = spatial / T = temporal / pvar = percent-variance
# interpretation is taken from the plot titles below — confirm against
# als.pca's actual return contract.
C, T, pvar = als.pca(dist)

plt.figure(9)
plt.clf()
plt.subplot(2, 2, 1)
plt.imshow(C, interpolation='none', aspect='auto', cmap='viridis')
plt.title('spatial principal components')
plt.colorbar(pad=0.01, fraction=0.01)
# keep the axes handle so panel 4 can share its x-axis
ax = plt.subplot(2, 2, 2)
plt.imshow(T, interpolation='none', aspect='auto', cmap='viridis')
plt.colorbar(pad=0.01, fraction=0.01)
plt.title('temporal principal components')
plt.subplot(2, 2, 3)
plt.plot(100 - pvar)
plt.title('variance explained')
plt.subplot(2, 2, 4, sharex=ax)
# PCA export script: run PCA over the Premier League data set, print the
# eigen-decomposition shapes, and start a CSV report of the eigenvectors.
#
# Fix: csv.writer requires the file opened with newline='' (otherwise
# blank rows appear on Windows, per the csv module documentation).
from sklearn.preprocessing import StandardScaler
import sys
import csv
from scipy.stats import pearsonr
from scipy.stats import spearmanr
import data
import matplotlib.pyplot as plt
import mplcursors
import analysis

dobj = data.Data("premierleague.csv")
# skip the first header (the row label / team identifier column)
headers = dobj.get_headers()[1:]
position = dobj.limit_columns(['position']).T
d = dobj.limit_columns(headers).T

pcadobj = analysis.pca(dobj, headers)
evals = pcadobj.get_eigenvalues()
evecs = pcadobj.get_eigenvectors()
pca_headers = pcadobj.get_headers()
eigensum = sum(evals)
# rebind d to the PCA-projected columns
d = pcadobj.limit_columns(pca_headers).T

print('evals shape', evals.shape)
print('evecs shape', evecs.shape)
# NOTE(review): prints only the literal label; probably meant
# print('pca_headers', pca_headers) — confirm before changing output.
print('pca_headers')

first_row = ["E-vec", "E-val", 'cumulative']
first_row.extend(pcadobj.get_original_headers())
with open('heresyourdata.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(first_row)
def buildPCA(self, cols, name):
    """Run PCA over `cols`, remember the result, list it in the PCA menu
    under `name`, and draw it."""
    projection = analysis.pca(self.data, cols)
    self.pcaObjects.append(projection)
    self.pcaMenu.insert(tk.END, name)
    self.drawPCA(projection)
def pca(df, numberOfComponents, relevantColumns=None, columnDescriptions=None):
    """Thin wrapper: delegate PCA of `df` to the analysis module,
    forwarding all arguments unchanged."""
    result = analysis.pca(df, numberOfComponents, relevantColumns, columnDescriptions)
    return result
def main(argv):
    """PCA + K-means pipeline (Python 2 variant): load training features
    and true categories, run PCA, keep enough components for 99.9% of
    the variance, cluster into 6 clusters, and print a confusion matrix.

    argv: [script, train-CSV, categories-CSV].
    """
    if len(argv) < 3:
        print 'usage: python %s <Train CSV file> <Train categories CSV file>' % (argv[0])
        exit(-1)

    # read the features and categories data sets
    print 'Reading %s and %s' % (argv[1], argv[2])
    try:
        d = data.Data(argv[1])
    except:
        print 'Unable to open %s' % (argv[1])
        exit(-1)

    try:
        catdata = data.Data(argv[2])
    except:
        print 'Unable to open %s' % (argv[2])
        exit(-1)

    # execute PCA analysis
    # NOTE(review): this an.pca variant takes (headers, data) — argument
    # order differs from other versions in this repo; confirm.
    pcadata = an.pca( d.getHeaderRaw(), d )

    print 'Evaluating eigenvalues'
    # identify how many dimensions it takes to represent 90% of the variation
    # (threshold below is actually 99.9%, matching the printed message)
    evals = pcadata.getEigenvalues()
    esum = np.sum(evals)
    cum = evals[0]
    cumper = cum / esum
    i = 1
    while cumper < .999:
        cum += evals[i]
        cumper = cum/esum
        i += 1
    print 'Dimensions to reach 99.9% of variation:', i
    cheaders = pcadata.getHeaderRaw()[:i]

    # cluster the data
    K = 6

    # Use the average of each category as the initial means
    truecats = catdata.getDataNum(catdata.getHeaderRaw()[0:1])
    # categories are 1-based in the file; kmeans wants 0-based labels
    tmpcats = truecats - 1
    # trailing comma keeps the cursor on the line for the (commented) dump
    print "categoroties:", #tmpcats.tolist()

    print 'Clustering to %d clusters' % (K)
    # print pcadata
    # print cheaders
    # print K
    codebook, codes, errors = an.kmeans(pcadata, cheaders, K, categories = tmpcats)
    print len(codes)
    print codebook

    # build a confusion matrix: rows = cluster id, columns = true category
    confmtx = [[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0]]
    codes = codes.astype(int)
    truecats = truecats.astype(int)
    print len(codes)
    print len(truecats)
    for i in range(codes.shape[0]):
        confmtx[codes[i,0]][int(truecats[i,0])-1] += 1

    print "\nConfusion Matrix:\n"
    print 'Actual-> Walking Walk-up Walk-dwn Sitting Standing Laying'
    for i in range(len(confmtx)):
        s = 'Cluster %d' % (i)
        for val in confmtx[i]:
            s += "%10d" % (val)
        print s
    print
def main(argv):
    """PCA + K-means pipeline (Python 2 variant, corrected-label edition):
    load training features and true categories, run PCA without
    normalization, keep enough components for 99.9% of the variance,
    cluster into 6 clusters, and print a confusion matrix.

    argv: [script, train-CSV, categories-CSV].
    """
    if len(argv) < 3:
        print 'usage: python %s <Train CSV file> <Train categories CSV file>' % (argv[0])
        exit(-1)

    # read the features and categories data sets
    print 'Reading %s and %s' % (argv[1], argv[2])
    try:
        d = data.Data(argv[1])
    except:
        print 'Unable to open %s' % (argv[1])
        exit(-1)

    try:
        catdata = data.Data(argv[2])
    except:
        print 'Unable to open %s' % (argv[2])
        exit(-1)

    # execute PCA analysis (third argument False disables normalization)
    print 'Executing PCA'
    pcadata = an.pca( d, d.get_headers(), False )

    print 'Evaluating eigenvalues'
    # identify how many dimensions it takes to represent 90% of the variation
    # (threshold below is actually 99.9%, matching the printed message)
    evals = pcadata.get_eigenvalues()
    #print "type:",type(evals)
    # coerce to a matrix so evals[0, i] indexing works uniformly
    evals=np.asmatrix(evals)
    #print "type2:",type(evals)
    #print "shape: ",evals.shape
    esum = np.sum(evals)
    cum = evals[0,0]
    cumper = cum / esum
    i = 1
    while cumper < 0.999:
        cum += evals[0,i]
        cumper = cum/esum
        i += 1
    print 'Dimensions to reach 99.9% of variation:', i
    cheaders = pcadata.get_headers()[:i]

    # cluster the data
    K = 6

    # Use the average of each category as the initial means
    truecats = catdata.get_data(catdata.get_headers()[0:1])
    #tmpcats = truecats - 1
    # labels here are already 0-based, so no -1 adjustment is applied
    tmpcats = truecats # Don't adjust if we're using corrected labels

    print 'Clustering to %d clusters' % (K)
    codebook, codes, errors = an.kmeans(pcadata, cheaders, K, categories = tmpcats)

    # build a confusion matrix: rows = cluster id, columns = true category
    confmtx = [[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0]]
    for i in range(codes.shape[0]):
        #confmtx[codes[i,0]][int(truecats[i,0])-1] += 1
        confmtx[codes[i,0]][int(truecats[i,0])] += 1 # don't adjust

    print "\nConfusion Matrix:\n"
    print 'Actual-> Walking Walk-up Walk-dwn Sitting Standing Laying'
    for i in range(len(confmtx)):
        s = 'Cluster %d' % (i)
        for val in confmtx[i]:
            s += "%10d" % (val)
        print s
    print
# Spring 2015 # CS 251 Project 6 # # PCA test function # import numpy as np import data import analysis import sys if __name__ == "__main__": # if len(sys.argv) < 2: # print 'Usage: python %s <data file>' % (sys.argv[0]) # exit() d = data.Data("datasets/pcatest.csv") pcadata = analysis.pca(d.getHeaderRaw(), d, False) print "\nOriginal Data Headers" print pcadata.getDataHeaders()[2:] print "\nOriginal Data", print d.getDataNum(d.getHeaderRaw()) print "\nOriginal Data Means" print pcadata.getDataMeans() print "\nEigenvalues" print pcadata.getEigenvalues() print "\nEigenvectors" print pcadata.getEigenvectors() print "\nProjected Data" print pcadata.getDataNum(pcadata.getHeaderRaw())