def plotCorrelationCircle(X, column, names):
    """Plot the first two PCA components of ``X`` with a correlation circle.

    Draws a pickable scatter of the projected samples, a red unit circle, and
    one red segment per entry of ``column`` running from the origin to that
    feature's normalised loading on components 1 and 2. Clicking a point
    annotates it with the matching entry of ``names``.

    Args:
        X: 2-D data matrix handed to ``pca``; presumably one row per sample
            and one column per feature -- confirm against ``pca``.
        column: feature labels, in the same order as the columns of ``X``.
        names: per-sample labels, indexable by the row position of a sample.
    """
    # Three components are requested from pca, but only the first two are drawn.
    (Y, perc, comp) = pca(X, 3)
    print("variance explained : " + str(perc))

    # Calculate how important each feature was: left-multiplying by the
    # inverse of diag(std) divides row i of comp by the std of feature i.
    scr = np.dot(np.linalg.inv(np.diag(np.std(X, axis=0))), comp)
    # Scale results to match when we plot them: every column of scr is
    # brought to unit Euclidean norm.
    scr = scr / np.linalg.norm(scr, axis=0)

    def onpick(event, axes, Y):
        """Annotate every picked point with its own label."""
        # event.ind is a sequence of indices (overlapping points are picked
        # together), so each one is annotated separately instead of passing
        # the whole index array to a single annotate() call.
        for picked in event.ind:
            axes.annotate(str(names[picked]), (Y[picked, 0], Y[picked, 1]))
        plt.draw()

    # Scatter plot on the principal components; points are labelled on demand
    # through the pick handler rather than all at once.
    fig, ax1 = plt.subplots()
    ax1.scatter(Y[:, 0], Y[:, 1], picker=True)
    ax1.add_artist(plt.Circle((0, 0), 1, color='r', fill=False))
    fig.canvas.mpl_connect('pick_event', partial(onpick, axes=ax1, Y=Y))

    # One segment and one text label per feature.
    for i, v in enumerate(column):
        ax1.plot([0, scr[i, 0]], [0, scr[i, 1]], 'r-', linewidth=2)
        ax1.text(scr[i, 0], scr[i, 1], v, color='r', ha='center', va='center')

    ax1.axhline(y=0, color='k')
    ax1.axvline(x=0, color='k')
    ax1.xaxis.grid(True)
    ax1.yaxis.grid(True)
    ax1.set_xlabel('1st Principal Component')
    ax1.set_ylabel('2nd Principal Component')
    plt.show()
def plotPCA(X, max_components=9):
    """Plot the percentage returned by ``pca`` against the component count.

    ``pca(X, k)`` is evaluated for every ``k`` from 1 to ``max_components``
    and the second element of each result is plotted as a blue line.

    Args:
        X: data matrix passed unchanged to ``pca``.
        max_components: largest number of components to evaluate (inclusive).
            The default of 9 reproduces the previously hard-coded range 1..9;
            pass a smaller value for data sets with fewer features.
    """
    component_counts = range(1, max_components + 1)
    # pca returns a 3-tuple; only its second element (the percentage) is used.
    perc_ = [pca(X, k)[1] for k in component_counts]

    fig, ax1 = plt.subplots(figsize=(14, 6))
    ax1.plot(component_counts, perc_, 'b-', label="eigen percentage pca maison")
    ax1.legend()
    ax1.set_xlabel('number of components')
    ax1.set_ylabel('percentages of variance explained')
    plt.show()
def plotPCA3D(X, column, names):
    """Show the first three PCA components of ``X`` as a 3-D point cloud.

    ``column`` and ``names`` are accepted but not used by this function.
    """
    projected = pca(X, 3)[0]

    # Plotting code credited to "sébastien chakra" in the original comment.
    figure = plt.figure(figsize=(8, 8))
    axes3d = figure.add_subplot(111, projection='3d')
    plt.rcParams['legend.fontsize'] = 10

    xs, ys, zs = projected[:, 0], projected[:, 1], projected[:, 2]
    axes3d.plot(
        xs, ys, zs,
        'o', markersize=8, color='blue', alpha=0.5, label='x',
    )
    axes3d.legend(loc='upper right')

    plt.xlabel('1st Principal Component')
    plt.ylabel('2nd Principal Component')
    plt.show()
plt.ylabel('values')

###
print("---------------------------")
# "arts" is column 6: print the name of the row holding its maximum.
print(names[X[:, 6].argmax()])
plt.show()

# Normalise the data so all features share a uniform range (prettier plotting
# too; standardising would be an alternative): each column is divided by its
# Euclidean norm.
column_norms = np.linalg.norm(X, axis=0)
X = X / column_norms

# Perform PCA: collect the percentage reported by pca for 1..9 components.
perc_ = []
for i in range(1, 10):
    perc_.append(pca(X, i)[1])

fig, ax1 = plt.subplots(figsize=(14, 6))
ax1.plot(range(1, 10), perc_, 'b-', label="eigen percentage")
ax1.set_xlabel('number of components')
ax1.set_ylabel('percentages')
plt.show()

Y, perc, comp = pca(X, 2)
print("variance:" + str(perc))

# Calculate how important each feature was: row i of comp is divided by the
# standard deviation of feature i.
inverse_std = np.linalg.inv(np.diag(np.std(X, axis=0)))
scr = np.dot(inverse_std, comp)
# Scale results to match when we plot them (unit-norm columns).
scr = scr / np.linalg.norm(scr, axis=0)
plt.show()

singlelinkage_clustering(X, 2)
# plotPCA3D(X, column, names)
knn_result = knn_clustering(X, 5)
clustering_analysis(X, knn_result, plot=True)

# Perform PCA with a single component.
Y, perc, comp = pca(X, 1)

# Plot percentage of the feature space according to eigenvalues.
plotPCA(X)

# Plot Correlation Circle
df_tmp = df
# Per-statistic coefficients, keyed by column name.
coeffs = {
    'rebounds': 0.5,
    'blocks': 0.5,
    'personal_fouls': 1,
    'assists': 1,
}
# Plot the distribution of every feature as box plots on a log-scaled y axis.
fig, ax1 = plt.subplots(figsize=(14, 6))
data.boxplot(column=column)
ax1.xaxis.grid(False)
ax1.set_yscale('log')
plt.xlabel('parameters')
plt.ylabel('values')

###
# Single-argument print(...) produces the same output as the former Python 2
# print statement and is also valid on Python 3.
print("---------------------------")
print(names[data['arts'].idxmax()])
plt.show()

#%%
# Perform PCA
(Y, perc) = pca(X, 2)
print("variance:" + str(perc))

# Calculate how important each feature was
scr = np.dot(np.transpose(X), Y)
# Scale results to match when we plot them. The range is computed once so
# both columns share the same factor; recomputing it after column 0 had been
# rescaled gave column 1 a different factor and distorted the directions.
scr_range = scr.max() - scr.min()
scr[:, 0] = scr[:, 0] / scr_range
scr[:, 1] = scr[:, 1] / scr_range


# Scatter plot on principal components.
# This function is only needed to update the scatter plot when points are
# selected.
def onpick(event, axes, Y):
    """Annotate every picked point of the scatter plot with its name.

    Args:
        event: matplotlib pick event; ``event.ind`` holds the picked indices.
        axes: axes on which the annotations are drawn.
        Y: projected samples; columns 0 and 1 give the annotation position.
    """
    # Several overlapping points can be picked at once, so annotate them one
    # by one instead of handing the whole index array to annotate().
    for picked in event.ind:
        axes.annotate(str(names[picked]), (Y[picked, 0], Y[picked, 1]))
    plt.draw()
# Bare reference to the imported name; presumably keeps the Axes3D import
# (needed for the '3d' projection on older matplotlib) from looking unused.
Axes3D

n_points = 2000
# Use the public sklearn.datasets entry point: the private
# `datasets.samples_generator` module path was deprecated and later removed.
# (Assumes `datasets` is sklearn.datasets -- confirm against the imports.)
X, color = datasets.make_s_curve(n_points, random_state=0)
#X, color = datasets.make_swiss_roll(n_points, random_state=0)
n_components = 2
n_neighbors = 5

# Panel 1 of a 2x5 grid: the raw 3-D data set, coloured by `color`.
fig = plt.figure(figsize=(15, 8))
ax = fig.add_subplot(251, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
ax.view_init(4, -72)

#------PCA--------our implementation
t0 = time()
(Y, perc) = pcaImp.pca(X, n_components)
t1 = time()
print("PCA(imp): %.2g sec" % (t1 - t0))

# Panel 2: the 2-D projection, with tick labels hidden.
ax = fig.add_subplot(252)
plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
plt.title("PCA(imp) (%.2g sec)" % (t1 - t0))
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
plt.axis('tight')
#-----------------

#------MDS--------our implementation (classical MDS)
t0 = time()
# Full pairwise Euclidean distance matrix in square form, as input to mds.
D = spd.squareform(spd.pdist(X, 'euclidean'))
Y = mdsImp.mds(D, n_components)
t1 = time()