Example #1
import numpy as np
import matplotlib.pyplot as plt
from functools import partial


def plotCorrelationCircle(X, column, names):
    (Y, perc, comp) = pca(X,3)
    print("variance explained : " + str(perc))
    # Calculate how important each feature was
    scr = np.dot(np.linalg.inv(np.diag(np.std(X, axis=0))),comp)
    # Scale results to match when we plot them
    scr = scr/np.linalg.norm(scr, axis=0)
    #scatter plot on principal components
    ## we need this function only to update the scatter plot when a point is picked
    def onpick(event, axes, Y):
        ind = event.ind[0]  # event.ind is a sequence of picked indices; annotate the first one
        axes.annotate(names[ind], (Y[ind, 0], Y[ind, 1]))
        plt.draw()
    fig, ax1 = plt.subplots()
    ax1.scatter(Y[:, 0], Y[:, 1], picker = True)
    ax1.add_artist(plt.Circle((0, 0), 1, color='r', fill = False))
    # Label the points (static alternative to the pick callback)
    # for label, x, y in zip([item[0] for item in names], Y[:, 0], Y[:, 1]):
    #     plt.annotate(
    #         label,
    #         xy = (x, y), xytext = (-1, 1),
    #         textcoords = 'offset points', ha = 'right', va = 'bottom',
    #         arrowprops = dict(arrowstyle = '-', connectionstyle = 'arc3,rad=0'))
    fig.canvas.mpl_connect('pick_event', partial(onpick, axes = ax1, Y = Y))
    for i, v in enumerate(column):
        ax1.plot([0, scr[i, 0]], [0, scr[i, 1]], 'r-', linewidth=2)
        plt.text(scr[i, 0], scr[i, 1], v, color='r', ha='center', va='center')
    ax1.axhline(y=0, color='k')
    ax1.axvline(x=0, color='k')
    ax1.xaxis.grid(True)
    ax1.yaxis.grid(True)
    plt.xlabel('1st Principal Component')
    plt.ylabel('2nd Principal Component')
    plt.show()
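
Note: all of these examples call a pca(X, n_components) helper that is not included in this listing; judging from the call sites it returns the projected data Y, the fraction of variance explained, and the component matrix. The sketch below shows one way such a helper could be written, using a plain eigendecomposition of the covariance matrix; it is an assumption for readability, not the original implementation.

# Minimal PCA sketch (assumed interface: projected data, explained-variance fraction,
# and the component matrix, matching how pca() is called in these examples).
import numpy as np

def pca(X, n_components):
    Xc = X - X.mean(axis=0)                      # center the data
    cov = np.cov(Xc, rowvar=False)               # covariance matrix of the features
    eigvals, eigvecs = np.linalg.eigh(cov)       # eigendecomposition (symmetric matrix)
    order = np.argsort(eigvals)[::-1]            # sort by decreasing eigenvalue
    eigvals, eigvecs = eigvals[order], eigvecs[:, order]
    comp = eigvecs[:, :n_components]             # principal axes, shape (d, n_components)
    Y = Xc @ comp                                # data projected onto the principal axes
    perc = eigvals[:n_components].sum() / eigvals.sum()  # fraction of variance explained
    return Y, perc, comp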
Example #2
def plotPCA(X):
    perc_=[]
    for i in range(1, 10):
        (Y, perc, comp) = pca(X, i)
        perc_.append(perc)

    fig, ax1 = plt.subplots(figsize=(14,6))
    plt.plot(range(1, 10), perc_, 'b-', label="explained variance (homemade PCA)")
    plt.legend()
    plt.xlabel('number of components')
    plt.ylabel('percentages of variance explained')
    plt.show()
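
A minimal usage sketch for plotPCA, assuming X is an (n_samples, n_features) NumPy array of numeric data; the normalization line is the one used in Example #4.

X = X / np.linalg.norm(X, axis=0)  # scale each feature column to unit norm, as in Example #4
plotPCA(X)                         # plots explained variance against the number of components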
Example #3
def plotPCA3D(X, column, names):
    (Y, perc, comp) = pca(X, 3)

    # Code by Sébastien Chakra
    fig = plt.figure(figsize=(8,8))
    ax = fig.add_subplot(111, projection='3d')
    plt.rcParams['legend.fontsize'] = 10
    ax.plot(Y[:, 0], Y[:, 1], Y[:, 2], 'o', markersize=8, color='blue', alpha=0.5, label='x')
    # ax.plot(class2_sample[0,:], class2_sample[1,:], class2_sample[2,:], '^', markersize=8, alpha=0.5, color='red', label='class2')

    # plt.title('Samples for class 1 and class 2')
    ax.legend(loc='upper right')
    ax.set_xlabel('1st Principal Component')
    ax.set_ylabel('2nd Principal Component')
    ax.set_zlabel('3rd Principal Component')

    plt.show()
Example #4
plt.ylabel('values')

###
print("---------------------------")
#arts is column 6
print(names[X[:, 6].argmax()])
plt.show()

#normalize the data so that the features have comparable ranges (this also makes plotting nicer; standardizing would be an alternative)
X = X / np.linalg.norm(X, axis=0)

#perform PCA
#percentage of the feature space according to eigenvalues
perc_ = []
for i in range(1, 10):
    (Y, perc, comp) = pca(X, i)
    perc_.append(perc)

fig, ax1 = plt.subplots(figsize=(14, 6))
plt.plot(range(1, 10), perc_, 'b-', label="explained variance")
plt.legend()
plt.xlabel('number of components')
plt.ylabel('percentage of variance explained')
plt.show()

(Y, perc, comp) = pca(X, 2)
print("variance:" + str(perc))
#calculate how important each feature was
scr = np.dot(np.linalg.inv(np.diag(np.std(X, axis=0))), comp)
#scale results to match when we plot them
scr = scr / np.linalg.norm(scr, axis=0)
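
This excerpt stops right after the loadings scr are computed. A plausible continuation, mirroring the correlation-circle plotting code of Example #1 (the list of feature names, called column here, and the figure setup are assumptions carried over from that example):

fig, ax1 = plt.subplots()
ax1.scatter(Y[:, 0], Y[:, 1])
ax1.add_artist(plt.Circle((0, 0), 1, color='r', fill=False))  # unit correlation circle
for i, v in enumerate(column):  # 'column' is assumed to hold the feature names, as in Example #1
    ax1.plot([0, scr[i, 0]], [0, scr[i, 1]], 'r-', linewidth=2)
    plt.text(scr[i, 0], scr[i, 1], v, color='r', ha='center', va='center')
plt.xlabel('1st Principal Component')
plt.ylabel('2nd Principal Component')
plt.show()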
Example #5
plt.show()

singlelinkage_clustering(X, 2)

# plotPCA3D(X, column, names)

clustering_analysis(X, knn_clustering(X, 5), plot=True)

# Perform PCA
(Y, perc, comp) = pca(X, 1)

# Plot percentage of the feature space according to eigenvalues
plotPCA(X)

# Plot Correlation Circle
df_tmp = df
coeffs = {}

coeffs['rebounds'] = 0.5
coeffs['blocks'] = 0.5

coeffs['personal_fouls'] = 1

coeffs['assists'] = 1
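
The excerpt ends after these weights are defined, so how they are used is not shown. One possible, purely hypothetical reading is that each coefficient re-weights the corresponding column of df_tmp before PCA is run again:

# hypothetical use of the weights; not part of the original excerpt
for col, w in coeffs.items():
    if col in df_tmp.columns:
        df_tmp[col] = df_tmp[col] * w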
Example #6
#plot distribution over features
fig, ax1 = plt.subplots(figsize=(14, 6))
data.boxplot(column=column)
ax1.xaxis.grid(False)
ax1.set_yscale('log')
plt.xlabel('parameters')
plt.ylabel('values')

###
print "---------------------------"
print names[data['arts'].idxmax()]
plt.show()

#%%
#perform PCA
(Y, perc) = pca(X, 2)
print "variance:" + str(perc)
#calculate how important is feature was
scr = np.dot(np.transpose(X), Y)
#scale results to match when we plot them
scr[:, 0] = scr[:, 0] / (scr.max() - scr.min())
scr[:, 1] = scr[:, 1] / (scr.max() - scr.min())


#scatter plot on principal components
##we need this function only to update the scatter plot when we select points
def onpick(event, axes, Y):
    ind = event.ind[0]  # event.ind is a sequence of picked indices; annotate the first one
    axes.annotate(names[ind], (Y[ind, 0], Y[ind, 1]))
    plt.draw()
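
The excerpt ends with the callback definition; hooking it up to a figure is not shown here, but Example #1 connects the same callback as follows (the scatter-plot setup below is assumed for illustration):

from functools import partial

fig, ax1 = plt.subplots()
ax1.scatter(Y[:, 0], Y[:, 1], picker=True)  # picker=True enables pick events on the points
fig.canvas.mpl_connect('pick_event', partial(onpick, axes=ax1, Y=Y))
plt.show()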
Example #7
Axes3D  # referenced so linters keep the import; importing Axes3D registers the '3d' projection

n_points = 2000
X, color = datasets.samples_generator.make_s_curve(n_points, random_state=0)
#X, color = datasets.samples_generator.make_swiss_roll(n_points, random_state=0)
n_components = 2
n_neighbors = 5

fig = plt.figure(figsize=(15, 8))
ax = fig.add_subplot(251, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
ax.view_init(4, -72)

#------PCA--------our implementation
t0 = time()
(Y, perc) = pcaImp.pca(X, n_components)
t1 = time()
print("PCA(imp): %.2g sec" % (t1 - t0))
ax = fig.add_subplot(252)
plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
plt.title("PCA(imp) (%.2g sec)" % (t1 - t0))
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
plt.axis('tight')
#-----------------

#------MDS--------our implementation (classical MDS)
t0 = time()
D = spd.squareform(spd.pdist(X, 'euclidean'))
Y = mdsImp.mds(D, n_components)
t1 = time()
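
The excerpt is cut off right after timing the MDS step. A continuation mirroring the PCA subplot block above would presumably look like the following (the subplot index 253 is a guess):

print("MDS(imp): %.2g sec" % (t1 - t0))
ax = fig.add_subplot(253)                     # assumed position in the 2x5 grid
plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
plt.title("MDS(imp) (%.2g sec)" % (t1 - t0))
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
plt.axis('tight')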