Example #1
def pca_hash(x_train, XX, nbits, manhattan_hash=False, manhattan_bit=2):
  """
  Compute the hash code with Principal Component Analysis (PCA).

  Args:
    x_train: training data with shape (#train, #feature dimensions)
    XX: training and testing data with shape (#data, #feature dimensions)
    nbits: the number of dimensions of the resulting binary code
    manhattan_hash: if True, quantize each projection to manhattan_bit bits
      instead of thresholding at zero
  Returns:
    Y: the compact binary code (#data, nbits)
  """
  (n_train, _) = x_train.shape
  if manhattan_hash:
    nbits = int(np.ceil(nbits / manhattan_bit))

  (eigvec, _) = pca(x_train, nbits)
  eigvec = eigvec.real
  Y = np.dot(XX, eigvec)
  print("Shape after pca: ", Y.shape)
  # Y has shape (#data, nbits)
  if manhattan_hash:
    Y = manhattan_quant(Y, n_train, nbits, manhattan_bit)
  else:
    Y = Y >= 0
    Y = compactbit(Y)

  return Y
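A minimal smoke test, assuming the module's pca, compactbit, and manhattan_quant helpers are on the path (shapes and values below are illustrative):

import numpy as np

rng = np.random.default_rng(0)
x_train = rng.normal(size=(1000, 128))   # 1000 training vectors, 128-D
x_test = rng.normal(size=(200, 128))
XX = np.vstack([x_train, x_test])        # codes are computed for every row of XX
codes = pca_hash(x_train, XX, nbits=32)  # 32-bit PCA hash per row
print(codes.shape)                       # (1200, 4) if compactbit packs 8 bits per byte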
Example #2
def dpca(dihedrals, unit='degree', verbose=False):
    """Perform a dihedral pca
    Input:
        Dihedral angle data: rows (first index) are the angles and
                             columns (second index) observations
        unit: degree or radian
    Returns:
        Array of coordinates in the space spanned by the dihedral principal components
    """

    # Create cartesian coordinate space of x = cos(phi), y = sin(phi)
    cartcoords = np.zeros([2 * dihedrals.shape[0], dihedrals.shape[1]], dtype=dihedrals.dtype)
    if unit == "degree":
        cosines = np.cos(const.pi / 180.0 * dihedrals)
        sines   = np.sin(const.pi / 180.0 * dihedrals)
    elif unit == "radian":
        cosines = np.cos(dihedrals)
        sines   = np.sin(dihedrals) 
    else:
        raise ValueError("Angular unit must be 'degree' or 'radian', not {}".format(unit))
    cos_idx = np.arange(0,2*dihedrals.shape[0],2)
    sin_idx = np.arange(1,2*dihedrals.shape[0],2)
    cartcoords[cos_idx,:] = cosines
    cartcoords[sin_idx,:] = sines

    # Compute pca
    eigvals, eigvecs = pca(cartcoords, verbose=verbose)

    # (A naive double loop over points and eigenvectors computes the same
    # projection, but far more slowly than the vectorized version below.)

    # Project data on principal components more efficiently
    if verbose:
        print("Projecting data on principal components:", end="")
        starttime = time.time()   
    projectedcoords = np.zeros_like(cartcoords, dtype=cartcoords.dtype)
    msg = ""
    for eig_idx in range(eigvecs.shape[0]):
        if verbose:
            print(len(msg)*"\b", end="")
            msg = " {:3.0f}%".format(100.0*eig_idx / eigvecs.shape[0])
            print(msg, end="")
            sys.stdout.flush()
        product = cartcoords * eigvecs[:,eig_idx].reshape([eigvecs.shape[0],1])
        projectedcoords[eig_idx,:] = product.sum(0)
    if verbose:
        print(len(msg)*"\b", end="")
        print(" {:.2f} sec.".format(time.time() - starttime))    

    return eigvals, eigvecs, projectedcoords
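A quick sketch of calling dpca on synthetic angles (everything except dpca itself is illustrative):

import numpy as np

rng = np.random.default_rng(1)
dihedrals = rng.uniform(-180.0, 180.0, size=(5, 1000))  # 5 angles (rows) over 1000 frames (columns)
eigvals, eigvecs, projected = dpca(dihedrals, unit='degree')
print(projected.shape)  # (10, 1000): one cosine and one sine coordinate per angle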
Example #3
def reduce3D(embeddings, model, anchors_pre_emb):

    anchors_post_emb = model.encode(anchors_pre_emb)
    data = np.append(embeddings, np.array(anchors_post_emb), axis=0)
    data_frame = pd.DataFrame(data)
    pca_model = pca(n_components=3)
    results = pca_model.fit_transform(data_frame)  # returns a dict; 'PC' holds the projected coordinates
    dim3 = np.array(results['PC'])
    anchors = np.array([dim3[-3:]])
    return dim3, anchors
Example #4
def NN_dim_red(dim_red):
    #X,Y,cols,name = get_breast_cancer_data()
    X,Y,cols,name = get_wine_data()
    if dim_red=="pca":
        X = pca(X,2)
    elif dim_red=="ica":
        X = ica(X,2)
    elif dim_red=="rp":
        X = rp(X,2)
    elif dim_red=="cs":
        X = cs(X,Y)

    Y_ohc = one_hot_encoding(Y)

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y_ohc, random_state=1,shuffle=True)
    op_shape = Y_ohc.shape[1]

    model = Sequential()
    model.add(Dense(32, input_dim=X.shape[1], activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(op_shape, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    results = model.fit( X_train, Y_train, epochs= 50, batch_size = 32, validation_data = (X_test, Y_test),verbose=0)

    plt.plot(results.history['acc'], label="Training")
    plt.plot(results.history['val_acc'], label="Testing")
    plt.legend(loc='lower right')
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    #plt.show()
    if dim_red!="original":
        plt.title("NN_"+dim_red+"_"+str(name))
        plt.savefig("graphs/NN_"+dim_red+"_"+str(name)+".png")

        clf = MLPClassifier(solver='adam', hidden_layer_sizes=(32,16,4), random_state=0, activation='relu', max_iter = 50, batch_size=32) 
        clf = clf.fit(X_train, Y_train)
        train_predict = clf.predict(X_train)
        test_predict = clf.predict(X_test)
        plt2 = plot_learning_curve(clf, "NN_"+dim_red+"_lc_"+str(name), X, Y, ylim=[0,1])
        plt2.savefig("graphs/NN_"+dim_red+"_lc_"+str(name))
    else:
        plt.title("NN_"+str(name))
        plt.savefig("graphs/NN_"+str(name)+".png")

        clf = MLPClassifier(solver='adam', hidden_layer_sizes=(32,16,4), random_state=0, activation='relu', max_iter = 50, batch_size=32) 
        clf = clf.fit(X_train, Y_train)
        train_predict = clf.predict(X_train)
        test_predict = clf.predict(X_test)
        plt2 = plot_learning_curve(clf, "NN_"+dim_red+"_lc_"+str(name), X, Y, ylim=[0,1])
        plt2.savefig("graphs/NN_"+dim_red+"_lc_"+str(name))
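A possible driver for the function above (assuming the dimensionality-reduction helpers it calls are importable); "original" skips the reduction step:

for method in ["original", "pca", "ica", "rp", "cs"]:
    NN_dim_red(method)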
Example #5
def main(argv=None):

    # Read the data file
    f = open('../../data/iris.data', 'r')
    lines = f.readlines()
    f.close()

    dataset = read_data()

    # input() returns a string in Python 3, so compare against string literals
    choice = ''
    while choice != '4':
        choice = input('1. Plot data\n2. PCA analysis\n3. Fisher\n4. Exit\n')
        if choice == '1':
            plot(dataset)
        elif choice == '2':
            pca(dataset)
        elif choice == '3':
            fischer(dataset)
        elif choice == '4':
            return
        else:
            print('Invalid input. Try again.')
Example #6
def test(n=100):

    data = linear_testdata(n)

    w, v = pca(data)

    print(v)

    plt.plot(data[0,:], data[1,:], '.')
    plt.plot([0, v[0,0]], [0, v[1,0]], 'r')
    plt.plot([0, v[0,1]], [0, v[1,1]], 'g')
    plt.xlim(-0.5, 1.5)
    plt.ylim(-0.5, 1.5)
    plt.show()
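linear_testdata is not defined in this snippet; a plausible stand-in that matches the (2, n) layout the plot expects:

import numpy as np

def linear_testdata(n, noise=0.1, seed=0):
    # hypothetical helper: n points scattered around the line y = x, shape (2, n)
    rng = np.random.default_rng(seed)
    t = rng.uniform(0.0, 1.0, size=n)
    return np.vstack([t, t + noise * rng.normal(size=n)])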
Example #7
def run_kmeans_pca():
    X_raw,Y,cols,name = get_breast_cancer_data()
    #X_raw,Y,cols,name = get_wine_data()

    X = pca(X_raw,2)

    c = len(np.unique(Y))
    kmeans = KMeans(n_clusters=c)
    kmeans.fit(X)
    y_kmeans = (kmeans.predict(X))

    plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, cmap='viridis')
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.title("Kmeans_pca_"+str(name))
    plt.savefig("graphs/Kmeans_pca_"+str(name)+".png")
Example #8
def run_gmm_pca():
    X_raw, Y, cols, name = get_breast_cancer_data()
    #X_raw,Y,cols,name = get_wine_data()
    X = pca(X_raw, 2)

    c = len(np.unique(Y))
    gmm = GaussianMixture(n_components=c)
    gmm.fit(X)
    y_pred = gmm.predict(X)

    plt.gca()
    plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap='viridis')
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.title("GMM_pca_" + str(name))
    plt.savefig("graphs/GMM_pca_" + str(name) + ".png")
Example #9
    def set_up_clustering(self):
        """
        Set up the clustering task by running PCA and splitting the data into training and testing sets.
        :return: None
        """
        new_data, variances, eigenvectors = pca(self.data)

        # truncate dimensions to just the first two
        small_data = new_data[:2, :]

        # if you haven't implemented PCA yet, you can test GMM by replacing the above code with
        # small_data = self.data[:2, :]

        # split data for validation
        d, n = small_data.shape

        # use fraction of data for training

        self.train_inds = np.random.rand(n) < 0.5

        self.train_data = small_data[:, self.train_inds]
        self.val_data = small_data[:, ~self.train_inds]
Example #10
    def test_pca(self):
        """
        Perform PCA on the synthetic data and check that the returned values are as expected.

        :return: None
        """
        new_data, variances, eigenvectors = pca(self.data)

        assert np.allclose(np.zeros(64), np.mean(
            new_data, 1)), "The data is not centered to be zero-mean."

        assert variances[0] + variances[1] > np.sum(variances[2:]), "Variance of the first two dimensions should " \
                                                                    "be greater than the variance of the rest"

        assert np.sum(variances[:2]) > np.sum(variances[2:]), "Variances of first two dimensions were not larger " \
                                                              "than variances of the rest of the noise dimensions"

        assert np.var(eigenvectors[:, 0].T.dot(self.data)) > np.var(eigenvectors[:, 1].T.dot(self.data)), \
            "First principal direction doesn't have more variance than second principal direction"

        assert np.var(eigenvectors[:, 1].T.dot(self.data)) > np.var(eigenvectors[:, 2].T.dot(self.data)), \
            "Second principal direction doesn't have more variance than third principal direction"

        vector_0_1 = self.data[:, 1] - self.data[:, 0]
        new_vector_0_1 = new_data[:, 0] - new_data[:, 1]

        assert np.allclose(np.linalg.norm(vector_0_1), np.linalg.norm(new_vector_0_1)), "Distance between example 0 " \
                                                                                        "and 1 is not the same before " \
                                                                                        "and after PCA"

        assert np.allclose(eigenvectors.T.dot(eigenvectors),
                           np.eye(64)), "Eigenvectors were not orthogonal"

        reconstructed = eigenvectors.dot(new_data)
        assert np.allclose(reconstructed[:, 0] - self.data[:, 0], reconstructed[:, 1] - self.data[:, 1]), \
            "Reconstructed points do not match the originals up to a constant (mean) offset"
Example #11
data = sio.loadmat('ex7data1.mat')
X = data['X']
plt.scatter(X[:, 0], X[:, 1])
plt.axis([0.5, 6.5, 2, 8])
plt.show()
input('Program paused. Press enter to continue.\n')

# =============== Part 2: Principal Component Analysis ===============
#  You should now implement PCA, a dimension reduction technique. You
#  should complete the code in pca.m
#
print('\nRunning PCA on example dataset.\n')

X_norm, mu, sigma = featureNormalize(X)

U, S = pca(X_norm)
print(X_norm.shape)
print(mu.shape)
print(sigma.shape)
print(U.shape)
print(S.shape)
drawLine(mu, mu + 1.5 * S[0] * U[:, 0])
drawLine(mu, mu + 1.5 * S[1] * U[:, 1])
plt.scatter(X[:, 0], X[:, 1])
plt.axis([0.5, 6.5, 2, 8])
plt.show()
print('Top eigenvector: \nU[:, 0] = {}'.format(U[:, 0]))
print('You should expect to see [-0.707107 -0.707107]')

input('Program paused. Press ENTER to continue')
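The U, S = pca(X_norm) interface used above (principal directions plus the corresponding variances) can be implemented via the SVD of the covariance matrix; a sketch under that assumption:

import numpy as np

def pca(X):
    # X: (m, n), already feature-normalized
    m = X.shape[0]
    Sigma = X.T @ X / m             # covariance matrix
    U, S, _ = np.linalg.svd(Sigma)  # columns of U are the principal directions
    return U, S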
Example #12
            continue
        num += 1  # the number of selected columns gives the dimensionality (the raw data is p*n)
        line = line[1:]  # drop the leading label string
        line = list(map(float, line))
        data.append(line)

    f.close()
    print("Data has been read successfully.")
    print("The dimension is " + str(num))
    print(data[0][0])

    data = np.array(
        data
    ).T  # the raw data has one column per example and one row per dimension; transpose so each row is an example and each column a dimension, the form PCA expects
    print("Now reducing dimension...")
    lowDData = pca(dataMat=data, percentage=0, k=k_dimentions)
    print("Finished, the new dimension is :" + str(len(lowDData[0])))

    print("Start writing new data...")
    destfile = '../../data_dimRed_' + str(PCA_percentage) + '.txt'
    print(len(lowDData))
    f = open(destfile, 'w')
    for i in range(0, len(lowDData)):
        for j in range(0, len(lowDData[i])):
            f.write(str(lowDData[i][j]) + '\t')
        f.write('\n')
    end_time = time.time()
    duration = end_time - start_time
    print('Time cost: %fs.\n' % float(duration))
    print("Finished the whole work.")
Example #13
import scipy.optimize as op
import matplotlib
from matplotlib import pyplot as plt
from pca import *

if __name__ == '__main__':
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))  # figsize=(10, 5) controls the size of the generated figure
    ex7data1 = np.load('ex7data1.npz')
    x = ex7data1['X']
    m, n = x.shape
    ax[0].scatter(x[:, 0], x[:, 1], c='b', marker='o')
    ax[0].set_title('Original Data')
    norm, mean, std = normalize(x)
    ax[1].scatter(norm[:, 0], norm[:, 1], c='b', marker='*')
    ax[1].set_title('Normalized Data')
    u, s = pca(norm)  # get the eigenvectors and the eigenvalues
    z = project(norm, u, 1)
    # recovery from projected data
    xr = recovery(z, u, 1)
    ax[1].scatter(xr[:, 0], xr[:, 1], c='r', marker='+')

    # reverse operation of normalization on approximate reconstruction xr
    xrr = revernorm(xr, mean, std)  # inverse of the normalization
    ax[0].scatter(xrr[:, 0], xrr[:, 1], c='r', marker='*')

    for i in range(0, m):
        print(x[i])
        line0 = np.vstack((x[i], xrr[i]))
        print(line0)
        line1 = np.vstack((norm[i], xr[i]))
        ax[0].plot(line0[:, 0], line0[:, 1], 'k--')
Example #14
	datas_=datas(d)
	for i in data:
		for j in range(d):
			datas_.append_var(j, float(i[j]))
	
	for i in label:
		datas_.append_var(d,float(i))
	return datas_
			
def pca(data,d):
	data_ = nolabel(data, d)
	pca_(data_, d)

def fld(data,label,d):
	data_ = labelling(data,label,d)
	fld_(data_, d)

def k_means_clustering(data,k):
	d=len(data[0])
	data_=nolabel(data,d)
	k_means_clustering_(data_,k,d)

def spectral_clustering(affinity_matrix,k):
	spectral_clustering_(affinity_matrix, k)


y=pca(data,d)
y=fld(data,label,d)
labels=k_means_clustering(data,k)
labels=spectral_clustering(affinity_matrix, k)
Example #15
            print(i / 200)
        if float(line[1]) < 10:
            continue
        num += 1
        line = line[1:]
        line = list(map(float, line))
        data.append(line)

    f.close()
    print("Data has been read successfully.")
    print("The dimension is " + str(num))
    print(data[0][0])

    data = np.array(data).T
    print("Now reducing dimension...")
    lowDData = pca(data, 0.99)
    print("Finished, the new dimension is :" + str(len(lowDData[0])))

    print("Start writing new data...")
    destfile = '../../data_dimRed_0.99.txt'
    print(len(lowDData))
    f = open(destfile, 'w')
    for i in range(0, len(lowDData)):
        for j in range(0, len(lowDData[i])):
            f.write(str(lowDData[i][j]) + '\t')
        f.write('\n')

    print("Finished the whole work.")

    # testArray = np.array([[4,3,2],[3,2,1],[2,0,0]])
    # lowd,res = pca(testArray)
Example #16
			# squared Euclidean distance from the test point to the i-th centroid
			distance = np.inner(test_data - self.central_point[i], test_data - self.central_point[i])
			distance = np.sum(distance)
			distances.append(distance)
		distances = np.array(distances)
		#print distances
		t = np.argmin(distances)

		return t

k_means = K_means_classifier(10)
all_data = np.vstack((train_image, test_image))

all_data = pca(all_data, topNfeat = 700)
train_image = all_data[:60000]
test_image = all_data[60000:]

k_means.train(train_image, train_label)

#print k_means.central_point
cnt = 0
for i in range(1, num_test):
	t = k_means.predict(test_image[i])
	if t == test_label[i]:
		cnt += 1
	if i % 1000 == 0:
		print(i)
		print(cnt / i)
		log.write('test size = ' + str(i) + ' test accuracy: ')
Example #17
correlacao = [stats.pearsonr(n.array(col), n.array(coluna))[0]
              for col in colunas
              for coluna in colunas]
for i in range(8):
    m.append(correlacao[i*8 : i*8 + 8])
# pearson == m

print('PEARSON')

for linha in m:
    print([str(round(x, ndigits=2)) for x in linha])


# PCA computation
#T, P, E = pca.PCA_nipals(nn)
matriz_cov, autovetores, autovalores, autovalores_prop, dados_finais = pca(nn)
T = dados_finais
P = autovetores.T
E = autovalores
princ = T[:,:2]

# eigenvalues as percentages
print('AUTOVALORES', E * 100)

# contributions
c1 = P[0]
c2 = P[1]
cc1 = c1 / sum(abs(c1)) * 100
cc2 = c2 / sum(abs(c2)) * 100
print('CONTRIBUICOES')
print('C1', [abs(x) for x in cc1])
Example #18
    col = 10
    row = int(np.ceil(n / 10.0))
    fig, ax = plt.subplots(row, col)
    #rnd = np.random.randint(0,np.size(x,0),n)
    rnd = np.arange(0, n)
    xl = x[rnd].reshape((n, picsize[0], picsize[1]))
    for i in range(0, row):
        for j in range(0, col):
            # transpose xl[i * col + j]; otherwise the image is displayed sideways
            ax[i, j].imshow(xl[i * col + j].T, cmap=plt.cm.gray)
            ax[i, j].set_xticks([])
            ax[i, j].set_yticks([])
            ax[i, j].set_title(str)
    plt.axis('off')


if __name__ == '__main__':
    k = 100
    x = np.load('ex7faces.npz')['X']
    norm, mean, std = normalize(x)
    u, s = pca(norm)
    z = project(norm, u, k)
    # recovery from projected data
    xr = recovery(z, u, k)

    randomShow(x, 50, (32, 32), 'x')        # original images, 1024 features each
    randomShow(norm, 50, (32, 32), 'norm')  # the images after normalization
    randomShow(z, 50, (10, 10), 'z')        # PCA-compressed images, using only the 100 most important components
    randomShow(xr, 50, (32, 32), 'xr')      # images reconstructed from the first 100 principal components
    plt.show()
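project and recovery are imported from pca but not shown; sketches consistent with how they are called above (u holds the principal directions in its columns):

import numpy as np

def project(x, u, k):
    # project the rows of x onto the first k principal directions
    return x @ u[:, :k]

def recovery(z, u, k):
    # map k-dimensional projections back to the original space
    return z @ u[:, :k].T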
Example #19
img_3 = numpy.array(I_3).reshape(1, numpy.prod(numpy.array(I_3).shape))
img_4 = numpy.array(I_4).reshape(1, numpy.prod(numpy.array(I_4).shape))
img_5 = numpy.array(I_5).reshape(1, numpy.prod(numpy.array(I_5).shape))


# print(img_1.shape)
# print(img_2.shape)
# print(img_3.shape)
# print(img_4.shape)
# print(img_5.shape)

cat_dataset = numpy.vstack((img_1,img_2,img_3,img_4,img_5))
# print(cat_dataset.shape)

#print(dataMat)
lowDDataMat, reconMat = pca(cat_dataset, 500)
numpy.save(file="./data/mat/lowDDataMat.npy", arr=lowDDataMat)
numpy.save(file="./data/mat/reconMat.npy", arr=reconMat)
print(lowDDataMat.shape)
print(reconMat.shape)



# reimg_1 = numpy.vsplit(reconMat,5)[0]
# print(reimg_1.shape)


# trans_img1 = numpy.reshape(reimg_1,numpy.array(I_1).shape)
# print(trans_img1.shape)
# reimg_1 = Image.fromarray(trans_img1).convert('RGB')
# reimg_1.save('./data/reconimg/01.png')
Example #20
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # flatten() turns the matrix elements into a 1-D matrix; .A converts the matrix to an ndarray, and A[0] takes the underlying row
    ax.scatter(dataMat[:, 0].flatten().A[0],
               dataMat[:, 1].flatten().A[0],
               marker='^',
               s=90,
               c='green')
    ax.scatter(reconMat[:, 0].flatten().A[0],
               reconMat[:, 1].flatten().A[0],
               marker='o',
               s=50,
               c='red')
    plt.show()


if __name__ == "__main__":
    # 1. Load the data and convert its type to float
    dataMat = loadDataSet('./testSet.txt')
    # print('raw feature data:\n', dataMat)

    # 2. PCA: set the number of principal components to keep
    lowDmat, reconMat = pca(dataMat, 1)
    # print(shape(lowDmat))
    # with 2 eigenvectors the result matches the original data; nothing changes
    # lowDmat, reconMat = pca(dataMat, 2)
    # print(shape(lowDmat))

    # 3. Visualize the reduced data together with the original data
    show_picture(dataMat, reconMat)
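pca here follows the Machine Learning in Action interface, returning the projected data together with a reconstruction in the original space; a sketch consistent with the calls above:

from numpy import argsort, cov, linalg, mat, mean

def pca(dataMat, topNfeat):
    meanVals = mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVals                   # center the data
    covMat = cov(meanRemoved, rowvar=0)
    eigVals, eigVects = linalg.eig(mat(covMat))
    eigValInd = argsort(eigVals)[:-(topNfeat + 1):-1]  # indices of the largest eigenvalues
    redEigVects = eigVects[:, eigValInd]
    lowDmat = meanRemoved * redEigVects                # project onto the top components
    reconMat = (lowDmat * redEigVects.T) + meanVals    # reconstruct in the original space
    return lowDmat, reconMat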
Example #21
        line = f.readline()
        line = line[:-1].split('\t')
        if float(line[1]) < 10:
            continue
        num += 1
        line = line[1:]
        line = list(map(float, line))
        data.append(line)

    f.close()
    print("Data has been read successfully.")
    print("The dimension is " + str(num))

    data = np.array(data).T
    print("Now reducing dimension...")
    lowDData = pca(data, 0.90)
    print("Finished, the new dimension is :" + str(len(lowDData[0])))

    print("Start writing new data...")
    destfile = '../data/data_dimRed.txt'
    f = open(destfile, 'w')
    for i in range(0, len(lowDData)):
        for j in range(0, len(lowDData[i])):
            f.write(str(lowDData[i][j]) + '\t')
        f.write('\n')

    print("Finished the whole work.")

    # testArray = np.array([[4,3,2],[3,2,1],[2,0,0]])
    # lowd,res = pca(testArray)
    # print(res)
Example #22
def compressITQ(mx, bit, iters):
    Y = pca(mx, bit)
    # presumably the ITQ codes are meant to be returned rather than discarded
    return itq(Y, iters)
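itq is not shown; a compact sketch of iterative quantization (Gong & Lazebnik), assuming Y is the zero-centered PCA projection with one sample per row:

import numpy as np

def itq(Y, iters, seed=0):
    # start from a random orthogonal rotation of the code space
    rng = np.random.default_rng(seed)
    R, _ = np.linalg.qr(rng.normal(size=(Y.shape[1], Y.shape[1])))
    for _ in range(iters):
        B = np.sign(Y @ R)                 # fix R, update the binary codes
        U, _, Vt = np.linalg.svd(Y.T @ B)  # fix B, solve orthogonal Procrustes for R
        R = U @ Vt
    return np.sign(Y @ R) > 0, R           # boolean codes and the learned rotation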
Example #23
# %%
import pca
print(pca.__version__)

# %%
from sklearn.datasets import load_iris
import pandas as pd
from pca import pca

# Initialize
model = pca(n_components=3)

# Dataset
X = pd.DataFrame(data=load_iris().data,
                 columns=load_iris().feature_names,
                 index=load_iris().target)

# Fit transform
out = model.fit_transform(X)

# Make plots
model.scatter()
ax = model.biplot(n_feat=4)
ax = model.plot()

# Make 3d plots
model.scatter3d()
ax = model.biplot3d()

# Normalize out PCs
model = pca()
Example #24
from pca import *

# ---------------------------------------------- #
# SCRIPT TO RUN PCA ON SLOO                      #
# ---------------------------------------------- #

# Filepath to SLOO Data
trainName = "../../dataset/sloo/train.csv"
testName = "../../dataset/sloo/test.csv"
pca(trainName, testName)
Example #25
    print()
    print(
        "pca - can be followed by two distinct integers between 0 and 15 for custom use of columns"
    )
    print(
        "isomap - must be followed by a positive integer determining the number of nearest neighbors"
    )
    shutdown()

mat, zoo_type, zoo_name = get_data_matrix()
x = None

if arg == "pca":
    if len(sys.argv) < 3:
        x = pca(mat)
    elif len(sys.argv) == 3:
        print("Invalid number of PCA integer arguments")
        shutdown()
    else:
        try:
            t1 = int(sys.argv[2])
            t2 = int(sys.argv[3])
            x = pca(mat, [t1, t2])
        except ValueError:
            print("PCA integer argument invalid")
            shutdown()

elif arg == "mds-data":
    mat = center_matrix(mat)
    x = mds_data(mat)
Example #26
#Example: using PCA to reduce the dimensionality of
#		  semiconductor manufacturing data
#Author: Justin Nie
#Date: 2018/2/15

from numpy import *
from pca import *

dataset = load_dataset('secom.data', ' ')
data_mat = mat(dataset)
data_mat = replace_nan(data_mat)
check_eigen(data_mat, 20)
low_data_mat, new_data_mat = pca(data_mat, 20)
print(shape(low_data_mat))
print(shape(data_mat))
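replace_nan is not listed here; in the classic secom example it substitutes each NaN with the mean of the non-NaN values in the same feature column, roughly:

from numpy import isnan, mean, nonzero, shape

def replace_nan(data_mat):
    # data_mat is a numpy matrix; fill NaNs column by column with the column mean
    for i in range(shape(data_mat)[1]):
        col = data_mat[nonzero(~isnan(data_mat[:, i].A))[0], i]
        data_mat[nonzero(isnan(data_mat[:, i].A))[0], i] = mean(col)
    return data_mat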

Example #27
#-*-coding:utf-8-*-
# reduce the dimensionality of the semiconductor data
from pca import *
dataMat = replaceNanWithMean()

lowDataMat, reconMat = pca(dataMat, 6)
print(lowDataMat)
Example #28
print(shape(eigVals))
print(eigVals)

print(shape(eigVects))
print(eigVects)

import matplotlib.pyplot as plt

Var = eigVals
Var_sum = sum(Var)
Var_rate = Var / Var_sum
plt.plot(Var_rate[:20], 's-')
plt.show()

Var_add = cumsum(Var) / Var_sum  # cumulative explained-variance ratio
plt.plot(Var_add[:20], 's-')
plt.show()

lowDMat, reconMat = pca(dataMat, 6)
print(lowDMat)
print(reconMat)

lowDMat, reconMat = pca(dataMat, 20)
print(lowDMat)
print(reconMat)
Example #29
from pca import *
#import plotter

model = pca('data-1.txt')
#plotter.pca_plotter(model)