Example #1
def get_data(pca_ON=False, print_shapes=False):
    data = pd.read_csv('mnist_train.csv').to_numpy()  # column 0 is the label, the remaining columns are pixel values

    # Hold out the last 10,000 rows as the test split
    Xtrain = data[:-10000, 1:]
    Ytrain = data[:-10000, 0]
    Xtest = data[-10000:, 1:]
    Ytest = data[-10000:, 0]

    dataset = {}

    if pca_ON:
        pca = PCA(n_components=30)
        pca.fit(Xtrain)
        if print_shapes:
            print('\nEigenvectors size:', pca.evecs.shape)
        Xtrain = pca.transform(Xtrain)
        Xtest = pca.transform(Xtest)

    if print_shapes:
        print('\nXtrain: {}, Ytrain: {}'.format(Xtrain.shape, Ytrain.shape))
        print('Xtest: {}, Ytest: {}'.format(Xtest.shape, Ytest.shape))

    dataset['train'] = (Xtrain, Ytrain)
    dataset['test'] = (Xtest, Ytest)

    return dataset
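A minimal usage sketch for get_data (a hypothetical call site; it assumes pandas as pd and a PCA class with fit/transform are already imported, exactly as the function body expects):

dataset = get_data(pca_ON=True, print_shapes=True)
Xtrain, Ytrain = dataset['train']
Xtest, Ytest = dataset['test']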
Example #2
def main(args):
    # Read data file into numpy matrices
    with gzip.open(args.mnist_train_data, 'rb') as in_gzip:
        magic, num, rows, columns = struct.unpack('>IIII', in_gzip.read(16))
        all_data = np.array([np.array(struct.unpack('>{}B'.format(rows * columns),
                                           in_gzip.read(rows * columns)))
                    for _ in range(16000)])
    with gzip.open(args.mnist_train_labels, 'rb') as in_gzip:
        magic, num = struct.unpack('>II', in_gzip.read(8))
        all_labels = struct.unpack('>16000B', in_gzip.read(16000))
    each_label = np.empty(10, dtype=object)  # images grouped by their true digit label
    for i in range(10):
        each_label[i] = all_data[np.array(all_labels) == i]
    pca = PCA(15)
    pca.fit(all_data)
    all_data_transform = pca.transform(all_data)
    kmeans_labels = KMeans(n_clusters=10, random_state=0).fit_predict(all_data_transform)
    each_cluster = np.empty(10, dtype=object)  # first two PCA coordinates, grouped by k-means cluster
    for i in range(10):
        each_cluster[i] = all_data_transform[:,:2][np.array(kmeans_labels) == i]
    f, axarr = plt.subplots(2, 10, figsize=(18, 4), sharey=True)
    for i in range(10):
        a = pca.transform(each_label[i])
        axarr[0][i].scatter(a.T[0], a.T[1], s = 1)
    for i in range(10):
        axarr[1][i].scatter(each_cluster[i].T[0], each_cluster[i].T[1], s = 1)
    #plt.show()
    # Coincidence matrix: rows are true digit labels, columns are k-means cluster ids
    coincidence_matrix = np.zeros((10, 10)).astype(int)
    for i in range(16000):
        coincidence_matrix[all_labels[i], kmeans_labels[i]] += 1
    print(coincidence_matrix)
    plt.savefig("labels_vs_kmeans_clusters.jpg")
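A hypothetical driver for main(args) scripts like the one above; the argument names match the attributes the code reads, and the default file names are only placeholders for the gzipped MNIST archives:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--mnist-train-data', dest='mnist_train_data',
                    default='train-images-idx3-ubyte.gz')  # placeholder path
parser.add_argument('--mnist-train-labels', dest='mnist_train_labels',
                    default='train-labels-idx1-ubyte.gz')  # placeholder path
main(parser.parse_args())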
Example #3
def main(args):
    # Read data file into numpy matrices
    with gzip.open(args.mnist_train_data, 'rb') as in_gzip:
        magic, num, rows, columns = struct.unpack('>IIII', in_gzip.read(16))
        all_data = [np.array(struct.unpack('>{}B'.format(rows * columns),
                                           in_gzip.read(rows * columns)))
                    for _ in range(60000)]
    # Read labels file into labels
    with gzip.open(args.mnist_train_labels, 'rb') as in_gzip:
        magic, num = struct.unpack('>II', in_gzip.read(8))
        all_labels = struct.unpack('>60000B', in_gzip.read(60000))
 
 
 
    # Fit a 5-component PCA on the raw images and reshape the components to 28x28
    pca = PCA(5)
    pca.fit(all_data)
    components = pca.return_components()
    components = np.reshape(components, (5, 28, 28))

    # Second 5-component PCA; fitting it on all_data is an assumption, and its
    # components are not used below
    one = PCA(5)
    one.fit(all_data)
    one_comp = one.return_components()

    f, axarr = plt.subplots(1, 5, figsize=(18, 4), sharey=True)
 
    for i in range(5):
        axarr[i].imshow(components[i])
        axarr[i].set_aspect('equal')
        axarr[i].set_title('Component {}'.format(i + 1))
    plt.tight_layout()
    name = 'Hrach'
    plt.savefig('comps-{}.png'.format(name), dpi=320)
Example #4
def main(args):
    # Read data file into numpy matrices
    with gzip.open(args.mnist_train_data, 'rb') as in_gzip:
        magic, num, rows, columns = struct.unpack('>IIII', in_gzip.read(16))
        all_data = np.array([
            np.array(
                struct.unpack('>{}B'.format(rows * columns),
                              in_gzip.read(rows * columns)))
            for _ in range(16000)
        ])
    with gzip.open(args.mnist_train_labels, 'rb') as in_gzip:
        magic, num = struct.unpack('>II', in_gzip.read(8))
        all_labels = struct.unpack('>16000B', in_gzip.read(16000))
    zeros = all_data[np.array(all_labels) < 0.5]  # images whose label is 0 (not used below)
    #plt.imshow(all_data[0].reshape(28,28))
    #plt.show()
    pca = PCA(5)
    pca.fit(all_data)
    print(pca.return_components().shape)
    components = pca.return_components().reshape(5, 28, 28)
    f, axarr = plt.subplots(1, 5, figsize=(18, 4), sharey=True)
    for i in range(5):
        axarr[i].imshow(components[i])
        print(i, components[i].shape)
        axarr[i].set_aspect('equal')
        axarr[i].set_title('Component {}'.format(i + 1))
    plt.tight_layout()
    name = 'TODO'  # TODO: Replace name with your name
    plt.savefig('comps-{}.png'.format(name), dpi=320)
Example #5
class MSNM:
    # M is the dimensionality of the input variables
    def __init__(self, M):
        self.pca = PCA()
        # scaling parameters passed to the PCA
        self.pca_w = np.ones(M)
        self.M = M

    # Train with R2R optimization: n_component is the number of principal components, K is the number of R2R evaluations, and rc is the perturbation rate applied to w
    def train(self,
              label_train,
              train_labels,
              unlabel_train,
              n_component=50,
              K=10,
              rc=0.1,
              epoch=20):
        pls = PLS()
        S = np.empty((K, self.M))
        Y = np.empty(K)
        for e in range(epoch):
            for i in range(K):
                w = self.pca_w + rc * np.random.normal(0, 1, self.M)
                self.pca.fit(unlabel_train, n_component, w)
                Dst, Qst = self.pca.Dst_Qst(label_train, w)
                score = self.pca.anomaly_score(Dst, Qst)
                auc = roc_auc_score(train_labels, score)
                S[i, :] = w
                Y[i] = auc
            pls.fit(S, Y, 10)
            self.pca_w = self.pca_w + (3 * pls.B).reshape(self.M)

            print("epoch ", e + 1, "/", epoch)

    # Return the anomaly scores
    def test(self, X):
        # compute the anomaly scores
        Dst, Qst = self.pca.Dst_Qst(X, self.pca_w)
        scores = self.pca.anomaly_score(Dst, Qst)
        return scores
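A hypothetical usage sketch for the MSNM class above; the array names are placeholders, and it assumes the custom PCA and PLS helpers the class relies on are importable:

model = MSNM(M=unlabeled_features.shape[1])
model.train(labeled_features, labeled_targets, unlabeled_features,
            n_component=50, K=10, rc=0.1, epoch=20)
anomaly_scores = model.test(test_features)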
Example #6
def main():
    dim = 2
    num_class = 3
    dataset_dir = '../input/wine.csv'
    train_x, train_y, raw_data = data_loader(dataset_dir)
    pca = PCA(first_k=dim, use_threshold=False, threshold=0.5)
    proj = pca.fit(train_x)
    kmeans = KMeans(K=num_class)
    center, predict_y = kmeans.fit(proj)
    result = evaluate(proj, train_y, predict_y, k=num_class)
    visualization(center, proj, predict_y, dim)
    save_to_csv(raw_data, predict_y)
    print(result)
Example #7
from sklearn.neighbors import KNeighborsClassifier

SHAPE = (46, 56)

M = 121
standard = False

data = split_data()

X_train, y_train = data['train']

D, N = X_train.shape

pca = PCA(n_comps=M, standard=standard)

W_train = pca.fit(X_train)

X_test, y_test = data['test']
I, K = X_test.shape

W_test = pca.transform(X_test)

nn = KNeighborsClassifier(n_neighbors=1)
nn.fit(W_train.T, y_train.T.ravel())

y_hat = nn.predict(W_test.T)

done = {'success': False, 'failure': False}

fig, axes = plt.subplots(nrows=2)
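A hypothetical follow-up to the 1-nearest-neighbour prediction above, measuring accuracy on the PCA projections (the snippet itself stops before any evaluation):

import numpy as np

accuracy = np.mean(y_hat == np.asarray(y_test).ravel())
print('1-NN accuracy with {} components: {:.3f}'.format(M, accuracy))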
Example #8
    acertoPCAS = [[], [], [], []]
    extraido = list(range(1, 19))

    for j in extraido:
        erros = [0] * 4
        errosScore = [0] * 4
        for i in range(hold):
            pcaR = PCAR()
            pcaRS = PCARS()
            train_atr, test_atr, train_classes, test_classes = train_test_split(
                baseClimate.atributos,
                baseClimate.classes,
                test_size=0.5,
                random_state=i)
            b = Base(train_classes, train_atr)  # build the training base
            pcaR.fit(b)  # prepare the PCA
            pcaRS.fit(b)  # prepare the score-based PCA

            baseTreino = pcaR.run(Base(train_classes, train_atr),
                                  j)  # training base projected by the PCA
            baseTeste = pcaR.run(Base(test_classes, test_atr),
                                 j)  # test base projected by the PCA

            baseTreinoS = pcaRS.run(
                Base(train_classes, train_atr),
                j)  # training base projected by the score-based PCA
            baseTesteS = pcaRS.run(
                Base(test_classes, test_atr),
                j)  # test base projected by the score-based PCA
            qt1 = len(test_classes)
            # Classifier errors - PCA
Example #9
def tsne(data, label):
    from sklearn.manifold import TSNE
    tsne = TSNE(n_components=2, init='pca')
    result = tsne.fit_transform(data)
    visualize(result, label)
    

if __name__ == "__main__":
    winefile = "../input/wine.csv"
    print("---------Preprocessing------------")
    dataset = DataSet()
    dataset.preprocessing(winefile)
    
    print("---------PCA------------")
    pca = PCA()
    dimmedset = pca.fit(dataset.dataSet)
#    print(dimmedset)
    
    print("---------K-means------------")
    kmeans = Kmeans()
    
    print("---------Without dimensionality reduction------------")
    predictlabel = kmeans.cluster(dataset.dataSet)
#    print(predictlabel)
    evaluate(dataset.label, predictlabel, dataset.dataSet)
    print("Visualizing the clustering of the non-reduced data with TSNE:")
    tsne(dataset.dataSet, predictlabel)
#    print("---------Output------------")
#    filename = "../output/undimmedCluster.csv"
#    with open(filename, 'w', newline='') as outfile:
#        outlist = (np.array(predictlabel)[:, np.newaxis]).tolist()
Example #10
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from PCA import PCA

pre_data = datasets.load_iris()
X = pre_data.data
Y = pre_data.target

pca = PCA(2)
pca.fit(X)
X_transformed = pca.transform(X)

print("Dimensions of X is " + str(X.shape))

print("Dimensions of X_transformed is " + str(X_transformed.shape))

print(Y)

x1 = X_transformed[:, 0]
x2 = X_transformed[:, 1]

plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.scatter(x1,
            x2,
            alpha=1,
            cmap=plt.cm.get_cmap('Dark2', 3),
            c=Y,
            edgecolor="red")
plt.colorbar()
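The script above stops after plt.colorbar(); to display the labelled iris scatter plot interactively, one would typically finish with:

plt.show()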
Example #11
import numpy as np
from PCA import PCA
import matplotlib.pyplot as plt
import json
import os

if __name__ == "__main__":
    images = np.load('./data/sampled_image.npy')
    labels = np.load('./data/sampled_label.npy')
    print(images.shape, labels.shape)
    images_reshape = images.reshape(images.shape[0], images.shape[1]*images.shape[2])
    print(images_reshape.shape)

    pca = PCA()
    pca.fit(images_reshape)
    data = pca.transform(2)

    dirs = './pic/'
    if not os.path.exists(dirs):
        os.makedirs(dirs)

    data_set = []
    data_x = []
    data_y = []
    for i in range(data.shape[0]):
        path = "./pic/{}.jpg".format(i)
        sample = [data[i][0].real, data[i][1].real, labels[i], path]
        data_set.append(sample)
        data_x.append(data[i][0].real)
        data_y.append(data[i][1].real)
Example #12
def imageShow(dataFace):
    # Show each row of dataFace as a 32x32 image on a square grid (pictN assumed to be dataFace.shape[0])
    pictN = dataFace.shape[0]
    for i in range(pictN):
        plt.subplot(int(np.sqrt(pictN)), int(np.sqrt(pictN)), i + 1)
        data = dataFace[i, :].reshape((32, 32)).T
        # img = scipy.misc.toimage(data)
        plt.imshow(data, cmap='gray')
    plt.show()
imageShow(dataFace)
# myP=PCA(x,number=1)
# myP.train()
# Z=myP.fit()
# Xnex=myP.reconstruct(Z)
# plt.plot(Xnex[:,0],Xnex[:,1],'ro')
# plt.show()
# Reconstruct and display the faces using 5, 10, 50, 100, and 500 principal components
for i in [5, 10, 50, 100, 500]:
    myP=PCA(dataFace,number=i)
    myP.train()
    Z=myP.fit()
    Xnex=myP.reconstruct(Z)
    imageShow(Xnex)

                    c += 1
                else:
                    d += 1
    r = (a + d) / (a + b + c + d)
    return r


def visualize(pca_x, cluster):
    import pandas as pd
    y = np.array(cluster).reshape(-1, 1)
    x = pd.DataFrame(np.concatenate((pca_x, y), axis=-1))
    data = pd.DataFrame(x, index=x.index)
    d1 = data[x[2] == 0]
    d2 = data[x[2] == 1]
    d3 = data[x[2] == 2]
    plt.plot(d1[0], d1[1], 'r.', d2[0], d2[1], 'gx', d3[0], d3[1], 'b*')
    plt.show()


if __name__ == '__main__':
    dataset_dir = '../input/wine.data'
    x, y = load_data(dataset_dir)
    model = PCA(threshold=0.5)
    pca = model.fit(x)
    print('dim: ', pca.shape[1])
    kmodel = KMeans()
    cluster, s = kmodel.fit(pca)
    print('r: ', evaluate(cluster, y))
    visualize(pca, cluster)
    np.savetxt('../output/output.csv', cluster, delimiter=',', fmt='%d')