Example #1
def main(args):
    # Read data file into numpy matrices
    with gzip.open(args.mnist_train_data, 'rb') as in_gzip:
        magic, num, rows, columns = struct.unpack('>IIII', in_gzip.read(16))
        all_data = [np.array(struct.unpack('>{}B'.format(rows * columns),
                                           in_gzip.read(rows * columns)))
                    for _ in range(60000)]
    # Read labels file into labels
    with gzip.open(args.mnist_train_labels, 'rb') as in_gzip:
        magic, num = struct.unpack('>II', in_gzip.read(8))
        all_labels = struct.unpack('>60000B', in_gzip.read(60000))

    pca = PCA(5)
    pca.fit(all_data)
    components = pca.return_components()
    components = np.reshape(components, (5, 28, 28))

    one = PCA(5)
    one.fit(all_data)
    one_comp = one.return_components()
    f, axarr = plt.subplots(1, 5, figsize=(18, 4), sharey=True)
 
    for i in range(5):
        axarr[i].imshow(components[i])
        axarr[i].set_aspect('equal')
        axarr[i].set_title('Component {}'.format(i + 1))
    plt.tight_layout()
    name = 'Hrach'
    plt.savefig('comps-{}.png'.format(name), dpi=320)
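This snippet (and Examples 6 and 9 below) relies on a custom PCA class exposing fit, transform, and return_components; its implementation is not shown. As a rough guide only, a minimal sketch of such a class, assuming the components come from an eigendecomposition of the sample covariance matrix (the original implementation may differ), could look like this:

import numpy as np

class PCA:
    """Minimal sketch of the interface assumed above; not the original class."""

    def __init__(self, n_components):
        self.n_components = n_components

    def fit(self, X):
        X = np.asarray(X, dtype=float)
        self.mean_ = X.mean(axis=0)
        # Eigendecomposition of the covariance matrix; eigh returns ascending eigenvalues
        eigvals, eigvecs = np.linalg.eigh(np.cov(X - self.mean_, rowvar=False))
        order = np.argsort(eigvals)[::-1][:self.n_components]
        self.components_ = eigvecs[:, order].T  # shape (n_components, n_features)
        return self

    def transform(self, X):
        return (np.asarray(X, dtype=float) - self.mean_) @ self.components_.T

    def return_components(self):
        return self.components_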
Example #2
 def buildPCA(self, marks):
     TableMarks, mean_shape = pa.GPA(marks)  # generalized Procrustes alignment of the landmark sets
     self.mean_shape = mean_shape
     marks = np.asarray(TableMarks)
     accuracy = 0.98
     PCAmodel = PCA(marks, accuracy)
     return PCAmodel
Example #3
def test_pca():
    data_ingestor = DataIngestor()
    X, y, _, _ = data_ingestor.load_mnist()

    X = X.T


    pca = PCA()

    dimensionality = [1, 10, 100, 500, 784]
    element = [1, 2, 3, 4]
    

    fig, axes = plt.subplots(len(dimensionality), 1, sharey=True)
    plt.gray()
    for i, big_ax in enumerate(axes, start=0):
        big_ax.set_title('PCs = ' + str(dimensionality[i]))
        big_ax.tick_params(labelcolor=(1., 1., 1., 0.), top=False, bottom=False, left=False, right=False)
        big_ax._frameon = False
        _, X_tilde = pca.compute_pca(X, dimensionality[i])
        for j in range(len(element)):
            ax = fig.add_subplot(len(dimensionality), len(element), i*len(element) + j + 1)
            ax.imshow(X_tilde.T[element[j]].reshape([28,28]))
            plt.axis('off')
    
    plt.show()

    eigvals, _ = pca.compute_pca(X)

    reconstruction_error = [np.sum(eigvals[i:]) for i in range(len(eigvals))]
    plt.plot(range(len(reconstruction_error)), reconstruction_error)
    plt.axhline(0, color='black')
    plt.title('Average Reconstruction Error')
    plt.show()
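The reconstruction_error list above uses the identity that, for centered data, the mean squared reconstruction error with the top k components equals the sum of the discarded covariance eigenvalues. A quick sanity check of that identity using scikit-learn (the data and import names here are illustrative, not part of the original snippet):

import numpy as np
from sklearn.decomposition import PCA as SkPCA

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 20))
X = X - X.mean(axis=0)  # center the data

k = 5
pca_k = SkPCA(n_components=k).fit(X)
X_hat = pca_k.inverse_transform(pca_k.transform(X))
mse = np.mean(np.sum((X - X_hat) ** 2, axis=1))

# Eigenvalues of the covariance matrix with denominator n, matching the mean above
eigvals = np.linalg.eigvalsh(X.T @ X / len(X))[::-1]
print(mse, eigvals[k:].sum())  # the two values agree up to floating-point error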
Example #4
def get_data(pca_ON=False, print_shapes=False):
    data = pd.read_csv('mnist_train.csv').to_numpy()

    Xtrain = data[:-10000, 1:]
    Ytrain = data[:-10000, 0]
    Xtest = data[-10000:, 1:]
    Ytest = data[-10000:, 0]

    dataset = {}

    if pca_ON:
        pca = PCA(n_components=30)
        pca.fit(Xtrain)
        if print_shapes:
            print('\nEigenvectors size:', pca.evecs.shape)
        Xtrain = pca.transform(Xtrain)
        Xtest = pca.transform(Xtest)

    if print_shapes:
        print('\nXtrain: {}, Ytrain: {}'.format(Xtrain.shape, Ytrain.shape))
        print('Xtest: {}, Ytest: {}'.format(Xtest.shape, Ytest.shape))

    dataset['train'] = (Xtrain, Ytrain)
    dataset['test'] = (Xtest, Ytest)

    return dataset
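Assuming the usual mnist_train.csv layout (label in column 0, 784 pixel columns after it), a hypothetical call looks like the following; note that the evecs attribute printed above belongs to the author's custom PCA class, not scikit-learn's PCA, which exposes components_ instead:

dataset = get_data(pca_ON=True, print_shapes=True)
Xtrain, Ytrain = dataset['train']  # e.g. (50000, 30) if the CSV holds the full 60,000 rows
Xtest, Ytest = dataset['test']     # (10000, 30)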
Example #5
def runPCA(data, elems=None, components=None, threshold=None):

    t_data = theano.shared(np.asarray(data, dtype=theano.config.floatX),
                           name='data',
                           borrow=True)

    if components is not None and threshold is not None:
        print("You Can't Run PCA Using Both Threshold And Components")
        exit(-1)

    t_components = None
    t_threshold = None

    if components is not None:
        t_components = theano.shared(value=components,
                                     name='components',
                                     borrow=True)
    elif threshold is not None:
        t_threshold = theano.shared(value=threshold,
                                    name='threshold',
                                    borrow=True)

    idx = T.lvector('idx')
    m_data = T.matrix('data')

    pca = PCA(data=m_data, components=t_components, threshold=t_threshold)

    theanoPCA = theano.function(inputs=[idx],
                                outputs=pca.process(),
                                givens={m_data: t_data[idx]})

    if elems is None:
        elems = np.arange(len(data), dtype='int64')
    return theanoPCA(elems)
Example #6
def main(args):
    # Read data file into numpy matrices
    with gzip.open(args.mnist_train_data, 'rb') as in_gzip:
        magic, num, rows, columns = struct.unpack('>IIII', in_gzip.read(16))
        all_data = np.array([np.array(struct.unpack('>{}B'.format(rows * columns),
                                           in_gzip.read(rows * columns)))
                    for _ in range(16000)])
    with gzip.open(args.mnist_train_labels, 'rb') as in_gzip:
        magic, num = struct.unpack('>II', in_gzip.read(8))
        all_labels = struct.unpack('>16000B', in_gzip.read(16000))
    each_label = np.empty(10, dtype = object)
    for i in range(10):
        each_label[i] = all_data[np.array(all_labels) == i]
    pca = PCA(15)
    pca.fit(all_data)
    all_data_transform = pca.transform(all_data)
    kmeans_labels = KMeans(n_clusters=10, random_state=0).fit_predict(all_data_transform)
    each_cluster = np.empty(10, dtype = object)
    for i in range(10):
        each_cluster[i] = all_data_transform[:,:2][np.array(kmeans_labels) == i]
    f, axarr = plt.subplots(2, 10, figsize=(18, 4), sharey=True)
    for i in range(10):
        a = pca.transform(each_label[i])
        axarr[0][i].scatter(a.T[0], a.T[1], s = 1)
    for i in range(10):
        axarr[1][i].scatter(each_cluster[i].T[0], each_cluster[i].T[1], s = 1)
    #plt.show()
    # Count label/cluster co-occurrences to compare the k-means clusters with the true digit labels
    coincidence_matrix = np.zeros((10, 10)).astype(int)
    for i in range(16000):
        coincidence_matrix[all_labels[i], kmeans_labels[i]] += 1
    print(coincidence_matrix)
    plt.savefig("labels_vs_kmeans_clusters.jpg")
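The coincidence matrix can also be reduced to a single clustering score by matching each k-means cluster to its most frequent true label (a quick purity measure; an optimal one-to-one matching would use the Hungarian algorithm instead):

# Purity: fraction of points whose cluster's dominant label matches their own
purity = coincidence_matrix.max(axis=0).sum() / coincidence_matrix.sum()
print('cluster purity: {:.3f}'.format(purity))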
Example #7
def test_pca(filename):
    # Load the dataset
    X = []
    for l in open(filename).readlines():
        data = l.split(' ')
        rec = [float(d) for d in data]
        X.append(rec)

    X = np.array(X)

    # Principal component analysis
    # (pass True to normalize by dividing by the variance)
    pca = PCA(X, False)

    print(pca.eigenvalues / np.sum(pca.eigenvalues))

    # Plot the cumulative contribution ratio
    accm = []
    total = 0.0
    for v in pca.eigenvalues:
        total += v
        accm.append(total)
    accm = np.array(accm) / total
    plt.plot(accm, 'b-')
    plt.show()

    # Plot the projection onto the principal-component space
    X_pca = pca.project(dim=2)
    plt.plot(X_pca, 'b.')
    plt.show()
Example #8
def getLayerDimensionality(layer, index, inputDict):
    if layer.name.startswith(inputDict['ValidDimLayers']): # Must be either a single string or tuple of strings
        pcaObj = PCA(matrix=layer.get_weights()[0])
        message = "Layer {}: Dimensionality: {:4.2f}".format(index,pcaObj.dimensionality())
    else:
        message = "Layer {}: Dimensionality is N/A".format(index)
    return message
Example #9
def main(args):
    # Read data file into numpy matrices
    with gzip.open(args.mnist_train_data, 'rb') as in_gzip:
        magic, num, rows, columns = struct.unpack('>IIII', in_gzip.read(16))
        all_data = np.array([
            np.array(
                struct.unpack('>{}B'.format(rows * columns),
                              in_gzip.read(rows * columns)))
            for _ in range(16000)
        ])
    with gzip.open(args.mnist_train_labels, 'rb') as in_gzip:
        magic, num = struct.unpack('>II', in_gzip.read(8))
        all_labels = struct.unpack('>16000B', in_gzip.read(16000))
    zeros = all_data[np.array(all_labels) < 0.5]
    #plt.imshow(all_data[0].reshape(28,28))
    #plt.show()
    pca = PCA(5)
    pca.fit(all_data)
    print(pca.return_components().shape)
    components = pca.return_components().reshape(5, 28, 28)
    f, axarr = plt.subplots(1, 5, figsize=(18, 4), sharey=True)
    for i in range(5):
        axarr[i].imshow(components[i])
        print(i, components[i].shape)
        axarr[i].set_aspect('equal')
        axarr[i].set_title('Component {}'.format(i + 1))
    plt.tight_layout()
    name = 'TODO'  # TODO: Replace name with your name
    plt.savefig('comps-{}.png'.format(name), dpi=320)
Example #10
def test_PCA_dtype():
    """
    Test that the initialization of a PCA class throws a type error for 
    things that are not pandas dataframes
    """
    some = "A wrong data type of type string"
    with pytest.raises(TypeError):
        PCA(some)
Example #11
def test_PCA_train():
    """
    Test that PCA has a working train abstract method
    """
    some = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

    m = PCA(some)
    assert m.train()
Example #12
def compress_images(DATA, k):
    pca = PCA(DATA, k)

    reconst = pca.perform_PCA()

    reconst = rescale_images(reconst)

    save_images(reconst)
Example #13
def run_pca():
    data = Data(FILENAME)
    d = 2
    pca = PCA()
    pca.train(data.x1.T, d)

    plt.plot(pca.pc[0], pca.pc[1], 'ro')
    plt.savefig("pca")
    plt.clf()
Example #14
    def fit(self, X, y):
        self.pca = PCA(n_components=self.pca_components).fit(X)
        pca_projected = self.pca.project(X)

        self.lda = LDA(n_components=self.n_components).fit(pca_projected, y)

        self.subspace = np.dot(self.pca.pro_subspace, self.lda.pro_subspace)

        return self
Example #15
def test_PCA_init():
    """
    Given a pandas dataframe, test the creation of a PCA class.
    """
    some = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

    m = PCA(some)
    data_2 = m.getData()
    assert some.equals(data_2)
Example #16
def reduceLayerDimensionality(layer, index, inputDict):
    if layer.name.startswith(inputDict['ValidDimLayers']): # Must be either a single string or tuple of strings
        weights = layer.get_weights()
        pcaObj  = PCA(matrix=weights[0])
        weights[0] = pcaObj.filterMatrix(n=pcaObj.computeTargetPCs(targetRatio=inputDict['targetRatio']))
        layer.set_weights(weights)   
        message = "Layer {}: Reduced layer dimensionality".format(index)
    else:
        message = "Layer {}: Dimensionality unchanged".format(index)
    return message
Example #17
def test():
    a = array([[5, 9, 7], [3, 7, 4], [2, 3, 9]])
    pca = PCA(a, 2)
    print "mean:  %s" % pca.mean
    print "covar: %s" % pca.covar
    print "eval:  %s" % pca.eval
    print "evec:  %s" % pca.evec
    print "esort: %s" % pca.esort
    print "pc:    %s" % pca.pc
    print "pca:   %s" % pca.pca
Example #18
 def model(self, k):
     pca = PCA(self.X)
     U, S, V, compare = pca.SVDdecompose()  # do not remove; the computed results are used below
     # Obtain the scores matrix and the loadings matrix
     T, P = pca.PCAdecompose(k)
     #print("Scores matrix T: ", T)
     #print("Loadings matrix P: ", P)
     mlr = MLR(T, self.Y)
     mlr.modelling()
     self.A = np.dot(P, mlr.A)
Example #19
def test_PCA_convert():
    """
    Test that PCA has a working test abstract method
    """
    some = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

    m = PCA(some)
    m.train(2)
    results = m.convert(some)
    assert results.shape[0] == 4
    assert results.shape[1] == 2
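Examples 10, 11, 15, and 19 all exercise the same DataFrame-based PCA wrapper (a type-checking __init__, getData, train, and convert). A minimal sketch that would satisfy these tests might look like the following; the internals are assumptions, since only the interface is visible from the tests:

import numpy as np
import pandas as pd

class PCA:
    def __init__(self, data):
        if not isinstance(data, pd.DataFrame):
            raise TypeError("PCA expects a pandas DataFrame")
        self._data = data

    def getData(self):
        return self._data

    def train(self, n_components=2):
        X = self._data.to_numpy(dtype=float)
        self._mean = X.mean(axis=0)
        # Eigenvectors of the covariance matrix, sorted by decreasing eigenvalue
        eigvals, eigvecs = np.linalg.eigh(np.cov(X - self._mean, rowvar=False))
        order = np.argsort(eigvals)[::-1][:n_components]
        self._components = eigvecs[:, order]
        return True

    def convert(self, data):
        X = data.to_numpy(dtype=float)
        return (X - self._mean) @ self._components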
Example #20
    def __init__(self, index_test):

        data_test_ke = data_test[int(index_test)]
        pca = PCA(data_train)
        orang, pose = pca.calc_pca(data_test_ke)
        Input_LDA = {}
        Input_LDA['bobot'] = pca.bobot_train
        Input_LDA['proyeksi'] = pca.matrix_proyeksi
        Input_LDA['jumlah_kelas'] = ORL_face.data.shape[
            0]  # total number of classes (40), not poses
        Input_LDA['jumlah_pose'] = len(
            ORL_face.list_data_train)  # total number of training poses
        Input_LDA['data_train'] = data_train
        self.input_LDA = Input_LDA

        jumlah_kelas = self.input_LDA['jumlah_kelas']
        jumlah_pose_train = self.input_LDA['jumlah_pose']
        jumlah_data = jumlah_kelas * jumlah_pose_train

        self.proyeksi_pca_baru = self.get_proyeksi_pca_baru(
            self.input_LDA['proyeksi'], jumlah_data, jumlah_kelas)
        self.input_LDA = self.get_input_LDA(self.input_LDA['data_train'],
                                            self.proyeksi_pca_baru)
        self.rata_per_kelas = self.get_rata_tiap_kelas(self.input_LDA,
                                                       jumlah_kelas,
                                                       jumlah_pose_train)
        self.rata_total_kelas = self.get_rata_total_kelas(self.input_LDA)
        self.Sb = self.get_between_class_scatter(self.rata_per_kelas,
                                                 self.rata_total_kelas,
                                                 jumlah_kelas)
        self.Sw = self.get_within_class_scatter(self.input_LDA,
                                                self.rata_per_kelas,
                                                jumlah_data, jumlah_kelas,
                                                jumlah_pose_train)
        self.eigen_value, self.eigen_vector = self.get_eigen(self.Sb, self.Sw)
        self.descending_eigen_vector = self.descending(self.eigen_value,
                                                       self.eigen_vector)
        self.wFid = np.transpose(
            self.descending_eigen_vector[:, 0:jumlah_kelas - 1])
        self.proyeksi = self.get_proyeksi(self.wFid, self.proyeksi_pca_baru)
        self.bobot_train = self.get_bobot(data_train, self.proyeksi)
        print("\nLDA", "==" * 30)
        print("proyeksi lama", self.proyeksi_pca_baru.shape)
        print("input LDA", self.input_LDA.shape)
        print("rata per kelas", self.rata_per_kelas.shape)
        print("rata semua kelas", self.rata_total_kelas.shape)
        print("Sb", self.Sb.shape)
        print("Sw", self.Sw.shape)
        print("eva", self.eigen_value.shape)
        print("eve", self.eigen_vector.shape,
              self.descending_eigen_vector.shape)
        print("wFid", self.wFid.shape)
        print("proyeksi", self.proyeksi.shape)
        print("bobot", self.bobot_train.shape)
Example #21
    def isCorrect(self, attemptFile, correctFile, transMatrixFile,
                  standDevFile):
        errors = self.getErrors(correctFile, attemptFile)

        # transform back into array of joint angles
        invTransMatrix = 1 / PCA().readMatrix(transMatrixFile).transpose()
        jointErrors = np.dot(invTransMatrix, np.array(errors))

        # read in standard deviation file to get joint angle error bounds
        sdVector = PCA().readVector(standDevFile)

        # find and report joint angle errors above the "acceptable" threshold
        numBadJoints = 0
        for i in range(jointErrors.shape[0]):
            if jointErrors[i] > sdVector[i] * 5:
                print(
                    indexJoints.get(i) + ": " +
                    str(jointErrors[i] * 180 / np.pi))
                numBadJoints += 1

        if numBadJoints == 0: print("true")
Example #22
def get_descriptors(img, imageName, database):
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    img = clahe.apply(img)
    img = image_enhance.image_enhance(img)
    img = np.array(img, dtype=np.uint8)

    # Threshold
    ret, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

    # Normalize to 0 and 1 range
    img[img == 255] = 1

    # Thinning
    skeleton = skeletonize(img)
    skeleton = np.array(skeleton, dtype=np.uint8)
    skeleton = removedot(skeleton)

    # Split the image into 200 x 137 pixel blocks (a 2 x 2 grid, four blocks per image), then generate
    # keypoints and ORB descriptors for each block. Each image's list of block descriptors is stored in a
    # dictionary keyed by the image name, and the dictionary is pickled later.

    # Per-block descriptor list for this image
    DescriptorList = list()

    for i in range(0, 400, 200):
        for j in range(0, 274, 137):
            blockImg = img[i:i + 200, j:j + 137]
            # Harris corners
            harris_corners = cv2.cornerHarris(blockImg, 3, 3, 0.04)
            harris_normalized = cv2.normalize(harris_corners, None, 0, 255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32FC1)
            threshold_harris = 125
            # Extract keypoints
            keypoints = []
            for x in range(0, harris_normalized.shape[0]):
                for y in range(0, harris_normalized.shape[1]):
                    if harris_normalized[x][y] > threshold_harris:
                        keypoints.append(cv2.KeyPoint(y, x, 1))
            # Define descriptor
            orb = cv2.ORB_create()
            # Compute descriptors
            _, des = orb.compute(blockImg, keypoints)

            # pca = PCA(2)  # project from 32 to 2 dimensions
            # projected = pca.fit_transform(des)
            # print(des.shape)
            # print(projected.shape)
            Reduced_des = PCA(des)
            print(type(Reduced_des))
            print(Reduced_des.shape)
            DescriptorList.append(Reduced_des)

    database.update({imageName: DescriptorList})
Example #23
def main():
    dim = 2
    num_class = 3
    dataset_dir = '../input/wine.csv'
    train_x, train_y, raw_data = data_loader(dataset_dir)
    pca = PCA(first_k=dim, use_threshold=False, threshold=0.5)
    proj = pca.fit(train_x)
    kmeans = KMeans(K=num_class)
    center, predict_y = kmeans.fit(proj)
    result = evaluate(proj, train_y, predict_y, k=num_class)
    visualization(center, proj, predict_y, dim)
    save_to_csv(raw_data, predict_y)
    print(result)
Example #24
def analysisPCA(cryo_data, normalize=True):
    ### Get results on my own PCA on this dataset
    new_data = PCA(cryo_data, normalize=normalize)
    plotResults_2D(new_data, cryo_data.iloc[:,-1], 'Custom PCA Results on cryo Dataset - Normalized = '+str(normalize))

    ### Get results to compare to using the sklearn version of PCA on this dataset
    pca = sklearn_PCA(n_components=2)
    if normalize:
        sklearn_data = sklearn_SS().fit_transform(cryo_data.iloc[:,:-1])
        sklearn_new_data = pca.fit_transform(sklearn_data)
    else:
        sklearn_new_data = pca.fit_transform(cryo_data.iloc[:,:-1])
    plotResults_2D(pd.DataFrame(sklearn_new_data), cryo_data.iloc[:,-1], 'Sklearn PCA Results on cryo Dataset - Normalized = '+str(normalize))
Example #25
def analysisEM_GMM(cryo_data, use_PCA=True, normalize=True, title="EM_GMM Results"):
    ### Define a seed that doesn't push two Gaussians right next to each other
    np.random.seed(1)

    ### Reduce dimensionality to 2D
    new_data = []
    if use_PCA:
        new_data = PCA(cryo_data, normalize=normalize)
    else: ### use_LDA
        new_data = LDA(cryo_data, user_dims=2, normalize=normalize)

    ### Run the EM_GMM algorithm to attempt to classify our data points
    EM_GMM(new_data, cryo_data.iloc[:,-1], 2, max_iters=10, title=title)
Example #26
def pca_data(train_x, test_x, fold):
    pca = PCA()
    train_x, test_x = pca.process(train_x, test_x)
    #with open('train_pca_'+str(fold)+'.pkl', 'wb') as f1:
        #pickle.dump(train_x, f1)
    #with open('test_pca_'+str(fold)+'.pkl', 'wb') as f2:
        #pickle.dump(test_x, f2)

    #with open('train_pca_'+str(fold)+'.pkl', 'rb') as f1:
        #train_x = pickle.load(f1)
    #with open('test_pca_'+str(fold)+'.pkl', 'rb') as f2:
        #test_x = pickle.load(f2)
    train_x = train_x.astype(np.float64, copy=True)
    test_x = test_x.astype(np.float64, copy=True)
    return train_x, test_x
Example #27
 def computePCs(self):
     # Determine the number of principal components to keep
     Percentage = 0.95
     pca = PCA(self.X)
     U, S, V, compare = pca.SVDdecompose()
     self.kcount = len(compare)  # record the maximum possible number of principal components
     comSum = 0
     cSum = sum(compare)
     for i in compare:
         comSum += i
         if comSum / cSum >= Percentage:
             PCs = int(np.where(compare == i)[0][-1]) + 1
             #print("Number of principal components k: ", PCs)
             self.k = PCs  # record the smallest number of PCs reaching the threshold
             break
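The loop above picks the smallest k whose cumulative share of compare reaches Percentage. Assuming compare is a 1-D NumPy array sorted in decreasing order, the same choice can be written in one line:

k = int(np.searchsorted(np.cumsum(compare) / np.sum(compare), Percentage)) + 1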
Example #28
    def recommend_songs(self, songnumber):
        if use_pca:
            # Get reduced (2 dimensions) data using PCA
            # start_time()
            self.transformed = PCA(self.X)
            # stop_time("PCA")
        elif use_lem:
            # Get reduced (2 dimensions) data using LEM
            # start_time()
            self.transformed = LEM(self.X)
            # stop_time("LEM")

        # Get seed data point
        self.p = self.transformed[songnumber]
        # Get 20 nearest neighbors of seed
        self.idx = kNN(self.transformed, self.p, 20)[0]
Example #29
def test_pca(n, m):
    # n: num of row
    # m: num of column
    for i in tqdm(range(n * m)):
        # make some toy data for random test
        test_data = np.random.rand(10, 100)
        # test_data: [10, 100]
        # set pca
        pca = decomposition.PCA(n_components=2)
        new_data = pca.fit_transform(test_data)
        # new_data: [10, 2]
        new_data_homemade = PCA(test_data, 2)
        # new_data_homemade: [10, 2]
        plt.subplot(n, m, i+1)
        plt.scatter(new_data[:, 0], new_data[:, 1], c='blue')
        plt.scatter(new_data_homemade[:, 0], new_data_homemade[:, 1], c='red')
    plt.show()
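Since principal components are only defined up to sign, the red (homemade) and blue (scikit-learn) point clouds in each subplot may appear mirrored along one or both axes even when both implementations are correct. If an exact overlay is desired, the signs can be aligned per column before plotting, e.g.:

# Flip each homemade column so it correlates positively with the sklearn column
signs = np.sign(np.sum(new_data * new_data_homemade, axis=0))
signs[signs == 0] = 1.0
new_data_homemade = new_data_homemade * signs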
Example #30
 def test_pca2(self):
     pic_num = END_INDEX - START_INDEX
     images = loadFace()
     print(images)
     m, n = images[0].shape
     images_in = images.copy()
     # Dimensionality reduction across different images; this makes the images more alike
     pca = PCA(np.array([[image.reshape(-1) for image in images_in]]))
     pca_ims = pca.ret()
     for i in range(START_INDEX, END_INDEX):
         before_pca = Image.fromarray(images[i])
         after_pca = Image.fromarray(pca_ims[0][i].reshape(m, n))
         fig = plt.figure('pca')
         ax = fig.add_subplot(pic_num, 2, i * 2 + 1 - START_INDEX * 2)
         ax.imshow(before_pca, cmap='gray', vmin=0, vmax=255)
         ax = fig.add_subplot(pic_num, 2, i * 2 + 2 - START_INDEX * 2)
         ax.imshow(after_pca, cmap='gray', vmin=0, vmax=255)
     plt.show()