def compress(self, X):
    n = X.shape[0]
    # nearest_neighbours = np.zeros((n, self.nn))

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Construct the nearest-neighbour graph
    adjacency_matrix = np.zeros((n, n))
    nearest_neighbours = self.knn(X)
    for i, j in enumerate(nearest_neighbours):
        for neighbour in j:
            adjacency_matrix[i, neighbour] = D[i, neighbour]
            adjacency_matrix[neighbour, i] = D[neighbour, i]

    # Compute geodesic (shortest-path) distances
    dijkstra = utils.dijkstra(adjacency_matrix)

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    dijkstra[np.isinf(dijkstra)] = dijkstra[~np.isinf(dijkstra)].max()

    # Initialize low-dimensional representation with PCA
    Z = PCA(self.k).fit(X).compress(X)

    # Solve for the minimizer
    z = find_min(self._fun_obj_z, Z.flatten(), 500, False, dijkstra)
    Z = z.reshape(n, self.k)
    return Z
def hullselect(self):

    def selectHullPoints(data, n=20):
        """ select data points for pairwise projections of the first n
        dimensions """

        # iterate over all projections and select data points
        idx = np.array([])

        # iterate over some pairwise combinations of dimensions
        for i in combinations(range(n), 2):
            # sample convex hull points in 2D projection
            convex_hull_d = quickhull(data[i, :].T)

            # get indices for convex hull data points
            idx = np.append(idx, dist.vq(data[i, :], convex_hull_d.T))
            idx = np.unique(idx)

        return np.int32(idx)

    # determine convex hull data points only if the total
    # amount of available data is >50
    #if self.data.shape[1] > 50:
    pcamodel = PCA(self.data, show_progress=self._show_progress)
    pcamodel.factorize()
    idx = selectHullPoints(pcamodel.H, n=self._base_sel)

    # set the number of subsampled data
    self.nsub = len(idx)

    return idx
def __init__(self, img_dataset: np.ndarray):
    """
    :param img_dataset: An image dataset, which is a matrix with the shape of (N x H x W), where:
        - N: number of images
        - H: height of images
        - W: width of images
        - each item of the matrix is a real value between 0 and 1

    Notes: All images should have the same width and height
    """
    # Get the shape of the input data
    super().__init__()
    assert len(img_dataset.shape) == 3
    self._n_samples, self._height, self._width = img_dataset.shape
    self.logger.info({
        'msg': 'Image dataset shape',
        'shape': img_dataset.shape
    })
    self._n_features = self._height * self._width

    # Flatten the images of shape (height, width) to vectors of length height x width
    self._flatten_dataset = img_dataset.reshape(
        (self._n_samples, self._n_features))

    # Build the PCA transformer
    self._pca_transformer = PCA(self._flatten_dataset)
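# Added usage sketch (illustrative only, not from the original source): the constructor
# above expects an (N, H, W) array of values in [0, 1] and flattens it to (N, H*W)
# before handing it to PCA. The array below is random, hypothetical data.
#
#     import numpy as np
#     images = np.random.rand(12, 8, 8)       # 12 grayscale 8x8 images in [0, 1]
#     flattened = images.reshape(12, 8 * 8)   # same flattening the constructor performs
#     assert flattened.shape == (12, 64)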
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Construct nearest neighbour graph
    G = np.zeros([n, n])
    for i in range(n):
        neighbours = np.argsort(D[i])[:self.nn + 1]
        for j in neighbours:
            G[i, j] = D[i, j]
            G[j, i] = D[j, i]

    # Compute ISOMAP distances
    D = utils.dijkstra(G)

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # TODO:
    D = self.construct_dist_graph(X, D)

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z
def toyExample():
    mat = scipy.io.loadmat('../data/toy_data.mat')
    data = mat['toy_data']

    # TODO: Train PCA
    pca = PCA(-1)
    pca.train(data)

    print("Variance of the data")
    # TODO 1.2: Compare the data variance to the S vector computed by the PCA
    data_variance = np.var(data, axis=1)
    print(data_variance)
    print(np.power(pca.S, 2) / data.shape[1])

    # TODO 1.3: Compare the variance of the projected data (into 1D) to the S vector computed by the PCA
    Xout = pca.project(data, 1)
    print("Variance of the projected data")
    data_variance = np.var(Xout, axis=1)
    print(data_variance)
    print(np.power(pca.S[0], 2) / data.shape[1])

    plt.figure()
    plt.title('PCA plot')
    plt.subplot(1, 2, 1)
    # Visualize given data and principal components
    # TODO 1.1: Plot original data (hint: use the plot_pca function)
    pca.plot_pca(data)

    plt.subplot(1, 2, 2)
    # TODO 1.3: Plot data projected into 1 dimension
    pca.S[1] = 0
    pca.plot_pca(Xout)
    plt.show()
def faceLoader() -> None:
    '''
    Face loader and visualizer example code
    '''
    gall = importGallery()
    print(gall.shape)
    gall = gall[:, :10]
    print(gall.shape)

    # Show first image
    plt.figure(0)
    plt.title('First face')
    n = 0
    nComponents = 10
    pca = PCA(nComponents)

    face = gall[:, :1]
    print(face.shape)
    # face = face.reshape(24576, 1)
    # print(face.shape)

    mu, U, C, data = pca.train(gall)
    alpha = pca.to_pca(data)
    # print(alpha.shape)

    # faceId = gall.item(n)[0][0]
    # print('Face got face id: {}'.format(faceId))

    face = alpha[:, :1]
    print(face.shape)
    face = face.reshape(192, 128)
    plt.imshow(face, cmap='gray')
    plt.show()
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)  # D is a symmetric matrix

    geoD = np.zeros((n, n))

    # find nn-neighbours
    for i in range(n):
        sort = np.argsort(D[:, i])
        # find the nn+1 smallest indexes that are not i
        neigh = np.setdiff1d(sort[0:self.nn + 1], i)
        for j in range(len(neigh)):
            t = neigh[j]
            geoD[i, t] = D[i, t]
            geoD[t, i] = D[t, i]

    D = utils.dijkstra(geoD)

    # for disconnected vertices (distance is Inf)
    # set their dist = max_dist(graph)
    # to encourage them to be far away from each other
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    sorted_indices = np.argsort(D)
    G = np.zeros((n, n))
    for i in range(D.shape[0]):
        for j in range(self.nn + 1):
            G[i, sorted_indices[i, j]] = D[i, sorted_indices[i, j]]
            G[sorted_indices[i, j], i] = D[sorted_indices[i, j], i]

    dist = utils.dijkstra(G)
    dist[np.isinf(dist)] = dist[~np.isinf(dist)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, dist)
    Z = z.reshape(n, self.k)
    return Z
def pic_handle(img_data, sd, ori_size):
    """
    Run PCA, reconstruct back to the original dimension, display the result,
    then report the peak signal-to-noise ratio (PSNR).
    """
    Pca = PCA(sd, img_data)
    c_data, w_star = Pca.pca()  # run PCA and get the projection matrix
    w_star = np.real(w_star)
    print(w_star)
    new_data = w_star * w_star.T * c_data + Pca.mean  # reconstruct to the original dimension

    total_img = []  # concatenate the images side by side
    for i in range(Pca.data_size):
        if len(total_img) == 0:
            total_img = new_data[:, i].T.reshape(ori_size)
        else:
            total_img = np.hstack(
                [total_img, new_data[:, i].T.reshape(ori_size)])

    # compute the PSNR
    print('PSNR:')
    for i in range(Pca.data_size):
        a = psnr(np.array(img_data[:, i].T), np.array(new_data[:, i].T))
        print('image', i, 'PSNR:', a, 'dB')

    # save the image
    total_img = np.array(total_img).astype(np.uint8)
    cv2.imwrite('pca image.jpg', total_img)

    # display the image
    cv2.imshow('pca image', total_img)
    cv2.waitKey(0)
def plot_iris(y, y_classes, maxit=25, *args, **kwargs):
    # np.random.seed(0)
    fig, ax = plot_grid(5)

    # Variational Bayes
    vbpca = VBPCA(y, *args, **kwargs)
    for i in range(maxit):
        vbpca.update()
    plot_scatter(vbpca.transform(), y_classes, ax[0])
    ax[0].set_title('VBPCA')

    # Laplace approximation
    lbpca = LBPCA(y.T)
    lbpca.fit(maxit)
    plot_scatter(lbpca.transform(2).T, y_classes, ax[1])
    ax[1].set_title('LBPCA')

    # Streaming LBPCA
    stream = create_distributed(np.copy(y.T), 10)
    stream.randomized_fit(1)
    plot_scatter(stream.transform(y.T, 2).T, y_classes, ax[2])
    ax[2].set_title('Batch BPCA')

    # Distributed LBPCA
    stream = create_distributed(np.copy(y.T), 10)
    stream.averaged_fit(maxit)
    plot_scatter(stream.transform(y.T, 2).T, y_classes, ax[3])
    ax[3].set_title('Parallel BPCA')

    # PCA
    pca = PCA(y.T)
    plot_scatter(pca.fit_transform().T, y_classes, ax[4])
    ax[4].set_title('PCA')

    plt.show()
def compress(self, X):
    n = X.shape[0]
    k = self.k
    K = self.K

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    nbrs = np.argsort(D, axis=1)[:, 1:K + 1]
    G = np.zeros((n, n))
    for i in range(n):
        for j in nbrs[i]:
            G[i, j] = D[i, j]
            G[j, i] = D[j, i]

    D = utils.dijkstra(G)
    D[D == np.inf] = -np.inf
    max_dist = np.max(D)
    D[D == -np.inf] = max_dist

    # Initialize low-dimensional representation with PCA
    Z = PCA(k).fit(X).compress(X)

    # Solve for the minimizer
    z = find_min(self._fun_obj_z, Z.flatten(), 500, False, D)
    Z = z.reshape(n, k)
    return Z
def getCentVec(self, contextVecs):
    sample, rank, dim = contextVecs.shape
    contexts = np.reshape(contextVecs, (sample * rank, dim))
    pca = PCA(n_components=1)
    pca.fit(contexts)
    return pca.components_[0]
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
def testePCA(self):
    pca = PCA()
    matrizX = Matrizx()
    idModelo = self.txtIdModelo.get()
    matrizPrincipal = matrizX.selectMatrizXModeloMMM(idModelo)
    pca.testePCA(matrizPrincipal)
def test_pca(self):
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    pca = PCA(n_comp=2)
    pca.fit(X)
    self.assertEqual(
        np.allclose(pca.explained_variance,
                    np.array([0.9924, 0.0075]),
                    atol=1e-3),
        True)
def train_classifiers(all_training_idx, samples_training, color_channels):
    logging.debug("Training classifiers..")
    classifiers = []
    for channel in color_channels:
        classifier = PCA(samples_training)
        classifier.train(channel[all_training_idx])
        classifiers.append(classifier)
    return classifiers
def test_transform_inverse_transform(self):
    X = self.data(500)
    pca = PCA(X)
    x = pca.transform(X, ndims=3)
    self.assertTrue(
        np.allclose(np.cov(x.T), np.eye(3), atol=1e-4, rtol=1e-2))
    X_ = pca.inverse_transform(x)
    self.assertTrue(np.allclose(X, X_))
def train_models():
    images, labels, labels_dic = collect_dat_set()
    rec_eig = PCA(500, 5)
    if images:
        rec_eig.train(images, labels)
    return rec_eig, labels_dic
def generate_pca_embedding_files():
    '''
    Generate PCA embedding csv files for the experiments.
    '''
    raw = genfromtxt('digits-raw.csv', delimiter=',')
    X = raw[:, 2:]
    pca = PCA(10)
    X_new = pca.fit_transform(X)
    raw_new = hstack((raw[:, :2], X_new))
    savetxt('digits-pca-embedding.csv', raw_new, delimiter=',')
def main():
    random.seed(1)

    img_dim = 15   # 10, 15, ...
    datasets = loadUpsonData('../data/upson_rovio_1/train_%d_50000.pkl.gz' % img_dim,
                             '../data/upson_rovio_1/test_%d_50000.pkl.gz' % img_dim)
    print 'done loading.'

    train_set_x_data, train_set_y = datasets[0]
    pca = PCA(train_set_x_data)
    print 'done PCA.'

    image = Image.fromarray(tile_raster_images(
        X=train_set_x_data,
        img_shape=(img_dim, img_dim), tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save(os.path.join(resman.rundir, 'samplesData.png'))

    pyplot.figure()
    pyplot.subplot(221); pyplot.semilogy(pca.var);     pyplot.title('pca.var')
    pyplot.subplot(222); pyplot.semilogy(pca.std);     pyplot.title('pca.std')
    pyplot.subplot(223); pyplot.semilogy(pca.fracVar); pyplot.title('pca.fracVar')
    pyplot.subplot(224); pyplot.semilogy(pca.fracStd); pyplot.title('pca.fracStd')
    pyplot.savefig(os.path.join(resman.rundir, 'varstd.png'))
    pyplot.close()

    #font = ImageFont.truetype('/usr/share/fonts/truetype/ttf-lyx/cmr10.ttf', 10)
    font = ImageFont.truetype('/usr/share/texmf/fonts/opentype/public/lm/lmmono12-regular.otf', 14)

    def plotImage(xx, filename, str=None):
        arr = tile_raster_images(X=xx,
                                 img_shape=(img_dim, img_dim), tile_shape=(10, 10),
                                 tile_spacing=(1, 1))
        arrHeight = arr.shape[0]
        if str is not None:
            arr = vstack((arr, zeros((20, arr.shape[1]), dtype=arr.dtype)))
        image = Image.fromarray(arr)
        if str is not None:
            draw = ImageDraw.Draw(image)
            draw.text((2, arrHeight + 2), str, 255, font=font)
            draw = ImageDraw.Draw(image)
        image.save(os.path.join(resman.rundir, filename))

    plotImage(pca.pc().T, 'pc.png')

    for dims in [1, 2, 5, 10, 20, 50, 100, 200, 225]:
        plotImage(pca.pcaAndBack(train_set_x_data, dims),
                  'samplesPCA_%03d.png' % dims,
                  'dims=%d' % dims)

    for ee, epsilon in enumerate([0, 1e-4, 1e-3, 1e-2, 1e-1, 1, 2, 5]):
        plotImage(pca.toZca(train_set_x_data, dims, epsilon=epsilon),
                  'samplesZCA_%03d_%02d.png' % (dims, ee),
                  'dims=%d, eps=%s' % (dims, repr(epsilon)))
def main():
    whiten = False
    if len(sys.argv) > 1 and sys.argv[1] == '--whiten':
        whiten = True
        del sys.argv[1]
    if len(sys.argv) <= 3:
        print 'Usage: %s pcaDims n_hidden learningRate' % sys.argv[0]
        sys.exit(1)

    # loads data like datasets = ((train_x, train_y), ([], None), (test_x, None))
    datasets = loadUpsonData('../data/upson_rovio_1/train_15_50000.pkl.gz',
                             '../data/upson_rovio_1/test_15_50000.pkl.gz')
    img_dim = 15   # must match actual size of training data
    print 'done loading.'

    pcaDims = int(sys.argv[1])
    pca = PCA(datasets[0][0])  # train
    datasets[0][0] = pca.toPC(datasets[0][0], pcaDims, whiten=whiten)  # train
    datasets[1][0] = pca.toPC(datasets[1][0], pcaDims, whiten=whiten) if len(datasets[1][0]) > 0 else array([])  # valid
    datasets[2][0] = pca.toPC(datasets[2][0], pcaDims, whiten=whiten)  # test
    print 'reduced by PCA to'
    print ('(%d, %d, %d) %d dimensional examples in (train, valid, test)'
           % (datasets[0][0].shape[0], datasets[1][0].shape[0],
              datasets[2][0].shape[0], datasets[0][0].shape[1]))

    # plot mean and principal components
    image = Image.fromarray(tile_raster_images(
        X=pca.meanAndPc(pcaDims).T,
        img_shape=(img_dim, img_dim), tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save(os.path.join(resman.rundir, 'meanAndPc.png'))

    # plot fractional stddev in PCA dimensions
    pyplot.semilogy(pca.fracStd, 'bo-')
    if pcaDims is not None:
        pyplot.axvline(pcaDims)
    pyplot.savefig(os.path.join(resman.rundir, 'fracStd.png'))
    pyplot.clf()

    test_rbm(datasets=datasets,
             training_epochs=45,
             img_dim=img_dim,
             n_input=pcaDims if pcaDims else img_dim * img_dim,
             n_hidden=int(sys.argv[2]),
             learning_rate=float(sys.argv[3]),
             output_dir=resman.rundir,
             quickHack=False,
             visibleModel='real',
             initWfactor=.01,
             imgPlotFunction=lambda xx: pca.fromPC(xx, unwhiten=whiten))
def toyExample() -> None:
    ## Toy Data Set
    mat = scipy.io.loadmat('../data/toy_data.mat')
    data = mat['toy_data']
    data = importGallery()

    ## limit data for testing purposes
    data = data[:, :144].T
    print(data.shape)

    ## Iris dataset. Just for testing purposes
    #iris = datasets.load_iris()
    #data = iris['data'].astype(np.float32)  # a 150x4 matrix with features
    #data = data.T

    # TODO: Train PCA
    nComponents = 25
    pca = PCA(nComponents)

    ## 1.1 Calculate PCA manually. SVD is following
    #pca.pca_manuel(data)

    ## 1.2 Calculate PCA via SVD
    mu, U, C, dataCenter = pca.train(data)

    ## 2. Transform RAW data using first n principal components
    alpha = pca.to_pca(dataCenter)

    ## 3. Backtransform alpha to Raw data
    Xout = pca.from_pca(alpha)

    print("Variance")
    # TODO 1.2: Compare the data variance to the eigenvalue vector computed by the PCA
    print(f'Total Variance: {np.var(data)}')
    print(f'Eigenvalues: {C} \n')

    # TODO 1.3: Compare the variance of the projected data (into 1D) to the S vector computed by the PCA
    print(f'Total Variance Transform: {np.var(alpha)}')
    print(f'Mean Eigenvalues: {np.mean(C)}')

    ## Plot only if exactly 2 components are used
    if nComponents == 2:
        plt.figure()
        plt.title('PCA plot')
        plt.subplot(1, 2, 1)
        # Visualize given data and principal components
        # TODO 1.1: Plot original data (hint: use the plot_pca function)
        pca.plot_pca(data)

        plt.subplot(1, 2, 2)
        # TODO 1.3: Plot data projected into 1 dimension
        pca.plot_pca(Xout)
        plt.show()
    ## Plot variances
    else:
        x = np.arange(1, len(C) + 1)
        plt.bar(x, C)
        plt.show()
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, 0.3)

    knn = KNN(3)
    y_pred = knn.predict(X_test, X_train, y_train)
    accuracy = accuracy_score(y_pred, y_test)
    print("accuracy is ", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="knn", accuracy=accuracy)
def main(matrix, pcomps):
    pca = PCA(pcomps, matrix)
    # get covariance matrix - saved as instance var
    pca.covariance_matrix()
    # find evecs and evals of covariance matrix
    pca.evecs_and_evals()
    # get top x principal components aka evecs corresponding to top evals
    pca.principal_components()
    # reduce image on those components
    return pca.get_evec_matrix()
def getCxtSubspace(wl, dim, var_threshold=0.45):
    emb = []
    for word in wl:
        if (word not in vecDict):
            print "non-exist:", word
            continue
        wordEmbed = vecDict[word]
        emb.append(wordEmbed)
    emb = np.array(emb)

    # first pass: find how many components are needed to reach the variance threshold
    pca = PCA()
    pca.fit(emb)
    varList = pca.explained_variance_ratio_
    cand = 0
    varSum = 0
    for var in varList:
        varSum += var
        cand += 1
        if (varSum >= var_threshold):
            break

    # second pass: keep only that many components
    pca = PCA(n_components=cand)
    pca.fit(emb)
    top_embed = pca.components_
    print "dim:", len(top_embed.tolist()), cand
    return top_embed.tolist()
def Bonus3():
    '''
    Scatter plot of samples projected onto the first two eigenvectors.
    '''
    raw = genfromtxt('digits-raw.csv', delimiter=',')
    X = raw[:, 2:]
    pca = PCA(2)
    X_new = pca.fit_transform(X)
    perm = permutation(X.shape[0])[:1000]
    labels = array(raw[perm, 1], dtype=int)
    colors = rand(10, 3)[labels, :]
    scatter(X_new[perm, 0], X_new[perm, 1], c=colors, alpha=0.9, s=10)
    show()
def compress(self, X):
    n = X.shape[0]
    k = self.k

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Initialize low-dimensional representation with PCA
    Z = PCA(k).fit(X).compress(X)

    # Solve for the minimizer
    z = find_min(self._fun_obj_z, Z.flatten(), 500, False, D)
    Z = z.reshape(n, k)
    return Z
def hack_pca(filename, threshold=0.6):
    '''
    Input: filename -- input image file name/path
    Output: img -- image without rotation
    '''
    img_r = (plt.imread(filename)).astype(np.float64) / 255

    # YOUR CODE HERE
    img = img_r[:, :, 0] * 0.299 + img_r[:, :, 1] * 0.587 + img_r[:, :, 2] * 0.114
    H, W = img.shape
    data = []
    for i in range(H):      # x axis
        for j in range(W):  # y axis
            if img[i, j] >= threshold:
                data.append([i, j])
    data = np.array(data)
    N = data.shape[0]

    eigvectors, eigvalues = PCA(data)
    (vx, vy) = eigvectors[:, 0]
    (vx, vy) = (vx, vy) if vy >= 0 else (-vx, -vy)
    theta = -math.asin(-vx) * 180 / math.pi
    R = np.array([[vy, vx], [-vx, vy]])  # rotation matrix

    odata = np.matmul(data, R)
    odata -= np.min(odata, axis=0)
    odata = odata.astype(int)
    nH, nW = np.max(odata, axis=0)
    oimg = np.zeros((nH + 1, nW + 1))
    for i in range(N):
        oimg[odata[i, 0], odata[i, 1]] = 1.
    return img, oimg, theta
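# Added note (illustration, not from the original source): assuming the PCA helper
# returns unit-length eigenvector columns, the matrix R = [[vy, vx], [-vx, vy]] used
# above maps the principal direction (vx, vy) onto (0, 1):
#
#     [vx, vy] @ R = [vx*vy - vy*vx, vx*vx + vy*vy] = [0, 1]
#
# so multiplying the foreground pixel coordinates by R aligns the dominant axis of
# the thresholded shape with the column axis of the output image.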
def hack_pca(filename):
    '''
    Input: filename -- input image file name/path
    Output: img -- image without rotation
    '''
    img_r = (plt.imread(filename)).astype(np.float64)  # 4 channels: R, G, B, A
    img_gray = img_r[:, :, 0] * 0.3 + img_r[:, :, 1] * 0.59 + img_r[:, :, 2] * 0.11

    X_int = np.array(np.where(img_gray > 0))
    X = X_int.astype(np.float64)
    D, N = X.shape

    eigen_vec, eigen_val = PCA(X)
    print(eigen_vec, eigen_val)

    Y = np.matmul(X.T, eigen_vec).T
    Y_int = Y.astype(np.int32)
    dmin = np.min(Y_int, axis=1).reshape(D, 1)
    Y_int = Y_int - dmin
    bound = np.max(Y_int, axis=1) + 1

    new_img = np.zeros(bound)
    for t in range(Y_int.shape[1]):
        new_img[tuple(Y_int[:, t])] = img_gray[tuple(X_int[:, t])]
    new_img = new_img.T[::-1, ::-1]
    return new_img
def hack_pca(filename):
    '''
    Input: filename -- input image file name/path
    Output: img -- image without rotation
    '''
    img_r = (plt.imread(filename)).astype(np.float64) / 255
    img_r = rgb2gray(img_r)
    plt.imshow(img_r, cmap='gray')
    plt.show()

    m, n = img_r.shape
    xy = []
    xyv = []
    for i in range(m):
        for j in range(n):
            if img_r[i, j] > 0:
                xy.append((i, j))
                xyv.append((i, j, img_r[i, j]))
    xy = np.array(xy)

    vector, value = PCA(xy)
    d = np.array(np.round(np.dot(xy, vector))).astype(int)
    min_xy = np.min(d, axis=0)
    d -= min_xy
    max_xy = np.max(d, axis=0)

    img = np.zeros((max_xy[1] + 1, max_xy[0] + 1))
    for i in range(xy.shape[0]):
        img[max_xy[1] - d[i, 1], max_xy[0] - d[i, 0]] = xyv[i][2]
    plt.imshow(img, cmap='gray')
    plt.show()
    return img
def main():
    # Reduce the dimensionality of the data to two dimensions and plot the results.
    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Project the data onto the 2 primary principal components
    X_trans = PCA().transform(X, 2)

    x1 = X_trans[:, 0]
    x2 = X_trans[:, 1]

    cmap = plt.get_cmap('viridis')
    colors = [cmap(i) for i in np.linspace(0, 1, len(np.unique(y)))]

    class_distr = []
    # Plot the different class distributions
    for i, l in enumerate(np.unique(y)):
        _x1 = x1[y == l]
        _x2 = x2[y == l]
        _y = y[y == l]
        class_distr.append(plt.scatter(_x1, _x2, color=colors[i]))

    plt.legend(class_distr, y, loc=1)
    plt.suptitle("PCA Dimensionality Reduction")
    plt.title("Digit Dataset")
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()
def pcaSenEmb(sent_vecs, var_threshold=0.6):
    """
    output: basis of context space
    """
    pca = PCA()
    pca.fit(sent_vecs)
    var_list = pca.explained_variance_ratio_
    cand = 0
    var_sum = 0
    for var in var_list:
        var_sum += var
        cand += 1
        if (var_sum >= var_threshold):
            break
    basis = pca.components_
    return basis
class PCANNClassifier(object):
    def __init__(self, trials, trial_angle, **kwargs):
        if "pca" in kwargs and not kwargs["pca"]:
            self.pca = None
        else:
            # Prepare PCA only on training data:
            self.pca = PCA(np.vstack(trials), None, 0.95)

        tr = []
        tr_a = []
        for t, a in zip(trials, trial_angle):
            # Project it onto PCA space
            p = t
            if self.pca:
                p = self.pca.proj(t)
            for i in range(len(p)):
                tr.append(p[i, :])
                tr_a.append(a)

        # Prepare NN classifier
        self.nn = NearestNeighbors(n_neighbors=19)
        self.nn.fit(np.vstack(tr))
        self.class_labels = tr_a

    def classify(self, trials):
        rv = []
        for tr in trials:
            p = tr
            if self.pca:
                p = self.pca.proj(tr)
            p = np.array(p)
            votes = defaultdict(lambda: 0)
            for i in range(len(p)):
                dist, idx = self.nn.kneighbors(p[i, :])
                for j in idx[0, :]:
                    votes[self.class_labels[j]] += 1
            max_a, max_c = max(votes.iteritems(), key=lambda x: x[1])
            rv.append(max_a)
        return rv
def __init__(self, trials, bin_width, trial_angles, trial_moves):
    self.trials = trials
    self.bin_width = bin_width
    self.trial_angles = trial_angles
    self.trial_moves = trial_moves
    self.pca = PCA(np.vstack(trials), 3)

    fig = plt.figure()
    self.ax = fig.add_subplot(111, projection='3d')
    self.ax.set_xlabel('PCA 1')
    self.ax.set_ylabel('PCA 2')
    self.ax.set_zlabel('PCA 3')
def update_w(self):
    """ compute new W """

    def select_hull_points(data, n=3):
        """ select data points for pairwise projections of the first n
        dimensions """

        # iterate over all projections and select data points
        idx = np.array([])

        # iterate over some pairwise combinations of dimensions
        for i in combinations(range(n), 2):
            # sample convex hull points in 2D projection
            convex_hull_d = quickhull(data[i, :].T)

            # get indices for convex hull data points
            idx = np.append(idx, vq(data[i, :], convex_hull_d.T))
            idx = np.unique(idx)

        return np.int32(idx)

    # determine convex hull data points using either PCA or random
    # projections
    method = 'randomprojection'
    if method == 'pca':
        pcamodel = PCA(self.data)
        pcamodel.factorize(show_progress=False)
        proj = pcamodel.H
    else:
        R = np.random.randn(self._base_sel, self._data_dimension)
        proj = np.dot(R, self.data)

    self._hull_idx = select_hull_points(proj, n=self._base_sel)
    aa_mdl = AA(self.data[:, self._hull_idx], num_bases=self._num_bases)

    # determine W
    aa_mdl.factorize(niter=50, compute_h=True, compute_w=True,
                     compute_err=True, show_progress=False)

    self.W = aa_mdl.W
    self._map_w_to_data()
def __analyze(self, img):
    _data = PCA.load_image(img)
    data = self.__featrue.extract(_data)
    min = 65536
    l = None
    _l = 97
    for _d in self.__crops:
        _m = Analyzer.__compare(_d, data)
        if _m < min:
            min = _m
            l = chr(_l)
        _l += 1
    return l
class View3D(object):
    def __init__(self, trials, bin_width, trial_angles, trial_moves):
        self.trials = trials
        self.bin_width = bin_width
        self.trial_angles = trial_angles
        self.trial_moves = trial_moves
        self.pca = PCA(np.vstack(trials), 3)

        fig = plt.figure()
        self.ax = fig.add_subplot(111, projection='3d')
        self.ax.set_xlabel('PCA 1')
        self.ax.set_ylabel('PCA 2')
        self.ax.set_zlabel('PCA 3')

    def scatter_plot(self):
        for angle, trial in zip(self.trial_angles, self.trials):
            projection = self.pca.proj_whiten(trial)
            projection = np.array(projection)
            xs = np.array(projection[0])
            ys = np.array(projection[1])
            zs = np.array(projection[2])
            self.ax.scatter(xs, ys, zs, c=COLOR_MAP[angle])
        plt.show()

    def line_plot(self):
        for angle, trial in zip(self.trial_angles, self.trials):
            projection = self.pca.proj_whiten(trial)
            projection = np.array(projection)
            xs = np.array(projection[0])
            ys = np.array(projection[1])
            zs = np.array(projection[2])
            self.ax.plot(xs, ys, zs, c=COLOR_MAP[angle])
        plt.show()
def __init__(self, trials, trial_angle, **kwargs):
    if "pca" in kwargs and not kwargs["pca"]:
        self.pca = None
    else:
        # Prepare PCA only on training data:
        self.pca = PCA(np.vstack(trials), None, 0.95)

    tr = []
    tr_a = []
    for t, a in zip(trials, trial_angle):
        # Project it onto PCA space
        p = t
        if self.pca:
            p = self.pca.proj(t)
        for i in range(len(p)):
            tr.append(p[i, :])
            tr_a.append(a)

    # Prepare NN classifier
    self.nn = NearestNeighbors(n_neighbors=19)
    self.nn.fit(np.vstack(tr))
    self.class_labels = tr_a
def _choose_rule_pca(self, data, show_progress):
    """Our projections are onto the PCA vectors"""
    pca = PCA(data, num_bases=1)
    pca.factorize(show_progress)
    primary_vec = pca.W.reshape(self._data_dimension)
    return self._choose_rule_vecproject(data, primary_vec)
imlist = glob.glob("data/a_thumbs/*.jpg")
#print imlist
im = array(Image.open(imlist[0]))
#imshow(im)
#show()
m, n = im.shape[0:2]
imageCount = len(imlist)

# create matrix to store all flattened images
imageMatrix = array([array(Image.open(im)).flatten() for im in imlist], 'f')

# perform pca
V, S, immean = PCA.pca(imageMatrix)

# show some images (mean and 7 first modes)
figure()
gray()
subplot(2, 4, 1)
imshow(immean.reshape(m, n))
for i in range(7):
    subplot(2, 4, i + 2)
    # reshape the flattened mode back into image dimensions
    imshow(V[i].reshape(m, n))
    #imshow(imageMatrix[i].reshape(m,n))
#show()

f = open('font_pca_modes.pkl', 'wb')
from ResultsManager import resman
from pca import PCA

if __name__ == '__main__':
    resman.start('junk', diary=True)

    datasets = loadUpsonData('../data/upson_rovio_1/train_15_50000.pkl.gz',
                             '../data/upson_rovio_1/test_15_50000.pkl.gz')

    #meanTrain = mean(datasets[0][0])
    #stdTrain = std(datasets[0][0])
    #datasets[0][0] = (datasets[0][0] - meanTrain) / stdTrain
    #datasets[2][0] = (datasets[2][0] - meanTrain) / stdTrain

    pca = PCA(datasets[0][0])
    datasets[0][0] = pca.toZca(datasets[0][0], None, epsilon=.1)
    datasets[2][0] = pca.toZca(datasets[2][0], None, epsilon=.1)
    print 'done loading.'

    test_rbm(datasets=datasets,
             training_epochs=45,
             img_dim=15,   # must match actual size of training data
             n_hidden=int(sys.argv[1]),
             learning_rate=float(sys.argv[2]),
             output_dir=resman.rundir,
             quickHack=False,
             visibleModel='real',
             initWfactor=.01,
             pcaDims=None)
def reduceModel(model, atoms, selstr):
    """Return reduced NMA model.

    Reduces a :class:`NMA` model to a subset of *atoms* matching a selection
    *selstr*.  This function behaves differently depending on the type of the
    *model* argument.  For ANM and GNM or other NMA models, this function
    derives the force constant matrix for the system of interest (specified by
    *selstr*) from the force constant matrix of the *model* by assuming that
    for any given displacement of the system of interest, the other atoms move
    along in such a way as to minimize the potential energy.  This is based on
    the formulation in [KH00]_.  For PCA models, this function simply takes
    the sub-covariance matrix for the selected atoms.

    :arg model: dynamics model
    :type model: :class:`ANM`, :class:`GNM`, or :class:`PCA`
    :arg atoms: atoms that were used to build the model
    :arg selstr: a selection string specifying subset of atoms"""

    linalg = importLA()

    if not isinstance(model, NMA):
        raise TypeError("model must be an NMA instance, not {0:s}".format(type(model)))
    if not isinstance(atoms, (AtomGroup, AtomSubset, AtomMap)):
        raise TypeError("atoms type is not valid")
    if len(atoms) <= 1:
        raise TypeError("atoms must contain more than 1 atoms")

    if isinstance(model, GNM):
        matrix = model._kirchhoff
    elif isinstance(model, ANM):
        matrix = model._hessian
    elif isinstance(model, PCA):
        matrix = model._cov
    else:
        raise TypeError("model does not have a valid type derived from NMA")
    if matrix is None:
        raise ValueError("model matrix (Hessian/Kirchhoff/Covariance) is not "
                         "built")

    system = SELECT.getBoolArray(atoms, selstr)
    other = np.invert(system)
    n_sel = sum(system)
    if n_sel == 0:
        LOGGER.warning("selection has 0 atoms")
        return None
    if len(atoms) == n_sel:
        LOGGER.warning("selection results in same number of atoms, "
                       "model is not reduced")
        return None

    if model.is3d():
        system = np.tile(system, (3, 1)).transpose().flatten()
        other = np.tile(other, (3, 1)).transpose().flatten()
    ss = matrix[system, :][:, system]
    if isinstance(model, PCA):
        eda = PCA(model.getTitle() + " reduced")
        eda.setCovariance(ss)
        return eda, system
    so = matrix[system, :][:, other]
    os = matrix[other, :][:, system]
    oo = matrix[other, :][:, other]
    matrix = ss - np.dot(so, np.dot(linalg.inv(oo), os))

    if isinstance(model, GNM):
        gnm = GNM(model.getTitle() + " reduced")
        gnm.setKirchhoff(matrix)
        return gnm, system
    elif isinstance(model, ANM):
        anm = ANM(model.getTitle() + " reduced")
        anm.setHessian(matrix)
        return anm, system
    elif isinstance(model, PCA):
        eda = PCA(model.getTitle() + " reduced")
        eda.setCovariance(matrix)
        return eda, system
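# Added note (illustration, not from the original source): the reduction above,
#     matrix = ss - np.dot(so, np.dot(linalg.inv(oo), os))
# is the Schur complement of the "other" block, i.e. the effective force-constant
# matrix K_eff = K_ss - K_so * K_oo^{-1} * K_os described by the docstring's [KH00]
# reference, obtained by letting the non-selected atoms relax so as to minimize the
# potential energy. A minimal NumPy sketch on a hypothetical 4x4 symmetric matrix:
#
#     import numpy as np
#     K = np.arange(16.0).reshape(4, 4)
#     K = K + K.T + 10 * np.eye(4)                       # symmetric, invertible blocks
#     system = np.array([True, True, False, False])      # selected degrees of freedom
#     other = ~system
#     ss = K[system][:, system]; so = K[system][:, other]
#     os_ = K[other][:, system]; oo = K[other][:, other]
#     K_eff = ss - so @ np.linalg.inv(oo) @ os_          # 2x2 reduced matrix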
def main(self, algo="KNN", textview=None):
    # Replaces "print"
    def print_output(text):
        if textview != None:
            buf = textview.get_buffer()
            buf.insert_at_cursor(text + "\n")
            textview.scroll_mark_onscreen(buf.get_insert())
        else:
            log.info(text)

    # list of set types
    if self.validation == 1:
        listeTypesSet = ["train", "validation", "test"]
    else:
        listeTypesSet = ["train", "test"]

    # list of results used for the curves
    listeRes = []

    # create the train and test files
    log.debug("Construction des fichiers d'entrainement")
    tools.constructLfwNamesCurrent(self.nbExemples)  # TODO: this is actually no longer needed
    (nbClassesLFW, nbClassesORL) = tools.trainAndTestConstruction(self.pourcentageTrain, self.nbExemples)

    # load the data
    dataTrain, dataTrainIndices, nClass = tools.loadImageData("train", self.categorie)

    # PCA transformation
    print_output("Calcul des vecteurs propres...")
    pca_model = PCA(dataTrain)
    pca_model.transform()  # project the data into the eigenspace

    ##### KNN search
    if algo == "KNN":
        print_output("Début de l'algorithme des K plus proches voisins...")

        # build the model for the KNN search
        knn_model = KNN(pca_model.getWeightsVectors(), dataTrainIndices, nClass, self.K)
        # build the Parzen model
        parzen_model = ParzenWindows(pca_model.getWeightsVectors(), dataTrainIndices, nClass, self.Theta)

        ## TEST ###########################
        # TODO: this whole part should be reworked to produce
        # train/validation/test plots
        for trainTest in listeTypesSet:
            if trainTest == "train":
                dataTest, dataTestIndices = dataTrain, dataTrainIndices
            else:
                ### if no validation is performed, concatenate the initial test and validation entries to obtain the test set
                #if "validation" not in listeTypesSet:
                    #dataTestInitial, dataTestInitialIndices, nClass = tools.loadImageData("test", self.categorie)
                    #dataValidation, dataValidationIndices, nClass = tools.loadImageData("validation", self.categorie)
                    #dataTest = np.zeros(dataTestInitial.size + dataValidation.size)
                    #dataTestIndices = np.zeros(dataTest.size)
                    #dataTest[:dataTestInitial.size], dataTestIndices[:dataTestInitial.size] = dataTestInitial, dataTestInitialIndices
                    #dataTest[dataTestInitial.size:], dataTestIndices[dataTestInitial.size:] = dataValidation, dataValidationIndices
                #else:
                dataTest, dataTestIndices, nClass = tools.loadImageData(trainTest, self.categorie)

            print_output("Projection des données de test...")
            dataTest_proj = pca_model.getProjection(dataTest)

            # counters of correct results
            nbGoodResult = 0
            nbGoodResult2 = 0
            nbGoodResult3 = 0

            t_start = time.clock()
            for i in range(0, int(dataTest.shape[1])):
                # k = 1, for reference; force k
                knn_model.setK(1)
                result1NN = knn_model.compute_predictions(dataTest_proj[:, i])
                if(result1NN == dataTestIndices[i]):
                    nbGoodResult += 1

                # k = n; restore k to its initial value
                knn_model.setK(self.K)
                resultKNN = knn_model.compute_predictions(dataTest_proj[:, i])
                if(resultKNN == dataTestIndices[i]):
                    nbGoodResult2 += 1

                resultParzen = parzen_model.compute_predictions(dataTest_proj[:, i])
                if(resultParzen == dataTestIndices[i]):
                    nbGoodResult3 += 1

                out_str = "Classic method: " + str(result1NN) + " | KNN method: " + str(resultKNN) + " | KNN+Parzen method: " + str(resultParzen) + " | Expected: " + str(dataTestIndices[i]) + "\n"  # +1 because the matrix index starts at 0
                print_output(out_str)

            resClassic = (float(nbGoodResult) / float(dataTest.shape[1])) * 100.
            out_str = "\nAccuracy with classic method: %.3f" % resClassic + "%\n"
            resKNN = (nbGoodResult2 / float(dataTest.shape[1])) * 100.
            out_str += "Accuracy with KNN method (k=" + str(self.K) + "): %.3f" % resKNN + "%\n"
            res = (nbGoodResult3 / float(dataTest.shape[1])) * 100.
            out_str += "Accuracy with KNN + Parzen window method (theta=" + str(self.Theta) + "): %.3f" % res + "%\n"
            print_output(out_str)

            t_stop = time.clock()
            log.info("Temps total: %.4fs\n" % float(t_stop - t_start))

            #### collect the final error values
            listeRes.append(100 - resClassic)
            listeRes.append(100 - resKNN)
            listeRes.append(100 - res)

    #### NNET search
    elif algo == "NNET":
        print_output("Début de l'algorithme du Perceptron multicouche...")

        # parameters, data, etc.
        dataTrain = pca_model.getWeightsVectors()
        dataTrainTargets = (dataTrainIndices - 1).reshape(dataTrainIndices.shape[0], -1)
        #! unlike KNN, the NNET takes feature vectors as rows rather than columns
        train_set = np.concatenate((dataTrain.T, dataTrainTargets), axis=1)

        # load the validation data
        dataValidation, dataValidationIndices, nClass = tools.loadImageData("validation", self.categorie)
        print_output("Projection des données de validation...")
        dataValidation_proj = pca_model.getProjection(dataValidation)
        dataValidationTargets = (dataValidationIndices - 1).reshape(dataValidationIndices.shape[0], -1)
        validation_set = np.concatenate((dataValidation_proj.T, dataValidationTargets), axis=1)

        # load the test data
        dataTest, dataTestIndices, nClass = tools.loadImageData("test", self.categorie)
        print_output("Projection des données de test...")
        dataTest_proj = pca_model.getProjection(dataTest)
        dataTestTargets = (dataTestIndices - 1).reshape(dataTestIndices.shape[0], -1)
        test_set = np.concatenate((dataTest_proj.T, dataTestTargets), axis=1)

        # build and train the neural network model
        nnet_model = NeuralNetwork(dataTrain.shape[0], self.n_hidden, nClass, self.lr, self.wd)
        if self.validation == 1:
            train_out, valid_out, test_out = nnet_model.train(train_set, self.n_epoch, self.batch_size, valid_set=validation_set, test_set=test_set)
        else:
            train_out, test_out = nnet_model.train(train_set, self.n_epoch, self.batch_size, test_set=test_set)

        # plot the training curves
        x = []
        y = []
        y_err = []
        color = []
        legend = []
        legend_err = []
        filename = IMG_DIR + "Risque__Epoch_" + str(self.n_epoch) + "_Hidden_" + str(self.n_hidden) + "_Lr_" + str(self.lr) + "_L2_" + str(self.wd) + "_Categorie_" + str(self.categorie) + "_Batch_" + str(self.batch_size) + "_"
        filename_err = IMG_DIR + "Erreur_classification__Epoch_" + str(self.n_epoch) + "_Hidden_" + str(self.n_hidden) + "_Lr_" + str(self.lr) + "_L2_" + str(self.wd) + "_Categorie_" + str(self.categorie) + "_Batch_" + str(self.batch_size) + "_"

        train_out = np.array(train_out)
        x.append(np.array(xrange(train_out.shape[0])))

        # train curve parameters
        color.append('g-')
        legend.append("R Train")
        filename += "_Train"
        y.append(train_out[:, 0])
        y_err.append(train_out[:, 1])
        legend_err.append("Err Train")
        filename_err += "_Train"

        # validation curve parameters
        if self.validation == 1:
            valid_out = np.array(valid_out)
            x.append(np.array(xrange(valid_out.shape[0])))
            y.append(valid_out[:, 0])
            y_err.append(valid_out[:, 1])
            color.append('b-')
            legend.append("R Validation")
            legend_err.append("Err Validation")
            filename += "_Validation"
            filename_err += "_Validation"

        # test curve parameters
        test_out = np.array(test_out)
        x.append(np.array(xrange(test_out.shape[0])))
        y.append(test_out[:, 0])
        y_err.append(test_out[:, 1])
        color.append('r-')
        legend.append("R Test")
        legend_err.append("Err Test")
        filename += "_Test"
        filename_err += "_Test"

        # display
        title = u"\nEpoque: " + str(self.n_epoch) + " - Taille du batch: " + str(self.batch_size) + u" - Neurones cachés: " + str(self.n_hidden) + "\nL2: " + str(self.wd) + " - Taux d'apprentissage: " + str(self.lr) + u" - Catégorie: " + str(self.categorie)
        tools.drawCurves(x, y, color, legend, bDisplay=True, filename=filename, title=title, xlabel="Epoque", ylabel=u"Risque régularisé")
        tools.drawCurves(x, y_err, color, legend_err, bDisplay=True, filename=filename_err, title=title, xlabel="Epoque", ylabel="Erreur classification")

        #### build a file for improved curves
        if self.stock == 1:
            fichier = open("curvErrorNNet" + ''.join(''.join(title.split(' ')).split('\n')), "w")
            fichier.write("#epoch errorTrain errorValidation errorTest\n")
            if len(x) == 3:
                for j in range(len(x[0])):
                    fichier.write(str(x[0][j]) + " " + str(y[0][j]) + " " + str(y[1][j]) + " " + str(y[2][j]) + "\n")
            fichier.close()

        """
        /!\ This part is no longer needed because it is done inside the nnet during training

        ## TEST ###########################
        # TODO: this whole part should be reworked to produce
        # train/validation/test plots

        # counters of correct results
        nbGoodResult = 0
        for i in range(0, int(dataTest.shape[1])):
            resultNNET = np.argmax(nnet_model.compute_predictions(dataTest_proj[:, i]), axis=1)[0]
            if(resultNNET == dataTestTargets[i]):
                nbGoodResult += 1
            out_str = "Result: " + str(resultNNET) + " | Expected: " + str(dataTestTargets[i]) + "\n"  # +1 because the matrix index starts at 0
            print_output(out_str)

        res = (float(nbGoodResult) / float(dataTest.shape[1])) * 100.
        out_str = "\nAccuracy : %.3f" % res + "%\n"
        print_output(out_str)
        """

    return listeRes
def reduceModel(model, atoms, select):
    """Return reduced NMA model.

    Reduces a :class:`~.NMA` model to a subset of *atoms* matching *select*.
    This function behaves differently depending on the type of the *model*
    argument.  For :class:`.ANM` and :class:`.GNM` or other :class:`.NMA`
    models, the force constant matrix for the system of interest (specified by
    *select*) is derived from the force constant matrix for the *model* by
    assuming that for any given displacement of the system of interest, other
    atoms move along in such a way as to minimize the potential energy.  This
    is based on the formulation in [KH00]_.  For :class:`.PCA` models, this
    function simply takes the sub-covariance matrix for the selection.

    :arg model: dynamics model
    :type model: :class:`.ANM`, :class:`.GNM`, or :class:`.PCA`
    :arg atoms: atoms that were used to build the model
    :type atoms: :class:`.Atomic`
    :arg select: an atom selection or a selection string
    :type select: :class:`.Selection`, str

    :returns: (:class:`.NMA`, :class:`.Selection`)"""

    linalg = importLA()

    if not isinstance(model, NMA):
        raise TypeError('model must be an NMA instance, not {0:s}'
                        .format(type(model)))
    if not isinstance(atoms, (AtomGroup, AtomSubset, AtomMap)):
        raise TypeError('atoms type is not valid')
    if len(atoms) <= 1:
        raise TypeError('atoms must contain more than 1 atoms')

    if isinstance(model, GNM):
        matrix = model._kirchhoff
    elif isinstance(model, ANM):
        matrix = model._hessian
    elif isinstance(model, PCA):
        matrix = model._cov
    else:
        raise TypeError('model does not have a valid type derived from NMA')
    if matrix is None:
        raise ValueError('model matrix (Hessian/Kirchhoff/Covariance) is not '
                         'built')

    if isinstance(select, str):
        system = SELECT.getBoolArray(atoms, select)
        n_sel = sum(system)
        if n_sel == 0:
            raise ValueError('select matches 0 atoms')
        if len(atoms) == n_sel:
            raise ValueError('select matches all atoms')

        if isinstance(atoms, AtomGroup):
            ag = atoms
            which = np.arange(len(atoms))[system]
        else:
            ag = atoms.getAtomGroup()
            which = atoms._getIndices()[system]
        sel = Selection(ag, which, select, atoms.getACSIndex())

    elif isinstance(select, AtomSubset):
        sel = select
        if isinstance(atoms, AtomGroup):
            if sel.getAtomGroup() != atoms:
                raise ValueError('select and atoms do not match')
            system = np.zeros(len(atoms), bool)
            system[sel._getIndices()] = True
        else:
            if atoms.getAtomGroup() != sel.getAtomGroup():
                raise ValueError('select and atoms do not match')
            elif not sel in atoms:
                raise ValueError('select is not a subset of atoms')
            idxset = set(atoms._getIndices())
            system = np.array([idx in idxset for idx in sel._getIndices()])

    else:
        raise TypeError('select must be a string or a Selection instance')

    other = np.invert(system)

    if model.is3d():
        system = np.tile(system, (3, 1)).transpose().flatten()
        other = np.tile(other, (3, 1)).transpose().flatten()
    ss = matrix[system, :][:, system]
    if isinstance(model, PCA):
        eda = PCA(model.getTitle() + ' reduced')
        eda.setCovariance(ss)
        return eda, system
    so = matrix[system, :][:, other]
    os = matrix[other, :][:, system]
    oo = matrix[other, :][:, other]
    matrix = ss - np.dot(so, np.dot(linalg.inv(oo), os))

    if isinstance(model, GNM):
        gnm = GNM(model.getTitle() + ' reduced')
        gnm.setKirchhoff(matrix)
        return gnm, sel
    elif isinstance(model, ANM):
        anm = ANM(model.getTitle() + ' reduced')
        anm.setHessian(matrix)
        return anm, sel
    elif isinstance(model, PCA):
        eda = PCA(model.getTitle() + ' reduced')
        eda.setCovariance(matrix)
        return eda, sel
kernelpot = KernelPotential(options)

# FILL KERNEL
generate = False
if generate:
    for struct in structures:
        print struct.label
        kernelpot.acquire(struct, 1., label=struct.label)
    print kernelpot.IX.shape
    np.savetxt('out.kernelpot.ix.txt', kernelpot.IX)
else:
    IX = np.loadtxt('out.kernelpot.ix.txt')
    kernelpot.importAcquire(IX, 1.)

# KERNEL PCA
pca = PCA()
pca.compute(IX, normalize_mean=False, normalize_std=False)
#pca = IPCA()
#pca.compute(IX, normalize_mean=False, normalize_std=False)

# =============================
# CHECK COMPONENT NORMALIZATION
# =============================

ones_vec = np.zeros(567)
ones_vec.fill(1.)
np.savetxt('out.pca.unnorm.txt', pca.unnormBlock(ones_vec))

# =================
# INDEX CUTOFF SCAN