Code Example #1
    def test_fitCov(self):
        """ Test PCA using covariance matrix approach. """

        testRequestedDim = 1
        generatedDatasetsGaussian = 10

        # for covariance PCA
        testDataSet = np.array([[0, 1, 2, 3, 4, 0,  0],
                                [0, 0, 0, 0, 0, 1, -1]], dtype=np.float64)
        expRes = np.array([[-1.42857, -0.42857, 0.57143, 1.57143, 2.57143, -1.42857, -1.42857]]) # Beware the 2-dimensionality

        resCov = pca.pca(testDataSet, testRequestedDim, "cov")
        npt.assert_array_almost_equal(expRes, resCov)

        ## with randomly generated data
        for i in range (generatedDatasetsGaussian):

            ## non memmap-version
            testDataSet = np.load("unitTest/testData/" + "testGaussianClasses" + str(i+1) + ".npy")
            expRes = np.load("unitTest/testData/" + "testGaussianClassesTransformed" + str(i+1) + ".npy")

            resCov = pca.pca(testDataSet, 1, "cov")
            npt.assert_array_almost_equal(expRes, resCov)

            ## memmap-version
            testDataSet = np.memmap("unitTest/testData/" + "testGaussianClassesMmap" + str(i+1) + ".npy", dtype="float64", mode="r+", shape=(2,2000))

            resCov = pca.pca(testDataSet, 1, "cov")
            npt.assert_array_almost_equal(expRes, resCov)
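
For context, a minimal sketch of the covariance route that reproduces the expected output above, assuming the same feature-by-sample layout as the test (pca_cov is a hypothetical name, and each component's sign is only determined up to a factor of -1):

import numpy as np

def pca_cov(data, n_components):
    # data: features x samples, as in the test above
    centered = data - data.mean(axis=1, keepdims=True)
    cov = np.cov(centered)                        # feature-by-feature covariance
    evals, evecs = np.linalg.eigh(cov)            # eigenvalues in ascending order
    order = np.argsort(evals)[::-1][:n_components]
    return evecs[:, order].T @ centered           # project onto the leading components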
Code Example #2
File: test_pca.py Project: jw-develop/cs394-projects
def mainTest(X_train, X_test, y_train, y_test, k):
    print("--Test 1--")

    M = 3

    # PCA Work
    print("\nTraining data:")
    comp_1 = pca.pca(X_train, M)
    X_train_t = pca.transform(X_train, comp_1)

    print("\nTesting data:")
    comp_2 = pca.pca(X_test, M)
    X_test_t = pca.transform(X_test, comp_2)

    # Print base results.
    print("\nBefore PCA - Dim ", len(X_train[0]))

    classifier = svm.train(X_train, y_train, k, C=None)
    info = svm.classify(classifier, X_test, return_sums=True)

    printResults(info[1], y_test, info[0])

    # Print transformed results.
    print("After PCA - Dim ", M)
    X_train = X_train_t
    X_test = X_test_t

    classifier = svm.train(X_train, y_train, k, C=None)
    info = svm.classify(classifier, X_test, return_sums=True)

    printResults(info[1], y_test, info[0])
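
Note that this snippet fits a second PCA on the test set, so training and test data end up in different coordinate systems; the more common pattern projects both with the components fitted on the training data. A sketch using the same (assumed) pca API:

    comp = pca.pca(X_train, M)
    X_train_t = pca.transform(X_train, comp)
    X_test_t = pca.transform(X_test, comp)  # reuse the training components for the test set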
Code Example #3
    def predict(self, X, V_truncate, gmm):
        assert X.ndim == 4
        print("Extracting Fisher features on testing data")
        n = X.shape[0]
        ret = []

        local_feature_extractor = load_features.get_feature_extractor(
            self.local_feature_extractor_name)
        local_features = local_feature_extractor.predict(X, unflatten=True)

        if self.local_feature_extractor_name == 'hog':
            # local_features is a 3d array
            _, V_truncate = pca(local_features.reshape(
                -1, local_features.shape[-1]),
                                components=self.n_components)  # assumed instance attribute; bare n_components was undefined here
        elif self.local_feature_extractor_name == 'sift':
            # local_features is a list of 2d arrays
            _, V_truncate = pca(_concat_2d_arrays(local_features),
                                components=self.n_components)  # same assumption as above
        else:
            raise Exception("Unknown local feature extractor")

        local_features_pca = []
        for i in range(n):
            local_features_pca.append(
                numpy.array(numpy.matrix(local_features[i]) * V_truncate))

        fisher_vector = FisherVector(self.nclasses,
                                     len(local_features_pca[0][0]), gmm.pi,
                                     gmm.mu, gmm.sigma)

        for i in tqdm(range(n)):
            ret.append(fisher_vector.predict(local_features_pca[i]))

        return numpy.array(ret)
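
Since numpy.matrix is deprecated, the projection loop above can be written with plain arrays; a sketch with the same names:

        V = numpy.asarray(V_truncate)
        local_features_pca = [numpy.asarray(lf) @ V for lf in local_features]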
Code Example #4
	def relativeExtremaSegments(self, rawData, maxMin="max", minSegSize=50):
		from scipy.signal import argrelmax, argrelmin
		PCs = pca(rawData, n_components=1)[0]
		if maxMin == 'max':
			return argrelmax(PCs[:,0], order=minSegSize)[0]
		if maxMin == 'min':
			return argrelmin(PCs[:,0], order=minSegSize)[0]
Code Example #5
def DimensionalityReduction(d, eigenvalue_filename, eigenvector_filename):

	print "start to PCA"
	d_pca = pca(np.array(d))
	d_eigenvalue = [i[0] for i in d_pca]
	d_eigenvector = [i[1] for i in d_pca]

	if len(eigenvector_filename) != 0:
		#there exists an eigenvector filename
		eigenvector_output = open(eigenvector_filename,'w')
		for i in range(len(d_eigenvector)):
			for j in range(len(d_eigenvector[i])):
				eigenvector_output.write(str(d_eigenvector[i][j])+"\t")
			eigenvector_output.write("\n")
		eigenvector_output.close()
	else:
		#this is an empty eigenvector filename
		pass

	d_eigenvalue_total = 0.0
	if len(eigenvalue_filename) != 0:
		#there exists an eigenvalue filename; open it only in this branch
		eigenvalue_output = open(eigenvalue_filename,'w')
		for i in range(len(d_eigenvalue)):
			d_eigenvalue_total += d_eigenvalue[i]
		d_eigenvalue_sum = 0.0
		for i in range(len(d_eigenvalue)):
			d_eigenvalue_sum += d_eigenvalue[i]/d_eigenvalue_total
			print >> eigenvalue_output, d_eigenvalue[i]/d_eigenvalue_total, "\t", d_eigenvalue_sum
		eigenvalue_output.close()
	else:
		#this is an empty eigenvalue filename
		pass

	return d_eigenvalue, d_eigenvector, d_eigenvalue_total
Code Example #6
	def PCABySensor(self, data, n_components=3):
		dataBySensor = self.dataBySensor(data)
		pcaDict = {}
		for k,v in dataBySensor.items():
			pcaDict[k] = pca(v,n_components)[0]
		pcaDict['Time'] = dataBySensor['Time']
		return pcaDict
Code Example #7
File: plot_output.py Project: glinka/sloppy_models
def shit_plot():
    optvals = np.genfromtxt('./data/optvals.csv', delimiter=',')
    errs = np.genfromtxt('./data/errs.csv')
    npts = optvals.shape[0]
    npts_toplot = 0
    optvals_toplot = np.empty((npts, 3))
    for i in range(npts):
        if errs[i] < 8e-9:
            optvals_toplot[npts_toplot] = optvals[i]
            npts_toplot = npts_toplot + 1
    print npts_toplot
    optvals_toplot = optvals_toplot[:npts_toplot, :]

    optvals_toplot[:, 2] = optvals_toplot[:, 2] * 1e8
    optvals_toplot[:, 2] = optvals_toplot[:, 2] - np.amin(optvals_toplot[:, 2])
    print optvals_toplot[:, 2]
    sing_vals, right_sing_vect = pca.pca(optvals_toplot)
    print sing_vals
    print right_sing_vect
    fig = plt.figure()
    ax = fig.add_subplot(111)
    proj = np.dot(optvals_toplot, right_sing_vect[:, :2])
    ax.scatter(proj[:, 0], proj[:, 1], c=optvals_toplot[:, 2])
    ax.set_ylim((np.amin(proj[:, 1]), np.amax(proj[:, 1])))
    plt.show(fig)
Code Example #8
File: main.py Project: eugene6124/share
def main():
    print("PCA")
    pca.pca()
    print("RandomForest")
    rf.rf(2)
    print("KNN")
    knn.knn(2)
    print("SVC")
    svc.svc()
    print("GRID_SVC")
    svc.gridSearchScore()
    print("Logistic")
    logistic.Logistic().fit()
    print("DNN Classifier")
    classifier_model = classifier.classifier()
    classifier_model.fit()
Code Example #9
File: draw_plot.py Project: vinay-swamy/iLearn
def plot_clustering_2d(encodings, myCluster, output, **kw):
    if myCluster != 0:
        if kw['sof'] == 'sample':
            data = np.array(encodings)[1:, 1:].astype(float)
        else:
            data = np.array(encodings).T[1:, 1:].astype(float)
        labels = np.array(myCluster)[0:, 1:].reshape(-1, )
        try:
            Y = tsne.tsne(data, 2, 50, 20.0)
        except RuntimeWarning:
            # fall back to PCA; this branch is reached only if warnings are raised as errors
            Y = pca.pca(data, n_components=2)

        df = pd.DataFrame({'X': Y[:, 0], 'Y': Y[:, 1], 'L': labels})

        fig = plt.figure(0)
        mySet = set(labels)
        if len(mySet) > 5:
            plt.scatter(Y[:, 0], Y[:, 1], 20, labels)
        else:
            for l in mySet:
                newData = df.loc[df.loc[:, "L"] == l, :]
                plt.scatter(np.array(newData.X), np.array(newData.Y), 20, label="Cluster_%s" % l)
        plt.legend(loc='best')
        plt.savefig('%s.png' % output)
        plt.close(0)
Code Example #10
File: master.py Project: buguen/minf
def getLowDimensionalSegments(highDimensionalData,n_components=2,plt=False,title="Latent space segments"):
	(lowDimensionalData,explainedVariance) = pca.pca(highDimensionalData,n_components)
	(mins,maxs) = segment.segmentationPoints(lowDimensionalData[:,0])
	segments = pl.split(lowDimensionalData,maxs)[1:-1]
	if plt:
		plot.plotGridOf2Ds(segments,title)
	return (segments,explainedVariance)
Code Example #11
def osp_helper(hsi_data, tgt_sig, kwargs):
    n_dim_ss = kwargs['n_dim_ss']
    # see Eismann, pp670
    n_band, n_pixel = hsi_data.shape
    mu = np.mean(hsi_data, 1)
    mu = mu[:, np.newaxis]
    x = hsi_data - mu

    # get PCA rotation, no dim reduction
    _, _, evecs, _, _ = pca(hsi_data, 1)
    s = tgt_sig - mu

    # get a subspace that theoretically encompasses the background
    B = evecs[:, :n_dim_ss]

    PB = B @ np.linalg.pinv(B.T @ B) @ B.T
    PperpB = np.eye(n_band) - PB

    f = s.T @ PperpB

    osp_data = np.zeros(n_pixel)

    for i in range(n_pixel):
        osp_data[i] = f @ x[:, i]
    return osp_data, {}
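
The per-pixel loop above is a single matrix product in disguise (f is 1 x n_band and x is n_band x n_pixel), so an equivalent vectorized form is:

    osp_data = np.asarray(f @ x).ravel()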
Code Example #12
File: test_pca.py Project: glinka/pca
def test_pca():
    """Generates a noisy 2d plane embedded in 3d, then performs PCA to find better coordinates for the data"""
    # define a planar equation
    z = lambda x, y: 3*x + y + 4
    npoints = 200
    stdev = 1.0
    noise = stdev*np.random.normal(size=npoints)
    xvals = np.random.uniform(low=2, high=6, size=npoints)
    yvals = np.random.uniform(low=2, high=10, size=npoints)
    zvals = z(xvals, yvals) + noise

    data = np.transpose(np.array([xvals, yvals, zvals]))
    pcomp, pvar = pca.pca(data, 2)

    # plot projections along various components
    ncomp = pvar.shape[0]
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    cs = ['b', 'r', 'g']
    data_avg = np.average(data, 0)
    centered_data = data - data_avg
    # do not bother plotting "full" projection, which would give back the original data
    # proj = np.dot(pcomp.T, centered_data.T)
    # print np.dot(pcomp.T, pcomp), pvar
    # ax.scatter(proj[0,:]+data_avg[0], proj[1,:]+data_avg[1], 0, c='g')
    proj = np.dot(pcomp, np.dot(pcomp.T, centered_data.T))
    ax.scatter(data[:,0], data[:,1], data[:,2], alpha=1.0, s=80, c='#96031E')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    # hide labels and grid, too squashed/noisy
    # ax.grid(False)
    ax.w_xaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
    ax.w_yaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
    ax.w_zaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
    plt.tick_params(axis='both', which='major', labelsize=0)
    # ax.scatter(proj[0,:]+data_avg[0], proj[1,:]+data_avg[1], proj[2,:]+data_avg[2], color='g')
    # # plot lines from orig data to planar projection
    # for i in range(npoints):
    #     pts = np.array((proj[:,i] + data_avg, data[i,:]))
    #     ax.plot(pts[:,0], pts[:,1], pts[:,2], c='y')

    # sort based on z val and wireframe
    # data_avg.shape = (3,1)
    # sorted_indices = np.argsort(np.linalg.norm(proj + data_avg, axis=0))
    # proj = proj[:,sorted_indices]

    # fake pca result, true in the limit of infinite data
    # xgrid, ygrid = np.meshgrid(np.linspace(2,6,10), np.linspace(2,10,10))
    # ax.plot_wireframe(xgrid, ygrid, z(xgrid, ygrid), color='#12227A', alpha=0.5)
    # grid_coord = (2,2)
    # for i in range(2):
    #     ax.plot((xgrid[grid_coord], xgrid[grid_coord] + pcomp[0,i]), (ygrid[grid_coord], ygrid[grid_coord] + pcomp[1,i]), (z(xgrid[grid_coord], ygrid[grid_coord]), z(xgrid[grid_coord], ygrid[grid_coord]) + pcomp[2,i]), c='k')

    for ii in range(0, 360):
        ax.view_init(elev=20.0, azim=ii)
        if ii >= 180:
            xgrid, ygrid = np.meshgrid(np.linspace(2,6,10), np.linspace(2,10,10))
            ax.plot_wireframe(xgrid, ygrid, z(xgrid, ygrid), color='#12227A', alpha=0.5)
        plt.savefig('/home/alexander/workspace/sloppy_models/rawlings_model/figs/pca/pca' + str(ii) + '.png')
Code Example #13
File: test_pca.py Project: hovinh/pca
	def test_plot_combinations(self):

		labels = load_iris().feature_names
		y = load_iris().target

		X = pd.DataFrame(data=load_iris().data, columns=labels, index=y)

		param_grid = {
			'n_components':[None, 0.01, 1, 0.95, 2, 100000000000],
			'row_labels':[None, [], y],
			'detect_outliers' : [None, 'ht2','spe'],
			}

		allNames = param_grid.keys()
		combinations = it.product(*(param_grid[Name] for Name in allNames))
		combinations=list(combinations)

		for combination in combinations:
			model = pca(n_components=combination[0])
			model.fit_transform(X)
			assert model.plot()
			assert model.biplot(y=y, SPE=True, hotellingt2=True)
			assert model.biplot3d(y=y, SPE=True, hotellingt2=True)
			assert model.biplot(y=y, SPE=True, hotellingt2=False)
			assert model.biplot(y=y, SPE=False, hotellingt2=True)
			assert model.biplot(y=y, SPE=False, hotellingt2=False)
Code Example #14
def segment(k,m, inciset, trainingset, radiographs, colors, leftout, mode = 0):
    
    # get image training set
    trainimgs = [radiographs[i] for i in trainingset]
    
    # read landmarks from file
    lmtrain,lmtest = landmarks.get(trainingset)
    
    # align all landmarks, plot depending on mode
    aligns, means = landmarks.align(lmtrain)
    if mode == 0:
        ui.plotalign(colors, means, aligns)
        
    # do pca, plot depending on mode
    eva, evc = pca.pca(aligns, means)
    if mode == 0:
        ui.plotpca(means,eva,evc)
           
    # get initial estimate, manual or auto depending on mode
    # draw init also depending on mode
    est, greymodels = model.estimate(k, m, inciset, means, trainimgs, lmtrain, radiographs[leftout], colors, mode) 
    if mode == 2:
        ui.plotinit(est, radiographs[leftout], colors, leftout)
        
    # fit init estimate and get plot mask
    if mode == 0 or mode == 1:
        X = fit.fit(est, inciset, eva, evc, means, greymodels, radiographs[leftout], k, m, 3.0)
        mask = ui.plotfit(radiographs[leftout], list(est), X, len(inciset), colors)
    
        return mask
Code Example #15
def csd_anomaly(hsi_img, n_dim_bg, n_dim_tgt, tgt_orth):
    """
	Complementary Subspace Detector
	 assumes background and target are complementary subspaces
	 of PCA variance ranked space
	Ref: A. Schaum, "Joint subspace detection of hyperspectral targets," 2004 IEEE Aerospace Conference Proceedings (IEEE Cat. No.04TH8720), 2004, pp. 1824 Vol.3. doi: 10.1109/AERO.2004.1367963

	inputs:
	  hsi_image - n_row x n_col x n_band
	  n_dim_bg - number of leading dimensions to assign to background subspace
	  n_dim_tgt - number of dimensions to assign to target subspace
	              use None to use all remaining after background assignment
	  tgt_orth - True/False, set target subspace orthogonal to background subspace

	8/7/2012 - Taylor C. Glenn
	5/5/2018 - Edited by Alina Zare
	11/2018 - Python Implementation by Yutai Zhou
	"""
    n_row, n_col, n_band = hsi_img.shape
    n_pixel = n_row * n_col

    hsi_data = hsi_img.reshape((n_pixel, n_band), order='F').T

    # PCA rotation, no reduction
    pca_data, _, evecs, evals, _ = pca(hsi_data, 1)

    # whiten the data so that later steps are equivalent to Mahalanobis distance
    z = np.diag(1 / np.sqrt(evals)) @ pca_data

    # figure out background and target subspaces
    bg_rg = np.array(range(0, n_dim_bg))

    if tgt_orth:
        # set target to orthogonal complement of background
        if n_dim_tgt is None:
            n_dim_tgt = n_band - n_dim_bg
        tgt_rg = np.arange(n_dim_bg, n_dim_bg + n_dim_tgt)  # the n_dim_tgt dimensions after the background block
    else:
        # target and background overlap
        if n_dim_tgt is None:
            n_dim_tgt = n_band
        tgt_rg = np.array(range(0, n_dim_tgt))

    # set background and target subspaces
    B = evecs[:, bg_rg]
    S = evecs[:, tgt_rg]

    # run the detector
    csd_data = np.zeros(n_pixel)

    for i in range(n_pixel):
        Sz = S.T @ z[:, i]
        Bz = B.T @ z[:, i]

        csd_data[i] = Sz.T @ Sz - Bz.T @ Bz

    csd_out = csd_data.reshape(n_row, n_col, order='F')

    return csd_out
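
A minimal usage sketch on synthetic data, assuming the pca helper used above is importable (shapes follow the docstring):

import numpy as np

hsi_img = np.random.rand(50, 60, 30)   # n_row x n_col x n_band
det_map = csd_anomaly(hsi_img, n_dim_bg=3, n_dim_tgt=None, tgt_orth=True)
print(det_map.shape)                   # (50, 60)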
Code Example #16
    def __initialise_latent(self, reduced_dimensionality):
        """
        Initialises latent variables with Principal Component Analysis and keeps a copy for resetting purposes.
        """
        if self.__init_latent.shape[0] == 0 or self.__init_latent.shape[
                1] != reduced_dimensionality:
            self.__init_latent = pca(self._Y, reduced_dimensionality)
        self._X = np.copy(self.__init_latent)
Code Example #17
File: segment.py Project: buguen/minf
def manuallySegment(inputFile, listOfSegmentationPoints, outputFilesPrefix):
	data = readRaw(inputFile)[:,4:]
	pcaData = pca(data,3)[0]
	segments = np.split(data,listOfSegmentationPoints)
	pcaSegments = np.split(pcaData,listOfSegmentationPoints)
	for i,seg,pcaSeg in zip(range(len(segments)),segments, pcaSegments):
		np.savetxt("%s%i%s"%(outputFilesPrefix,i,"RAW.txt"),seg, delimiter=",")
		np.savetxt("%s%i%s"%(outputFilesPrefix,i,"PCA.txt"),pcaSeg, delimiter=",")
Code Example #18
def nonopt_correlations():
    corr_results = {}
    for i, tsp in enumerate(timbrespace_db.keys()):
        print('Processing', tsp)
        corr_results[tsp] = {}
        target_data = load.timbrespace_dismatrix(tsp, timbrespace_db)
        for rs in sorted(representations):
            aud_repres = load.timbrespace_features(
                tsp,
                representations=[rs],
                window=None,
                timbrespace_db=None,
                verbose=False)[rs]
            tab_red = []
            rs_type = rs.split('_')[-1]
            if rs_type == 'strf':
                n_components = 1
                for i in range(len(aud_repres)):
                    # print('PCA on sound %02i' % (i + 1))
                    strf_reduced = pca.pca(
                        np.absolute(aud_repres[i]),
                        aud_repres[i].shape[1],
                        n_components=n_components).flatten()
                    tab_red.append(strf_reduced / np.max(strf_reduced))
                tab_red = np.transpose(np.asarray(tab_red))
            elif rs_type == 'spectrogram' or rs_type == 'mps':
                for i in range(len(aud_repres)):
                    tab_red.append(aud_repres[i].flatten())
                tab_red = np.transpose(np.asarray(tab_red))
            elif rs_type == 'spectrum':
                for i in range(len(aud_repres)):
                    tab_red.append(aud_repres[i])
                # 128 x nb sounds (time or freq?)
                tab_red = np.transpose(np.asarray(tab_red))
            input_data = tab_red / np.mean(np.std(tab_red, axis=0))

            # plt.plot(input_data)
            # plt.show()
            ndims, ninstrus = input_data.shape[0], input_data.shape[1]
            no_samples = ninstrus * (ninstrus - 1) / 2
            idx_triu = np.triu_indices(target_data.shape[0], k=1)
            target_v = target_data[idx_triu]
            mean_target = np.mean(target_v)
            std_target = np.std(target_v)
            kernel = np.zeros((ninstrus, ninstrus))
            for i in range(ninstrus):
                for j in range(i + 1, ninstrus):
                    kernel[i, j] = np.sum(
                        np.power(input_data[:, i] - input_data[:, j], 2))
            kernel_v = kernel[idx_triu]
            mean_kernel = np.mean(kernel_v)
            std_kernel = np.std(kernel_v)
            Jn = np.sum(
                np.multiply(kernel_v - mean_kernel, target_v - mean_target))
            Jd = (no_samples - 1) * std_target * std_kernel
            corr_results[tsp][rs] = Jn / Jd
            print('  {} : {}'.format(rs, Jn / Jd))
    pickle.dump(corr_results, open('correlations_results.pkl', 'wb'))
Code Example #19
File: paper-figs.py Project: glinka/prefAttachModel
def pca_rho_kappa_embedding_figs():
    """Performs PCA on a collection of network stationary states arising from a range of $m$ and $\kappa$ values, plots projection of data along PC1 and PC2, also plots variances.

    **To generate data:**::

        ./rho_kappa_embedding 0 2000
        cp ./embedding_data/rho_kappa_graph_embeddings.csv ./manuscript-materials/data
        cp ./embedding_data/rho_kappa_params.csv ./manuscript-materials/data

    """
    # project graph embeddings with PCA
    embeddings = np.genfromtxt(data_directory + 'rho_kappa_graph_embeddings.csv', delimiter=',')
    k = 6
    pcs, variances = pca(embeddings, k)
    projections = np.dot(pcs.T, embeddings.T) # (k, n) array

    # plot variances
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(np.arange(1,k+1), variances)
    ax.semilogy(np.arange(1,k+1), variances)
    ax.set_xlabel(r'$i$')
    ax.set_ylabel(r'$\sigma^2_i$')

    # # plot projection along first and second principal component
    params = np.genfromtxt(data_directory + 'rho_kappa_params.csv', delimiter=',')
    # color by log(kappa)
    fs = 64
    s = 50
    fig = plt.figure()
    ax = fig.add_subplot(111)
    c = ax.scatter(projections[0], projections[1], c=np.log10(params[:,1]), s=s)
    cb = fig.colorbar(c)
    cb.set_label(r'$\log(\kappa)$', fontsize=fs)
    ax.set_xlabel(r'$w_1$', fontsize=fs)
    ax.set_ylabel(r'$w_2$', fontsize=fs)
    ax.set_xlim((1.05*np.min(projections[0]), 1.4*np.max(projections[0])))
    ax.set_ylim(bottom=1.05*np.min(projections[1]))
    formatter = FormatAxis(ax, has_zaxis=False)
    formatter.format('x', projections[0], '%d', nticks=3)
    formatter.format('y', projections[1], '%d', nticks=3)
    fig.subplots_adjust(bottom=0.15)

    # color by rho
    fig = plt.figure()
    ax = fig.add_subplot(111)
    c = ax.scatter(projections[0], projections[1], c=params[:,0], s=s)
    cb = fig.colorbar(c)
    cb.set_label(label=r'$\frac{2m}{n}$', fontsize=1.5*fs)
    ax.set_xlabel(r'$w_1$', fontsize=fs)
    ax.set_ylabel(r'$w_2$', fontsize=fs)
    ax.set_xlim((1.05*np.min(projections[0]), 1.4*np.max(projections[0])))
    ax.set_ylim(bottom=1.05*np.min(projections[1]))
    formatter = FormatAxis(ax, has_zaxis=False)
    formatter.format('x', projections[0], '%d', nticks=3)
    formatter.format('y', projections[1], '%d', nticks=3)
    fig.subplots_adjust(bottom=0.15)
    plt.show()
Code Example #20
File: mysvm.py Project: liangkun/ml-play
def test_hand_written(c=200, epsilon=0.0001, max_iter=10000, kernel=linear_kernel,
                      parallel=False, skip_rate=0, vrate=0.90):
    train_dir = 'data/Ch02/digits/trainingDigits'
    test_dir = 'data/Ch02/digits/testDigits'
    begin_progress('Reading train data')
    train_xs, train_ys = load_digits(train_dir, skip_rate)
    end_progress()

    begin_progress('Reading test data')
    test_xs, test_ys = load_digits(test_dir, skip_rate)
    end_progress()

    pcs, means = pca.pca(train_xs, vrate=vrate)
    train_xs = pca.transform(train_xs, pcs, means)
    test_xs = pca.transform(test_xs, pcs, means)

    print("Dimension reduction from {} to {}".format(*pcs.shape))

    begin_progress('Train svms')
    num_classes = 10
    svms = [None] * num_classes

    if not parallel:
        k_cache = create_k_cache(train_xs, kernel)
        for i in range(num_classes):
            k, os = train_svm(i, train_xs, train_ys, c, epsilon, max_iter, kernel, k_cache)
            svms[k] = os
            progress()
    else:
        def done_hook(future):
            nonlocal svms
            i, svm = future.result()
            svms[i] = svm
            progress()

        with ProcessPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(train_svm,
                                       i,
                                       train_xs,
                                       train_ys,
                                       c,
                                       epsilon,
                                       max_iter,
                                       kernel)
                       for i in range(num_classes)]
            for future in futures:
                future.add_done_callback(done_hook)

    end_progress()

    print('Testing svms:')
    train_er = multi_get_error_rate(svms, train_xs, train_ys) * 100
    print('SVM handwritten error rate on train set: %{}'.format(train_er))
    test_er = multi_get_error_rate(svms, test_xs, test_ys) * 100
    print('SVM handwritten error rate on test set: %{}'.format(test_er))
    return train_er, test_er
Code Example #21
File: pca_imagecube.py Project: jmilou/pca
 def __init__(self,datacube,method='cor',verbose=True,radii=None,\
              path=None,name='PCA',header=None):
     """
     Constructor of the pca_imagecube class.
     Input:
         - datacube: a 3d numpy array
         - method: 'cov' for covariance (default option), 'cor' for correlation 
             or 'ssq' for sum of squares
         - verbose: True or False if you want some information printed on the terminal
         - radii: an array containing the radii in pixels of the annuli in 
             which the PCA must be calculated. For instance: radii=[10,100,200] means
             the PCA will be computed in 2 annuli defined by 10px-100px and 100px-200px.
             By default, assumes the whole image is used.
         - path: the path where results must be saved. If no path is specified, 
                 then results can't be saved. 
         - name: a string, all output files will start with this name. 
             A good practice here is to use the name of the target and or date 
             of observation. By default it is 'PCA'
         - header: the header to use for the output files.
     """
     if datacube.ndim != 3:
         raise IndexError('The input datacube must be a 3D  numpy array !')
     self.nframes, self.ny, self.nx = datacube.shape
     if radii is None:
         radii = [
             0,
             int(np.round(np.sqrt((self.ny // 2)**2 + (self.nx // 2)**2)))
         ]
     self.method = method
     self.set_path(path, verbose=verbose)
     self.set_prefix(name + '_' + method + '_' +
                     '-'.join(['{0:d}'.format(i) for i in radii]))
     self.header = header
     distarr = distance_array((self.ny, self.nx), verbose=False)
     self.region_map = np.zeros((self.ny, self.nx), dtype=int)
     self.nb_annuli = len(radii) - 1
     self.Nobj_array = np.ndarray(self.nb_annuli)
     self.pca_array = []
     self.x_indices_array = []
     self.y_indices_array = []
     if verbose:
         print('There are {0:d} frames and {1:d} regions.'.format(
             self.nframes, self.nb_annuli))
     for i in range(self.nb_annuli):
         y_indices, x_indices = np.where(
             np.logical_and(distarr >= radii[i], distarr < radii[i + 1]))
         self.y_indices_array.append(y_indices)
         self.x_indices_array.append(x_indices)
         self.Nobj_array[i] = len(y_indices)
         self.region_map[y_indices, x_indices] = i + 1
         data = datacube[:, y_indices,
                         x_indices].T  # Transpose is used to get a shape (Nobj x Katt) where Katt is the number of frames of the datacube
         self.pca_array.append(pca.pca(data, method=method,
                                       verbose=verbose))
         if verbose:
             self.pca_array[i].print_explained_inertia(modes=5)
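
A hypothetical construction call matching the docstring above (random cube, two annuli):

import numpy as np

cube = np.random.randn(100, 256, 256)  # nframes x ny x nx
obj = pca_imagecube(cube, method='cov', verbose=True, radii=[10, 100, 200], name='example')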
Code Example #23
File: test_pca.py Project: hovinh/pca
	def test_for_outliers_and_transparency(self):	

		X = np.array(np.random.normal(0, 1, 500)).reshape(100, 5)
		outliers = np.array(np.random.uniform(5, 10, 25)).reshape(5, 5)
		X = np.vstack((X, outliers))

		model = pca(alpha=0.05)
		# Fit transform
		out = model.fit_transform(X)
		assert X[out['outliers']['y_bool'],:].shape[0]==5
		assert out['outliers'].shape[1]==5

		######## TEST FOR HT2 #########
		model = pca(alpha=0.05, detect_outliers=['ht2'])
		# Fit transform
		out = model.fit_transform(X)
		assert X[out['outliers']['y_bool'],:].shape[0]==5

		######## TEST FOR SPE/DMOX #########
		model = pca(alpha=0.05, detect_outliers=['spe'])
		# Fit transform
		out = model.fit_transform(X)
		assert 'y_bool_spe' in out['outliers'].columns

		######## TEST WITHOUT OUTLIERS #########
		model = pca(alpha=0.05, detect_outliers=None)
		# Fit transform
		out = model.fit_transform(X)
		assert out['outliers'].empty

		######## TEST FOR TRANSPARENCY WITH MATPLOTLIB VERSION #########
		assert model.scatter(alpha_transparency=0.1)
		assert model.scatter3d(alpha_transparency=0.1)
		assert model.biplot(alpha_transparency=0.1)
		assert model.biplot3d(alpha_transparency=0.1)
		assert model.scatter(alpha_transparency=None)
		assert model.scatter3d(alpha_transparency=None)
		assert model.biplot(alpha_transparency=None)
		assert model.biplot3d(alpha_transparency=None)
		assert model.scatter(alpha_transparency=0.5)
		assert model.scatter3d(alpha_transparency=0.5)
		assert model.biplot(alpha_transparency=0.5)
		assert model.biplot3d(alpha_transparency=0.5)
Code Example #24
File: main.py Project: glinka/sloppy_models
def pca_contour():
    """Plots the pca of the ellipsoid, which projects into a two-dimensional ellipse as expected"""
    data = np.genfromtxt('./data/output/contour_KVSt_to_dmaps.csv', skip_header=1, delimiter=',')
    npts_to_dmaps = 5000
    slice_size = data.shape[0] // npts_to_dmaps  # integer step so the slice below works under Python 3
    data = data[::slice_size]
    npts = data.shape[0]
    ndims = 2
    pcs, variances = pca(data, ndims)
    plot_dmaps.plot_xy(np.dot(pcs[:,0].T, data.T), np.dot(pcs[:,1].T, data.T), color=data[:,1]/data[:,2], scatter=True)
Code Example #25
File: sift.py Project: Michaelht92/CBIC
def pca_features(image):
    im = np.array(Image.open(image).convert('L'))
    process_image(image, 'empire.sift')
    features = np.loadtxt('empire.sift')
    os.remove('empire.sift')
    V, S, m = pca.pca(features)
    V = V[:50]
    features = array([dot(V, f-m) for f in features])
    np.savetxt('fea.txt', features)
    print 'done'
Code Example #26
def main():
    # prepare the dataset
    print('Parsing training set...')
    train_dir = os.path.join(args.data_root, 'OLHWDB1.1trn')
    train_set = olhwdb.OLHWDB(train_dir)

    print('Parsing test set...')
    test_dir = os.path.join(args.data_root, 'OLHWDB1.1tst')
    test_set = olhwdb.OLHWDB(test_dir)

    train_x = torch.from_numpy(train_set.x)  #.to(args.gpu_id)
    train_y = train_set.y

    test_x = torch.from_numpy(test_set.x)  #.to(args.gpu_id)
    test_y = test_set.y

    # Compress the data with PCA if args.dims is set
    if args.dims is not None:
        assert (args.dims < train_x.size(1))
        print('Compressing dataset to %d dims...' % (args.dims))
        train_x, sigma, mean = pca.pca(train_x, args.dims)
        test_x = test_x - mean.view(1, -1)
        test_x = test_x.matmul(sigma)

    # create lvq model
    net = lvq.LVQ(train_x.size(1), train_set.num_classes, args.k)
    # net.to(args.gpu_id)

    # init lvq prototypes with kmeans
    net.init_prototypes(train_x, train_y, args.kmeans_iter)

    test_acc = test(net, test_x, test_y)
    best_acc = test_acc

    print('Test acc for k-means initialization: %.2f' % (test_acc))

    # create optimizer
    optimizer = optim.lvq2_1(net.prototype, net.label, 0.1, 0.25)

    for epoch in range(args.epochs):
        print('Epoch: %d | %d...' % (epoch + 1, args.epochs))
        start = time.time()
        idx = [i for i in range(train_x.size(0))]
        np.random.shuffle(idx)
        for i in idx:
            x = train_x[i, :].view(1, -1)
            y = train_y[i]
            d = net(x)
            optimizer.step(x, y, d)
        test_acc = test(net, test_x, test_y)
        if test_acc > best_acc:
            best_acc = test_acc
        end = time.time()
        print('Runtime: %.2f min. Test acc: %.2f. Best acc: %.2f' %
              ((end - start) / 60, test_acc, best_acc))
Code Example #27
File: elbow.py Project: jinruimeng/keyan
def elbowCore(channelDataAll, a, k, iRate, schedule):
    n = np.shape(channelDataAll[0])[1]  # number of columns
    p = len(channelDataAll)  # number of pages
    sub = n >> a
    rates_C = []
    rates_U = []
    rates_S = []

    for g in range(1 << a):
        # show progress
        schedule[1] += 1
        tmpSchedule = schedule[1]
        print(u'Of ' + str(schedule[0]) + u' parts in total, part ' + str(tmpSchedule) + u' is starting!')

        channelData = []
        for h in range(p):
            channelDataPage = channelDataAll[h]
            channelData.append(channelDataPage[:, g * sub:(g + 1) * sub])

        covMatrixList = tools.getCovMatrixList(channelData)
        allCovMatrix = tools.matrixListToMatrix(covMatrixList)

        # cluster the covariance matrices
        centroids, clusterAssment = kmeans.KMeansOushi(allCovMatrix, k)
        centroidList = tools.matrixToMatrixList(centroids)

        # compute the original channel information, covariance eigenvalues, and transform matrices
        informations, SigmaList, UList = tools.getInformations(covMatrixList)

        # evaluate the PCA, measuring how much information is retained
        tmpRates = pca.pca(channelData, informations, centroidList, clusterAssment, iRate)[3][0][:, 1]
        rates_C.append(np.mean(tmpRates))

        # cluster the transform matrices
        allU = tools.matrixListToMatrix_U(UList)
        weights = tools.matrixListToMatrix_U(SigmaList)
        centroids, clusterAssment = kmeans.KMeansOushi_U(allU, k, weights, iRate)
        centroidList = tools.matrixToMatrixList_U(centroids)

        # evaluate the PCA, measuring how much information is retained
        tmpRates = pca.pca_U(channelData, informations, centroidList, clusterAssment, iRate)[3][0][:, 1]
        rates_U.append(np.mean(tmpRates))

        # no clustering, apply PCA directly
        tmpRates = pca.pca_S(SigmaList, iRate)[0][:, 1]
        rates_S.append(np.mean(tmpRates))

        # show progress
        print(u'Of ' + str(schedule[0]) + u' parts in total, part ' + str(tmpSchedule) + u' is done; ' + str(schedule[1]) + u' parts finished, progress: ' + '%.2f%%' % (schedule[1] / schedule[0] * 100) + u'!')

    rate_C = np.mean(rates_C)
    rate_U = np.mean(rates_U)
    rate_S = np.mean(rates_S)

    return rate_S.real, rate_C.real, rate_U.real
Code Example #28
File: chemspace.py Project: rajarshi/rest-ws
def handler(req):
    uriParts = req.uri.split("/")

    tmp = uriParts.index("chemspace")
    if len(uriParts) == tmp + 2:  ## return the list of available spaces
        req.content_type = "text/xml"
        req.write(_getChemicalSpaceDocument([("default", "AlogP, TPSA, num rot bond, MW", 4)]))
        return apache.OK

    if len(uriParts) < tmp + 3 and len(uriParts) < tmp + 4:
        return apache.HTTP_NOT_FOUND

    spaceDef = uriParts[tmp + 1]
    if spaceDef not in ["default"]:
        return apache.HTTP_NOT_FOUND

    ## see if we have a number of components specified
    try:
        numComponent = int(uriParts[tmp + 2])
        molecules = [x.strip() for x in ("/".join(uriParts[(tmp + 3) :])).split(",")]
    except:  ## wasn't a single number
        numComponent = 2
        molecules = [x.strip() for x in ("/".join(uriParts[(tmp + 2) :])).split(",")]

    ## get descriptor values
    if len(molecules) < 3:
        return apache.HTTP_NOT_FOUND
    descriptors = []
    for molecule in molecules:
        data = _getDescriptors(molecule)
        descriptors.append(data)

    if numComponent > len(descriptors[0]):
        numComponent = len(descriptors[0])

    ## do PCA
    import pca
    import numpy

    data = numpy.asarray(descriptors)
    mean, pcs, norm_pcs, variances, positions, norm_positions = pca.pca(data, "svd")

    centeredData = data - mean
    scores = numpy.dot(centeredData, numpy.transpose(pcs))
    scores = scores[: (scores.shape[0]), :numComponent]

    headers_in = req.headers_in
    try:
        accept = headers_in["Accept"]
        accept = accept.split(",")
    except KeyError, e:
        ## we don't throw an exception, since at least one client
        ## (Google Spreadsheets) does not provide an Accept header
        accept = ["text/html"]
Code Example #29
    def test_kPCA(self):
        generatedDatasetsCircle = 5

        # for kPCA with randomly generated data

        for i in range (generatedDatasetsCircle):
            testDataSet = np.load("unitTest/testData/" + "testCircles" + str(i+1) + ".npy")
            expRes = np.load("unitTest/testData/" + "testCirclesTransformed" + str(i+1) + ".npy")

            resKpca = pca.pca(testDataSet, 1, "kernel")
            npt.assert_array_almost_equal(expRes, resKpca)
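
For reference, a minimal kernel-PCA sketch with an RBF kernel; the kernel the tested module actually uses is not shown here, so treat the kernel choice and the gamma parameter as assumptions:

import numpy as np

def kpca_rbf(data, n_components, gamma=1.0):
    # data: features x samples, matching the layout of the tests above
    X = data.T                                           # samples x features
    sq = np.sum(X ** 2, axis=1)
    K = np.exp(-gamma * (sq[:, None] + sq[None, :] - 2 * X @ X.T))
    n = K.shape[0]
    one_n = np.ones((n, n)) / n
    K = K - one_n @ K - K @ one_n + one_n @ K @ one_n    # center in feature space
    evals, evecs = np.linalg.eigh(K)
    idx = np.argsort(evals)[::-1][:n_components]
    # projections of the training points onto the leading kernel components
    return (evecs[:, idx] * np.sqrt(np.maximum(evals[idx], 0))).T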
Code Example #30
    def test_fitSvd(self):
        """ Test PCA using SVD approach. """

        testRequestedDim = 1
        generatedDatasetsGaussian = 5

        # for svd PCA
        testDataSet = np.array([[0, 1, 2, 3, 4, 0,  0],
                                [0, 0, 0, 0, 0, 1, -1]], dtype=np.float64)
        expRes = np.array([[-1.42857, -0.42857, 0.57143, 1.57143, 2.57143, -1.42857, -1.42857]]) # Beware the 2-dimensionality
        resSvd = pca.pca(testDataSet, testRequestedDim, "svd")
        npt.assert_array_almost_equal(expRes, resSvd)

        ## with randomly generated data
        for i in range (generatedDatasetsGaussian):
            testDataSet = np.load("unitTest/testData/" + "testGaussianClasses" + str(i+1) + ".npy")
            expRes = np.load("unitTest/testData/" + "testGaussianClassesTransformed" + str(i+1) + ".npy")

            resSvd = pca.pca(testDataSet, 1, "svd")
            npt.assert_array_almost_equal(expRes, resSvd)
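
The SVD route reaches the same projection without forming a covariance matrix; a sketch under the same feature-by-sample layout (pca_svd is a hypothetical name, and component signs are again only defined up to a factor of -1):

import numpy as np

def pca_svd(data, n_components):
    centered = data - data.mean(axis=1, keepdims=True)
    U, s, Vt = np.linalg.svd(centered, full_matrices=False)
    return U[:, :n_components].T @ centered   # left singular vectors are the principal axes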
Code Example #31
File: astromNew.py Project: amerand/PRIMA
def bootstrapListOfFiles(
    files,
    directory="",
    firstGuess={"M0": 0.0, "SEP": 10.0, "PA": 90.0},
    maxResiduals=None,
    doNotFit=None,
    N=50,
    plot=False,
):
    """
    bootstrapped version of fitListOfFiles.

    LIMITATIONS: Only works with one Target, i.e. PA and SEP

    result in arcseconds. 'PCA' is the principal component reduction
    of the error ellipse
    """
    res = []
    p = Pool()
    cb_boot(None, init=True)
    for k in range(N):
        seed = np.random.randint(1e9)
        p.apply_async(f_boot, (files, directory, firstGuess, doNotFit, seed), callback=cb_boot)
    p.close()
    p.join()
    res = cb_boot(None, ret=True)
    # delta DEC
    X = np.array([z["BEST"]["SEP"] * np.cos(z["BEST"]["PA"] * np.pi / 180) for z in res])
    # delta RA cos(DEC)
    Y = np.array([z["BEST"]["SEP"] * np.sin(z["BEST"]["PA"] * np.pi / 180) for z in res])
    p = pca.pca(np.transpose(np.array([(X - X.mean()), (Y - Y.mean())])))
    err0 = p.coef[:, 0].std()
    err1 = p.coef[:, 1].std()
    if plot:
        pyplot.figure(10)
        pyplot.clf()
        pyplot.axes().set_aspect("equal", "datalim")
        pyplot.plot(Y, X, ".k", label="bootstrapped positions", alpha=0.5)
        pyplot.legend()
        pyplot.ylabel(r"$\Delta$ dec [arcsec]")
        pyplot.xlabel(r"$\Delta$ RA $\cos$(dec) [arcsec]")

    # results in usual coordinate frame.
    # units: arcseconds

    result = {
        "Delta RA cos(DEC)": Y,
        "Delta DEC": X,
        "AVG Delta RA cos(DEC)": Y.mean(),
        "AVG Delta DEC": X.mean(),
        "PCA": (list(p.base[0]), list(p.base[1])),
        "errs": (err0, err1),
    }
    return result
Code Example #32
def main():
    rootdir = 'fotos'
    
    kernel_degree = 2
    kernel_ctx = 1
    kernel_denom = 30
    
    people_number = 6

    train_number_kpca = 6 
    test_number_kpca = 4

    train_number_pca = 3 
    test_number_pca = 7

    action_op = input('Choose the action to perform:\n 1 -> Test a method (pca or kpca) \n 2 -> Classify an image \n Choice (1 or 2): ')
    method_op = input('Choose the method:\n 1 -> PCA \n 2 -> KPCA \n Choice (1 or 2): ')

    if action_op == '1':
        if method_op == '1':
            pca.pca(rootdir, people_number, train_number_pca, test_number_pca)
        elif method_op == '2':
            kpca.kpca(rootdir, people_number, train_number_kpca, test_number_kpca, kernel_denom, kernel_ctx, kernel_degree)
        else:
            print("Invalid method")
            exit(1)

    elif action_op == '2':
        name_face = input("Enter the person's name.\nOptions:\tagustin\n\t\taugusto\n\t\tcatalina\n\t\tfrancisco\n\t\tguido\n\t\tnicolas\nYour choice: ")
        number_face = input('Enter the photo number [1-10]: ')

        if method_op == '1':
            pca.classify_face_by_pca(rootdir, people_number, 6, name_face, number_face)
        elif method_op == '2':
            kpca.classify_face_by_kpca(rootdir, people_number, 4, name_face, number_face)
        else:
            print("Invalid method")
            exit(1)
    else:
        print("Invalid action")
        exit(1)
Code Example #33
def visualize_km(data, classes, k=3, num_components=5, perplexity=30, alpha=0.5):


    # Compute k-means without dimensionality reduction
    labels_without = kmeans(data, k)[:, -1]

    # Compute k-means with PCA
    pca_data = pca(data, False)[:, :num_components]
    labels_pca = kmeans(pca_data, k)[:, -1]

    # Project data to t-SNE
    tsne = manifold.TSNE(2, perplexity=perplexity)
    tsne_data = tsne.fit_transform(data)

    fig1, axes1 = plt.subplots(3, 2, figsize=(8, 8))
    plt.subplots_adjust(top=0.961, bottom=0.062, left=0.1, right=0.991, hspace=0.4, wspace=0.3)
    # Plot original labels with 2 principal components (PCA)
    axes1[0, 0].set_title('(a) Original labels (PCA)')
    axes1[0, 0].set_xlabel('1st principal component')
    axes1[0, 0].set_ylabel('2nd principal component')
    axes1[0, 0].scatter(pca_data[:, 0], pca_data[:, 1], c=classes, alpha=alpha, cmap="rainbow")

    # Plot original labels with 2 principal components (t-SNE)
    axes1[0, 1].set_title('(b) Original labels (t-SNE)')
    axes1[0, 1].set_xlabel('1st t-SNE dimension')
    axes1[0, 1].set_ylabel('2nd t-SNE dimension')
    axes1[0, 1].scatter(tsne_data[:, 0], tsne_data[:, 1], c=classes, alpha=alpha, cmap="rainbow")

    # Plot k-means without dim red (PCA)
    axes1[1, 0].set_title('(c) K-Means with original data (PCA)')
    axes1[1, 0].set_xlabel('1st principal component')
    axes1[1, 0].set_ylabel('2nd principal component')
    axes1[1, 0].scatter(pca_data[:, 0], pca_data[:, 1], c=labels_without, alpha=alpha, cmap="rainbow")

    # Plot k-means without dim red (t-SNE)
    axes1[1, 1].set_title('(d) K-Means with original data (t-SNE)')
    axes1[1, 1].set_xlabel('1st t-SNE dimension')
    axes1[1, 1].set_ylabel('2nd t-SNE dimension')
    axes1[1, 1].scatter(tsne_data[:, 0], tsne_data[:, 1], c=labels_without, alpha=alpha, cmap="rainbow")

    # Plot k-means after PCA (PCA)
    axes1[2, 0].set_title('(e) K-Means after PCA (PCA)')
    axes1[2, 0].set_xlabel('1st principal component')
    axes1[2, 0].set_ylabel('2nd principal component')
    axes1[2, 0].scatter(pca_data[:, 0], pca_data[:, 1], c=labels_pca, alpha=alpha, cmap="rainbow")

    # Plot k-means after PCA (t-SNE)
    axes1[2, 1].set_title('(f) K-Means after PCA (t-SNE)')
    axes1[2, 1].set_xlabel('1st t-SNE dimension')
    axes1[2, 1].set_ylabel('2nd t-SNE dimension')
    axes1[2, 1].scatter(tsne_data[:, 0], tsne_data[:, 1], c=labels_pca, alpha=alpha, cmap="rainbow")

    plt.show()
Code Example #34
    def get_mapping(self, labels):
        self.z_mapping = []
        self.c_mapping = []
        for i in self.elements:
            self.z_mapping.append(self.compute_z(labels[i]))
        self.z_mapping = np.asarray(self.z_mapping)
        Y, P, mu = pca.pca(np.asarray(self.z_mapping), 1)
        for y in Y:
            self.c_mapping.append(1 if y > 0 else 0)

        self.c_mapping = np.asarray(self.c_mapping)
Code Example #35
    def data_pre_treatment(training_data, validation_data):

        training_data, validation_data = pca(training_data, validation_data)

        log(
            "CLASSIF",
            "Transformed datasets using PCA.\nTraining Data: {} vectors; \
            Validation Data: {} vectors".format(len(training_data),
                                                len(validation_data)),
            time_start)

        return training_data, validation_data
Code Example #36
def linearRegression(tr, te, m, Zee, k=False):
    trainData = tr
    testData = te
    Z = Zee

    if k:
        featureVectors = kMeans(trainData, m)
    else:
        # pca matrix transposed
        mean, featureVectors = pca.pca(trainData, m)
    #to extract 1st column: pcaMatT[:, 0]

    trainData = np.matrix(trainData).transpose()
    testData = np.matrix(testData).transpose()

    featureVectors = np.pad(featureVectors, ((0, 1), (0, 0)), 'constant', constant_values=1)  # append a row of ones so the compressed data carries a bias term
    #get compressed training data
    ctData = featureVectors * trainData
    ctestData = featureVectors * testData

    Phi = ctData.transpose()

    # Compute the Wopt
    Wopt = (inv(Phi.transpose() * Phi) * Phi.transpose() * Z.transpose()).transpose()
    # print(Wopt.shape)

    SEkTrain = 0
    MRTrain = 0
    SEkTest = 0
    MRTest = 0

    # Calculate the mean square errors and misclassification ratio for the training and testing
    for i in range (0, 1000):
        SEkTrain += pow(norm((Wopt * ctData[:,i] - Z[:,i])[:-1,0]), 2) #Removing the padding
    SEkTrain /= 1000

    for i in range(0, 1000):
        MRTrain += getMCBool(Wopt, ctData[:,i], Z[:, i])
    MRTrain /= 1000.0

    for i in range (0, 1000):
        test = pow(norm((Wopt * ctestData[:,i] - Z[:,i])[:-1,0]), 2) #Removing the padding
        SEkTest += test
    SEkTest /= 1000

    for i in range(0, 1000):
        MRTest += getMCBool(Wopt, ctestData[:,i], Z[:, i])
    MRTest /= 1000.0

    #print SEkTrain, MRTrain
    #print SEkTest, MRTest
    return SEkTrain, MRTrain, SEkTest, MRTest
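
Numerically, the explicit normal-equation inverse above is often replaced by a least-squares solve; a sketch with the same shapes (Phi is samples x features, Z.transpose() is samples x outputs):

    # Wopt.T solves Phi @ W = Z.T in the least-squares sense
    Wopt = np.linalg.lstsq(Phi, Z.transpose(), rcond=None)[0].transpose()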
Code Example #37
File: projections.py Project: dngajjar/Courses
def projections(table, numdim):
    matrix = []
    for i in range(len(table.data[0])):
        tmp = []
        for j in range(len(table.name)-1):
            #print tables[ite][1][0].data[j][i]
            tmp += [float(table.data[j][i])]
        matrix += [tmp] 
    M = numpy.matrix(matrix)
    #C = table.data[table.klass[0]]
    px, py = pca.pca(M, numdim) # generated projected numdim of dimensionality from PCA
    #LDAM = lda.lda(M, C, numdim)
    return widen(table, px, py)  
Code Example #38
def plotDisp2D(all_loc):
    ZMat = pca(all_loc.T, k).T  # k (the target dimensionality) is assumed to be defined at module scope
    figure = plt.figure()
    ax = figure.add_subplot(111)
    ax.scatter(ZMat[:, 0].tolist(),
               ZMat[:, 1].tolist(),
               s=50,
               c='green',
               marker='.')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.savefig("THREE2TWO.png")
    plt.show()
Code Example #39
def main(mlp_experiment, net, nCxt, outLayer, feat_dir):

    ### Use dropout???
    if sum(net.dropouts):
        useDropout = True
    else:
        useDropout = False
    print 'useDropout', useDropout


    ### Create output directory
    outFeatDir = 'some path to output features/'
    
    # feat template: if your output feat directories obey a particular structure, have empty directories at this path
    feat_template='path for template directory structure needed, if needed'
    os.system('cp -R '+ feat_template + ' ' + feat_dir + outFeatDir)
    assert( os.path.isdir(feat_dir + outFeatDir + 'test_feat_16k/test_07') )
    print 'Output feat_dir:', feat_dir + outFeatDir

    ### Training files
    featList = 'files_train_noisy_16k.txt' #List of training data files
    Nframes=5438715; #Total number of vectors in the dataset

    print 'Transforming training features pre-PCA... '
    t1 = time.time()
    X = apply_nn_train_prePCA(net, nCxt, outLayer, feat_dir, featList, outFeatDir, Nframes, useDropout)
    t2 = time.time()
    print 'Total time taken for xfing training prePCA: ', (t2 - t1)/60, 'minutes'
    np.save('X_prePCA_scale0.05', X)
    # X = np.load('train_prePCA_X_likeMatlab.npy')

    print
    print 'Performing PCA...'
    P = pca(X,39)

    print
    print 'Transforming Training features post-PCA...'
    t1 = time.time()
    featList = 'files_train_noisy_16k_prePCA.txt'
    apply_nn_train_PCA(P, feat_dir, featList, outFeatDir)
    t2 = time.time()
    print 'Total time taken for applying PCA to training data:', (t2 - t1)/60, 'minutes'

    ### Testing data
    print
    print 'Transforming Test data...'
    featList = 'files_test_16k.txt'
    t1 = time.time()
    apply_nn_test(P, net, nCxt, outLayer, feat_dir, featList, outFeatDir, useDropout)
    t2 = time.time()
    print 'Total time taken for xfing testing features: ', (t2 - t1)/60, 'minutes'
Code Example #40
File: main.py Project: menglulul/PCA
def main():
    # read the data; the second argument is the number of columns to parse
    data, labels = read("Homework2_pca_c.txt", 12)
    # data = data.astype(np.float)

    my_pca_res = mypca.pca(data)
    sklearn_pca_res = skap.apply_pca(data)
    sklearn_svd_res = skap.apply_svd(data)
    sklearn_tsne_res = skap.apply_tsne(data)

    vs.visualization(my_pca_res, labels, 'my_pca', 'PC')
    vs.visualization(sklearn_pca_res, labels, 'sklearn_pca', 'PC')
    vs.visualization(sklearn_svd_res, labels, 'sklearn_svd', 'SV')
    vs.visualization(sklearn_tsne_res, labels, 'sklearn_tsne', 'tSNE')
Code Example #41
def st_sne( data, dim, layers=2, perplexity=30,
        verbose=True,
        E=[] ):
    '''Space-Time Embedding

    data is the NxDIM data matrix,
    dim is the embedding dimension (dim<<DIM),
    return the Nxdim embedding coordinates
    '''

    if data.shape[1] > 30:
        if verbose: print( 'PCA %d->%d' % (data.shape[1], 30) )
        data = pca.pca( data, 30 )
    return st_sned( dist2( data ), dim, layers, perplexity, verbose, E )
Code Example #42
File: som.py Project: quietcoolwu/MLCode
    def __init__(self, x, y, inputs, eta_b=0.3, eta_n=0.1, nSize=0.5, alpha=1, usePCA=1, useBCs=0, eta_bfinal=0.03,
                 eta_nfinal=0.01, nSizefinal=0.05):
        self.nData = np.shape(inputs)[0]
        self.nDim = np.shape(inputs)[1]
        self.mapDim = 2

        self.x = x
        self.y = y
        self.eta_b = eta_b
        self.eta_bfinal = eta_bfinal
        self.eta_n = eta_n
        self.eta_nfinal = eta_nfinal
        self.nSize = nSize
        self.nSizefinal = nSizefinal
        self.alpha = alpha

        self.map = np.mgrid[0:1:np.complex(0, x), 0:1:np.complex(0, y)]
        self.map = np.reshape(self.map, (2, x * y))

        if usePCA:
            dummy1, dummy2, evals, evecs = pca.pca(inputs, 2)
            self.weights = np.zeros((self.nDim, x * y))
            for i in range(x * y):
                for j in range(self.mapDim):
                    self.weights[:, i] += (self.map[j, i] - 0.5) * 2 * evecs[:, j]
        else:
            self.weights = (np.random.rand(self.nDim, x * y) - 0.5) * 2

        self.mapDist = np.zeros((self.x * self.y, self.x * self.y))
        if useBCs:
            for i in range(self.x * self.y):
                for j in range(i + 1, self.x * self.y):
                    xdist = np.min([(self.map[0, i] - self.map[0, j]) ** 2,
                                    (self.map[0, i] + 1 + 1. / self.x - self.map[0, j]) ** 2,
                                    (self.map[0, i] - 1 - 1. / self.x - self.map[0, j]) ** 2,
                                    (self.map[0, i] - self.map[0, j] + 1 + 1. / self.x) ** 2,
                                    (self.map[0, i] - self.map[0, j] - 1 - 1. / self.x) ** 2])
                    ydist = np.min([(self.map[1, i] - self.map[1, j]) ** 2,
                                    (self.map[1, i] + 1 + 1. / self.y - self.map[1, j]) ** 2,
                                    (self.map[1, i] - 1 - 1. / self.y - self.map[1, j]) ** 2,
                                    (self.map[1, i] - self.map[1, j] + 1 + 1. / self.y) ** 2,
                                    (self.map[1, i] - self.map[1, j] - 1 - 1. / self.y) ** 2])
                    self.mapDist[i, j] = np.sqrt(xdist + ydist)
                    self.mapDist[j, i] = self.mapDist[i, j]
        else:
            for i in range(self.x * self.y):
                for j in range(i + 1, self.x * self.y):
                    self.mapDist[i, j] = np.sqrt(
                        (self.map[0, i] - self.map[0, j]) ** 2 + (self.map[1, i] - self.map[1, j]) ** 2)
                    self.mapDist[j, i] = self.mapDist[i, j]
Code Example #43
def main():
    data_base1 = r'List03\Databases\KC1.csv'
    data_base2 = r'List03\Databases\CM1.csv'
    columns_names = "loc,v(g),ev(g),iv(g),n,v,l,d,i,e,b,t,lOCode,lOComment,lOBlank,locCodeAndComment,uniq_Op,uniq_Opnd,total_Op,total_Opnd,branchCount,defects".split(
        ',')
    df = pd.read_csv(data_base1, names=columns_names)  #Change daba_base1 or 2
    data = df.iloc[:, :-1].copy()  #Data without target
    target = df['defects']  #Target
    class_values = df['defects'].unique()  #Number of Classes
    k_components = 3  #[1,3,5,9,15,20]                    #Components for PCA
    #PCA, LDA instances
    pca_instance = pca.pca(data, target)
    lda_instance = lda.lda(df, target, class_values)

    #PCA----------------------------------------------------------------------
    cov_matriz = pca_instance.cov_matriz()
    eigenvalues, eigenvectors = pca_instance.get_eigen_value_vector(cov_matriz)
    eigen_vec = pca_instance.get_eigenvecs(eigenvalues, eigenvectors,
                                           k_components)
    pca_instance.normalize()
    new_dataset = pca_instance.change_base(eigen_vec,
                                           pca_instance.normalize_data)

    #LDA---------------------------------------------------------------------
    mean_vectors = lda_instance.calc_mean_vect()
    data_class = lda_instance.get_data_per_class()
    s_w = lda_instance.calc_sw(mean_vectors, data_class)
    s_b = lda_instance.calc_sb(mean_vectors)
    eig_pairs = lda_instance.get_eigs(s_w, s_b)
    lda_components = lda_instance.get_k_eigenvcs(eig_pairs,
                                                 len(class_values) - 1)
    new_space = pd.DataFrame(lda_instance.transform(lda_components))

    skf = StratifiedKFold(n_splits=3)  #Number of folds
    knns = [1, 3, 5]
    print("Components PCA :%.1d" % k_components)
    for j in knns:
        print("KNN = %.1d" % j)
        print("PCA")
        accuracy_pca = pca_instance.knn(new_dataset, j, skf)
        accuracy_without_pca = pca_instance.knn(data, j, skf)
        print("Acurracy with PCA:%.3f " % np.mean(accuracy_pca))
        print("Acurracy without PCA:%.3f\n" % np.mean(accuracy_without_pca))

        print("LDA")
        accuracy_lda = lda_instance.knn(new_space, j, skf)
        accuracy_without_lda = lda_instance.knn(data, j, skf)
        print("Acurracy with LDA:%.3f " % np.mean(accuracy_lda))
        print("Acurracy without LDA:%.3f\n" % np.mean(accuracy_without_lda))
Code Example #44
def train_and_save(a):
    process_data('train/','train_sifts/',a) #process training data
    features,labels = read_gesture_feature_labels('train_sifts/')
    classnames = unique(labels) #sorted lists of unique class names
    V,S,m = pca.pca(features)
    #keep most important dimensions
    dims = 50
    V = V[:dims]
    features = array([dot(V,f-m) for f in features])
    blist = [features[where (labels==c)[0]] for c in classnames]
    with open('features.pkl', 'wb') as f:
        pickle.dump(blist,f)
        pickle.dump(classnames,f)
        pickle.dump(V,f)
        pickle.dump(m,f)
Code Example #45
File: som.py Project: kikoval/Neural-networks
    def __init__(self,x,y,inputs,eta_b=0.3,eta_n=0.1,nSize=0.5,alpha=1,usePCA=1,useBCs=0,eta_bfinal=0.03,eta_nfinal=0.01,nSizefinal=0.05):
        self.nData = np.shape(inputs)[0]
        self.nDim = np.shape(inputs)[1]
        
        # output map size
        # TODO make more universal
        self.mapDim = 2
        self.x = x
        self.y = y

        self.eta_b = eta_b
        self.eta_bfinal = eta_bfinal
        self.eta_n = eta_n
        self.eta_nfinal = eta_nfinal
        self.nSize = nSize
        self.nSizefinal = nSizefinal
        self.alpha = alpha

        self.map = np.mgrid[0:1:complex(0,x),0:1:complex(0,y)]
        self.mapDim = 2
        self.map = np.reshape(self.map,(2,x*y))
        
        # weights initialization
        if usePCA:
            dummy1,dummy2,evals,evecs = pca.pca(inputs,2)
            self.weights = np.zeros((self.nDim,x*y))
            for i in xrange(self.x*self.y):
                for j in range(self.mapDim):
                    self.weights[:,i] += (self.map[j,i]-0.5)*2*evecs[:,j]            
        else:
            # random values from the interval <-1,1>
            self.weights = (np.random.rand(self.nDim,x*y)-0.5)*2    
        
        # pre-computing the map distances
        self.mapDist = np.zeros((self.x*self.y,self.x*self.y))
        if useBCs:
            for i in xrange(self.x*self.y):
                for j in xrange(i+1,self.x*self.y):
                    xdist = np.min([(self.map[0,i]-self.map[0,j])**2,(self.map[0,i]+1+1./self.x-self.map[0,j])**2,(self.map[0,i]-1-1./self.x-self.map[0,j])**2,(self.map[0,i]-self.map[0,j]+1+1./self.x)**2,(self.map[0,i]-self.map[0,j]-1-1./self.x)**2])
                    ydist = np.min([(self.map[1,i]-self.map[1,j])**2,(self.map[1,i]+1+1./self.y-self.map[1,j])**2,(self.map[1,i]-1-1./self.y-self.map[1,j])**2,(self.map[1,i]-self.map[1,j]+1+1./self.y)**2,(self.map[1,i]-self.map[1,j]-1-1./self.y)**2])
                    self.mapDist[i,j] = np.sqrt(xdist+ydist)
                    self.mapDist[j,i] = self.mapDist[i,j]                
        else:
            for i in xrange(self.x*self.y):
                for j in xrange(i+1,self.x*self.y):
                    self.mapDist[i,j] = np.sqrt((self.map[0,i] - self.map[0,j])**2 + (self.map[1,i] - self.map[1,j])**2)
                    self.mapDist[j,i] = self.mapDist[i,j]
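The usePCA branch spreads the initial weights over the plane spanned by the two leading eigenvectors of the inputs, so the map starts out aligned with the data's main directions of variation. A self-contained sketch of that idea, using numpy's eigendecomposition in place of the project's pca.pca (the 4x4 grid and toy data are illustrative only):

import numpy as np

rng = np.random.default_rng(0)
inputs = rng.normal(size=(200, 5))                      # N x D toy data
evals, evecs = np.linalg.eigh(np.cov(inputs, rowvar=False))
evecs = evecs[:, ::-1][:, :2]                           # top-2 eigenvectors, D x 2
grid = np.mgrid[0:1:complex(0, 4), 0:1:complex(0, 4)].reshape(2, -1)
# each unit's weight vector lies in the plane of the two leading eigenvectors,
# with grid coordinates rescaled from [0, 1] to [-1, 1]
weights = evecs @ ((grid - 0.5) * 2)                    # D x (4*4)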
コード例 #46
0
ファイル: eigenfaces.py プロジェクト: antonyross/eigenfaces
    def train(self, root_training_images_folder):
        self.projected_classes = []

        self.list_of_arrays_of_images, self.labels_list, \
            list_of_matrices_of_flattened_class_samples = \
                read_images(root_training_images_folder)

        # create matrix to store all flattened images
        images_matrix = np.array([np.array(Image.fromarray(img)).flatten()
                                  for img in self.list_of_arrays_of_images], 'f')

        # perform PCA
        self.eigenfaces_matrix, variance, self.mean_Image = pca.pca(images_matrix)

        # Project each class sample (as a class matrix), then use the class
        # average as that class's weights for comparison with the target image
        for class_sample in list_of_matrices_of_flattened_class_samples:
            class_weights_vertex = self.project_image(class_sample)
            self.projected_classes.append(class_weights_vertex.mean(0))
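project_image is called above but not defined in this excerpt. In a standard eigenfaces pipeline it centers each flattened sample on mean_Image and projects onto the eigenface rows; a plausible sketch (an assumption, not the project's actual implementation):

    def project_image(self, X):
        # X: one or more flattened images, shape (k, n_pixels);
        # each row of eigenfaces_matrix is one principal direction
        return np.dot(X - self.mean_Image, self.eigenfaces_matrix.T)

The class average of these projected rows is what train stores per class.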
コード例 #47
0
def run_pca(sdata, pca_fraction=0.85, eigenvector_weight=0.25):
    """

    Create a binary matrix via gen_matrix, normalise it, and then run PCA to reduce dimensionality.

    Usage: run_pca(sdata, pca_fraction, eigenvector_weight)

        sdata               - parsers.Parse object with sample data as raw sequences
        pca_fraction        - The top fraction of principal components to keep
        eigenvector_weight  - The top fraction of SNPs to keep, selected by their high absolute weights in those principal components

    Returns: modified parsers.Parse object

    This function runs makeplot once the data in sdata has been converted to binary and then normalised.
    It calls console to log its results to screen and to logfile.

    """

    console = display.ConsoleDisplay(logname = 'PCA results')
    
    M = numpy.array([ x.data for x in sdata.samples ])

    console.log("Normalising %sx%s matrix" % (len(sdata.samples), len(sdata.samples[0].data)))

    M = pca.normalise(M, log2=False, sub_medians=False, center=True, scale=False)   #Only center the data

    #Unrolling pca.select_genes_by_pca...
    V = pca.pca(M, pca_fraction)    #From SVD
    SNP_indices = pca.select_genes(V, eigenvector_weight)

    console.log("Found %s principle components in the top %s fraction" % (len(V), pca_fraction)) #166
    console.log("Found %s reliable SNPs occurring with high weight (top %s by absolute value)" % (len(SNP_indices), eigenvector_weight)) #410

    #Don't reduce dimensionality yet; makeplot needs the full-dimensional data for the plot
    for i in xrange(len(sdata.samples)):
        sdata.samples[i].data = M[i]
    
    makeplot(sdata, V, 'PCA results - All samples')

    #Reduce dimensions
    for i in xrange(len(sdata.samples)):
        sdata.samples[i].data = M[i].take(SNP_indices)

    return sdata
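pca.select_genes is not shown here; conceptually it keeps the SNPs whose absolute loadings in the retained components are largest, up to the top eigenvector_weight fraction. A rough standalone sketch of that idea (an illustration, not the library's implementation):

import numpy

def select_genes(V, eigenvector_weight):
    # V: components x features matrix of loadings
    scores = numpy.abs(V).max(axis=0)            # strongest loading per feature
    n_keep = int(round(eigenvector_weight * V.shape[1]))
    # highest-scoring feature indices, sorted ascending for use with take()
    return numpy.sort(numpy.argsort(scores)[::-1][:n_keep])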
コード例 #48
0
	def plotSegments(self, rawData, segPoints="pca", subplot=False, applyPCA=False):
		if "currentTime" in rawData.columns:
			rawData = rawData.drop("currentTime",axis=1)
		if not subplot:
			plt.figure(figsize=(11,9))
		if segPoints == "pca":
			segPoints = self.pca_segmenter.findSegments(rawData)
		if segPoints == "minExtrema":
			segPoints = self.relativeExtremaSegments(rawData, maxMin="min")
		if segPoints == "maxExtrema":
			segPoints = self.relativeExtremaSegments(rawData, maxMin="max")
		if applyPCA:
			plt.plot(pca(rawData, n_components=1)[0])
		else:
			plt.plot(rawData)
		for s in segPoints:
			if s <= len(rawData):
				plt.axvline(s,color='black',linewidth=2)
		if not subplot:
			plt.show()
コード例 #49
0
   def setup_X(self):
       '''
       PCA project the observation matrix
       '''
       #
       # transpose, then mean-subtract the matrix;
       # these are necessary steps for PCA
       #
       # first transpose; resulting in a FxN
       # matrix where F is the number of 
       # features and N is the number of
       # instances
       self.indices_to_ids, self.X = self.unlabeled_datasets[0].to_numpy_arr(indicator=True, build_id_dict=True)
       X_t = self.X.T
       # subtract the mean from each row
   
       X_t_bar = [r-r.mean() for r in X_t]
       # build a new matrix
       X_t_bar = numpy.array(X_t_bar)
       
       # now run pca on the mean-subtracted,
       # transposed matrix
       self.V,P = pca.pca(X_t_bar)
       self.full_P = P
       # keep only the top r principal components
       if self.r is not None:
           P = P[:, :self.r]
           self.P = P.T
 
       
       # finally, project X onto the lower-dimensional
       # space. note that PX will be an r x N matrix
       # wherein each column corresponds to an 
       # instance projected onto the top r principal 
       # components
       self.PX = numpy.dot(P.T, X_t)    
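With self.P holding the top r components (an r x F matrix) and self.PX the projected training data (r x N), a new length-F instance can be mapped into the same space by the same product; a one-line sketch, assuming x is a 1-D numpy array:

x_proj = numpy.dot(self.P, x)  # shape (r,)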
コード例 #50
0
	def findSegments(self, rawData, minSegSize=20):
		PCs = pca(rawData, n_components=2)[0]
		minSegs = argrelmin(PCs[:,0],order=minSegSize)[0]
		maxSegs = argrelmax(PCs[:,0],order=minSegSize)[0]
		minSegs_secondComponentValues = [[PCs[:,1][s]] for s in minSegs]
		maxSegs_secondComponentValues = [[PCs[:,1][s]] for s in maxSegs]

		clf=cluster.KMeans(2)
		min_clusters = clf.fit_predict(minSegs_secondComponentValues)
		max_clusters = clf.fit_predict(maxSegs_secondComponentValues)

		indexes_min_cluster0 = [s for (s,c) in zip(minSegs,min_clusters) if c == 0]
		indexes_min_cluster1 = [s for (s,c) in zip(minSegs,min_clusters) if c == 1]
		indexes_max_cluster0 = [s for (s,c) in zip(maxSegs,max_clusters) if c == 0]
		indexes_max_cluster1 = [s for (s,c) in zip(maxSegs,max_clusters) if c == 1]

		# first-PC values at each candidate point (list comprehensions, so len() below also works on Python 3)
		values_min_cluster0 = [PCs[s,0] for s in indexes_min_cluster0]
		values_min_cluster1 = [PCs[s,0] for s in indexes_min_cluster1]
		values_max_cluster0 = [PCs[s,0] for s in indexes_max_cluster0]
		values_max_cluster1 = [PCs[s,0] for s in indexes_max_cluster1]

		average_min_cluster0 = abs(sum(values_min_cluster0)/float(len(values_min_cluster0)))
		average_min_cluster1 = abs(sum(values_min_cluster1)/float(len(values_min_cluster1)))
		average_max_cluster0 = abs(sum(values_max_cluster0)/float(len(values_max_cluster0)))
		average_max_cluster1 = abs(sum(values_max_cluster1)/float(len(values_max_cluster1)))

		max_average = max(average_min_cluster0, average_min_cluster1, average_max_cluster0, average_max_cluster1)

		if max_average == average_min_cluster0:
			return indexes_min_cluster0
		if max_average == average_min_cluster1:
			return indexes_min_cluster1
		if max_average == average_max_cluster0:
			return indexes_max_cluster0
		if max_average == average_max_cluster1:
			return indexes_max_cluster1
コード例 #51
0
'''
Created on Jun 14, 2011

@author: Song Yu
'''
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
import pca


def replaceNanWithMean():
    datMat = pca.loadDataSet('secom.data', ' ')
    numFeat = shape(datMat)[1]
    for i in range(numFeat):
        meanVal = mean(datMat[nonzero(~isnan(datMat[:,i].A))[0],i]) # mean of the non-NaN values in this column
        datMat[nonzero(isnan(datMat[:,i].A))[0],i] = meanVal        # replace NaNs with that mean
    return datMat


dataMat = replaceNanWithMean()

lowDDataMat, reconMat, total, varPercentage = pca.pca(dataMat, topNfeat=9999999)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(range(1, 51), varPercentage[:50], marker='^')
plt.xlabel('Principal Component Number')
plt.ylabel('Percentage of Variance')
plt.show()
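The varPercentage values come from a modified pca.pca that also reports each component's share of the variance; given the eigenvalues of the covariance matrix, this is just each eigenvalue as a percentage of their sum. A standalone sketch of that computation (an illustration, not the library's code; the numpy names are in scope via the star import above):

eigVals = linalg.eigvalsh(cov(dataMat, rowvar=0))[::-1]  # eigenvalues, descending
varPercentage = eigVals / eigVals.sum() * 100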

コード例 #52
0
ファイル: createFig3.py プロジェクト: HunterAllman/kod
        x = r0 + 8.0
        y = 1.0*r1 + x
        xcord1.append(x)
        ycord1.append(y)
    elif groupNum == 2:
        x = r0 + 0.0
        y = 1.0*r1 + x
        xcord2.append(x)
        ycord2.append(y)
    fw.write("%f\t%f\t%d\n" % (x, y, groupNum))

fw.close()
fig = plt.figure()
ax = fig.add_subplot(211)
ax.scatter(xcord0,ycord0, marker='^', s=90)
ax.scatter(xcord1,ycord1, marker='o', s=50,  c='red')
ax.scatter(xcord2,ycord2, marker='v', s=50,  c='yellow')
ax = fig.add_subplot(212)
myDat = pca.loadDataSet('testSet3.txt')
lowDDat,reconDat = pca.pca(myDat[:,0:2],1)
label0Mat = lowDDat[nonzero(myDat[:,2]==0)[0],:2][0] #get the items with label 0
label1Mat = lowDDat[nonzero(myDat[:,2]==1)[0],:2][0] #get the items with label 1
label2Mat = lowDDat[nonzero(myDat[:,2]==2)[0],:2][0] #get the items with label 2
#ax.scatter(label0Mat[:,0],label0Mat[:,1], marker='^', s=90)
#ax.scatter(label1Mat[:,0],label1Mat[:,1], marker='o', s=50,  c='red')
#ax.scatter(label2Mat[:,0],label2Mat[:,1], marker='v', s=50,  c='yellow')
ax.scatter(label0Mat[:,0],zeros(shape(label0Mat)[0]), marker='^', s=90)
#ax.scatter(label1Mat[:,0],zeros(shape(label1Mat)[0]), marker='o', s=50,  c='red')
#ax.scatter(label2Mat[:,0],zeros(shape(label2Mat)[0]), marker='v', s=50,  c='yellow')
plt.show()
コード例 #53
0
ファイル: gen.py プロジェクト: mal2/Project-Simulation
for i in range(numberOfTestUnits):
    # two Gaussian classes, stacked column-wise into one 2 x (2*size) matrix
    x = np.random.multivariate_normal([10, 10], [[1,0],[0,50]], size).T
    y = np.random.multivariate_normal([10, 60], [[1,0],[0,50]], size).T
    x = np.concatenate((x,y), axis=1)
    np.save("unitTest/testData/" + "testGaussianClasses" + str(i+1), x)

    # Memory mapped version
    xm = np.memmap("unitTest/testData/" + "testGaussianClassesMmap" + str(i+1) + ".npy", dtype="float64", mode="w+", shape=(2, size*2))
    xm[:] = x
    xm.flush()


    #do PCA
    x = np.load("unitTest/testData/" + "testGaussianClasses" + str(i+1) + ".npy")
    p = pca.pca(x, 1, mode="svd")

    # save data of pca
    np.save("unitTest/testData/" + "testGaussianClassesTransformed" + str(i+1), p)


    # Generate concentric circles
    x, y = dts.make_circles(n_samples=1000, noise=0.1, factor=0.25)
    y = np.reshape(y, (1000,1)) # These are class labels: 0, 1
    x = np.concatenate((x,y), axis=1)
    np.save("unitTest/testData/" + "testCircles" + str(i+1), x.T)

    # Memory mapped circles
    #xm = np.memmap("unitTest/testData/" + "testCirclesMmap" + str(i+1) + ".npy", dtype="float64", mode="w+", shape=(3, 1000))
    #xm[:] = x.T
    #xm.flush()
コード例 #54
0
	def plotAll(self, sensors=['gyroX','gyroY','gyroZ','accelX','accelY','accelZ','magX','magY','magZ'], LR='L', segment=None, applyPCA=False):
		plt.figure(figsize=(12, 8), dpi=80)

		# one subplot per body segment, all sharing the x-axis of the first
		panels = [("Foot", self.feet[2]), ("Shin", self.shins[2]),
		          ("Thigh", self.thighs[1]), ("Hip", self.hips[1]),
		          ("Chest", self.chest[1])]
		first_ax = None
		for row, (title, node) in enumerate(panels):
			ax = plt.subplot(5, 1, row + 1, sharex=first_ax)
			if first_ax is None:
				first_ax = ax
			plt.title(title)
			data = node[LR][sensors]
			if segment:
				self.plotSegments(data, segPoints=segment, subplot=True, applyPCA=applyPCA)
			elif applyPCA:
				plt.plot(pca(data, n_components=1)[0])
			else:
				data.plot(ax=ax)
			if row < len(panels) - 1:
				# hide x labels on all but the bottom panel
				plt.xlabel("")
				plt.setp(ax.get_xticklabels(), visible=False)

		plt.show()
コード例 #55
0
ファイル: font_example.py プロジェクト: zachmullen/cv
from PIL import Image
import pca
import numpy as np
import pylab
import os

indir = 'data/a_thumbs'
imlist = [os.path.join(indir, f) for f in os.listdir(indir)]

# use the first image to recover the thumbnail dimensions
im = np.array(Image.open(imlist[0]))
m, n = im.shape[0:2]
count = len(imlist)

# stack every image as a flattened row of one float matrix
immatrix = np.array([np.array(Image.open(i)).flatten() for i in imlist], 'f')

V, S, immean = pca.pca(immatrix)

pylab.figure()
pylab.gray()
pylab.subplot(2, 4, 1)
pylab.imshow(immean.reshape(m, n))    # mean image
for i in range(7):
    pylab.subplot(2, 4, i+2)
    pylab.imshow(V[i].reshape(m, n))  # i-th principal mode

pylab.show()
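A natural follow-up is reconstructing a thumbnail from its leading components. A short sketch, assuming the rows of V returned by pca.pca are (approximately) orthonormal principal directions and immean is the flattened mean image:

k = 7
coeffs = np.dot(V[:k], immatrix[0] - immean)  # project the first image onto the top k modes
recon = immean + np.dot(coeffs, V[:k])        # back-project to pixel space

pylab.figure()
pylab.gray()
pylab.imshow(recon.reshape(m, n))
pylab.show()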