def pcaImgWrdMat(highDim, lowDim):
    """Reduce every category's image-word matrix to ``lowDim`` dimensions.

    The dataset name is read from the command line through the module-level
    ``parser``. For each category name in ``getCatMap(dataset)`` the function

    1. loads two space-delimited integer matrices, named
       ``<catName><highDim><iwmext>`` and ``NEG<catName><highDim><iwmext>``;
    2. stacks them row-wise and builds a label column holding 1 for the rows
       of the first file and 0 for the rows of the ``NEG`` file;
    3. fits and applies PCA, ProbabilisticPCA, RandomizedPCA, KernelPCA and
       MiniBatchSparsePCA (in that order) to the stacked matrix;
    4. appends the label column to each projection and saves it as
       ``<catName><highDim><lowDim>.<pca|ppca|rpca|kpca|spca>``.

    Parameters
    ----------
    highDim : int
        Dimensionality tag used to build the input and output file names.
    lowDim : int
        Number of components requested from every decomposition; also part
        of the output file names.

    Returns
    -------
    None
        Results are only written to disk.
    """
    options, _ = parser.parse_args(sys.argv[1:])
    dataset = options.dataset

    inputPrefix = rootDir + dataset + iwmDir
    outputPrefix = rootDir + dataset + outputDir
    highDimTag = str(highDim)

    # (tag, factory) pairs: the tag is both the progress message stem and the
    # output file extension. Factories keep estimator construction lazy, so a
    # fresh estimator is built right before each fit.
    reducers = (
        ('pca', lambda: PCA(n_components=lowDim)),
        ('ppca', lambda: ProbabilisticPCA(n_components=lowDim)),
        ('rpca', lambda: RandomizedPCA(n_components=lowDim)),
        ('kpca', lambda: KernelPCA(n_components=lowDim)),
        ('spca', lambda: MiniBatchSparsePCA(n_components=lowDim, n_iter=100)),
    )

    for catName in getCatMap(dataset).keys():
        # Parenthesised single-argument print emits the same text under
        # Python 2 and Python 3.
        print('%s : %d : %d\n' % (catName, highDim, lowDim))

        # Builtin int replaces the np.int alias, which newer NumPy releases
        # no longer provide; the resulting dtype is the same.
        posData = np.loadtxt(inputPrefix + catName + highDimTag + iwmext,
                             dtype=int, delimiter=' ')
        negData = np.loadtxt(inputPrefix + 'NEG' + catName + highDimTag + iwmext,
                             dtype=int, delimiter=' ')

        catData = np.vstack((posData, negData))
        labels = np.vstack((np.ones((posData.shape[0], 1), int),
                            np.zeros((negData.shape[0], 1), int)))

        outputStem = outputPrefix + catName + highDimTag + str(lowDim)
        for tag, makeReducer in reducers:
            print(tag + '...')
            reduced = makeReducer().fit(catData).transform(catData)
            np.savetxt(outputStem + '.' + tag, np.hstack((reduced, labels)),
                       fmt='%f', delimiter=' ')
def test_probabilistic_pca_vs_pca():
    """Check that PCA.score_samples agrees with ProbabilisticPCA.score."""
    n_samples, n_features = 100, 3
    rng = np.random.RandomState(0)
    center = np.array([3, 4, 5])
    X = rng.randn(n_samples, n_features) * .1 + center

    # Both models use two components. ProbabilisticPCA is fitted with its
    # default arguments -- presumably the homoscedastic=True case; confirm
    # against ProbabilisticPCA.fit.
    pca = PCA(n_components=2)
    pca.fit(X)
    ppca = ProbabilisticPCA(n_components=2)
    ppca.fit(X)

    pca_scores = pca.score_samples(X)
    ppca_scores = ppca.score(X)
    assert_array_almost_equal(pca_scores, ppca_scores)
def test_probabilistic_pca_2():
    """Check that training data scores higher than a noisier sample."""
    n_samples, n_features = 100, 3
    rng = np.random.RandomState(0)
    center = np.array([3, 4, 5])

    train = rng.randn(n_samples, n_features) * .1 + center
    model = ProbabilisticPCA(n_components=2).fit(train)
    train_ll = model.score(train)

    # Second sample: same centre, twice the noise scale, drawn from the same
    # random stream after the training data.
    noisy = rng.randn(n_samples, n_features) * .2 + center
    noisy_ll = model.score(noisy)

    assert_greater(train_ll.mean(), noisy_ll.mean())
def test_probabilistic_pca_1():
    """Check that the mean ProbabilisticPCA score is of the expected size."""
    n_samples, n_features = 1000, 3
    sigma = 0.1
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features) * sigma + np.array([3, 4, 5])

    model = ProbabilisticPCA(n_components=2).fit(X)
    mean_ll = model.score(X).mean()

    # Reference value: negative differential entropy of an isotropic Gaussian
    # with standard deviation sigma, summed over the features.
    reference = -0.5 * np.log(2 * np.pi * np.exp(1) * sigma**2) * n_features

    # decimal=0 only requires the ratio to be roughly 1.
    np.testing.assert_almost_equal(mean_ll / reference, 1, decimal=0)
def test_probabilistic_pca_3():
    """Fit and score ProbabilisticPCA in both of its fit modes.

    Stated expectation: in an over-fitting setting the homoscedastic model
    should score slightly worse than the heteroscedastic one.
    """
    n_samples, n_features = 100, 3
    rng = np.random.RandomState(0)
    center = np.array([3, 4, 5])
    X = rng.randn(n_samples, n_features) * .1 + center

    model = ProbabilisticPCA(n_components=2)

    # First pass: default fit arguments.
    model.fit(X)
    default_ll = model.score(X)

    # Second pass: the same estimator refitted with homoscedastic=False.
    model.fit(X, homoscedastic=False)
    hetero_ll = model.score(X)

    # NOTE(review): default_ll and hetero_ll are never compared, so this test
    # only checks that both fit/score paths run without raising. Confirm
    # whether the comparison was left out on purpose before adding one.
def test_probabilistic_pca_4():
    """Check that held-out likelihood is highest for a single component."""
    n_samples, n_features = 200, 3
    rng = np.random.RandomState(0)
    loading = np.array([3, 4, 5])
    offset = np.array([1, 0, 7])

    def draw():
        # Unit noise plus one latent factor scaled by `loading`, then shifted.
        noise = rng.randn(n_samples, n_features)
        latent = rng.randn(n_samples, 1)
        return noise + latent * loading + offset

    # Learning set first, test set second, both from the same random stream.
    Xl = draw()
    Xt = draw()

    # Mean test-set score for n_components = 0 .. n_features - 1.
    mean_scores = np.array([
        ProbabilisticPCA(n_components=k).fit(Xl).score(Xt).mean()
        for k in range(n_features)
    ])
    assert_true(mean_scores.argmax() == 1)