# Split into a training set and a test set using a stratified k fold:
# only the first (train, test) index pair produced for k=4 is used.
# next(iter(...)) replaces the Python-2-only ``.next()`` method and behaves
# the same on Python 2.6+ and Python 3.
train, test = next(iter(StratifiedKFold(y, k=4)))
X_train, X_test = X[train], X[test]
y_train, y_test = y[train], y[test]

################################################################################
# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
# dataset): unsupervised feature extraction / dimensionality reduction
n_components = 150

# print(...) around a single pre-formatted string emits the same text under
# the Python 2 print statement and the Python 3 print function.
print("Extracting the top %d eigenfaces from %d faces" % (
    n_components, X_train.shape[0]))
t0 = time()
pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
print("done in %0.3fs" % (time() - t0))

# One (h, w) image per component; h and w are defined outside this excerpt.
# NOTE(review): the ``.T`` assumes components_ is laid out as
# (n_features, n_components) -- confirm against the scikit-learn version used.
eigenfaces = pca.components_.T.reshape((n_components, h, w))

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t0))

################################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
# Restart the timer; presumably read after the fit that follows this excerpt.
t0 = time()
digits = datasets.load_digits()

# reshape the data using the traditional (n_samples, n_features) shape
n_samples = len(digits.images)
X = digits.images.reshape((n_samples, -1))
n_features = X.shape[1]

# The same number of components is used for both decompositions below.
n_components = 16

######################################################################
# Compute a PCA (eigendigits) on the digit dataset

# print(...) around a single pre-formatted string emits the same text under
# the Python 2 print statement and the Python 3 print function.
print("Extracting the top %d eigendigits from %d images" % (
    n_components, X.shape[0]))
t0 = time()
pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X)
print("done in %0.3fs" % (time() - t0))

# NOTE(review): the ``.T`` depends on the layout of components_ in the
# scikit-learn version used -- confirm before relying on the axis order.
eigendigits = pca.components_.T

######################################################################
# Compute a NMF on the digit dataset

print("Extracting %d non-negative features from %d images" % (
    n_components, X.shape[0]))
t0 = time()
# NOTE(review): ``beta`` and ``sparseness`` are keyword arguments of the NMF
# class imported elsewhere in the file; verify they exist in the installed
# version.
nmf = NMF(n_components=n_components, init='nndsvd', beta=5, tol=1e-2,
          sparseness="components").fit(X)
categories = [c for c, f in files] category_names = np.unique(categories) target = np.searchsorted(category_names, categories) selected_target = target mask = np.in1d(target, selected_target) X_train = faces y_train = target ################################################################################ # Compute a PCA (eigenfaces) on the face dataset n_components = 150 print "Extracting the top %d eigenfaces" % n_components pca_sl = RandomizedPCA(n_components=n_components, whiten=True) pca_sl.fit(X_train) #components, mean = pca.pca(X_train, n_components) #print "PCA components shape", pca.components_.T.shape #eigenfaces = pca.components_.T.reshape((-1, 64, 64)) # project the input data on the eigenfaces orthonormal basis X_train_pca = pca_sl.transform(X_train) #X_train_pca = pca.transform(X_train, mean, components) ################################################################################ # Train a SVM classification model print "Fitting the classifier to the training set" param_grid = {
# split into a training and testing set: only the first (train, test) index
# pair produced by the stratified k fold (k=4) is used.
# next(iter(...)) replaces the Python-2-only ``.next()`` method and behaves
# the same on Python 2.6+ and Python 3.
train, test = next(iter(StratifiedKFold(y, k=4)))
X_train, X_test = X[train], X[test]
y_train, y_test = y[train], y[test]

################################################################################
# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
# dataset): unsupervised feature extraction / dimensionality reduction
n_components = 150

# print(...) around a single pre-formatted string emits the same text under
# the Python 2 print statement and the Python 3 print function.
print("Extracting the top %d eigenfaces from %d faces" % (
    n_components, X_train.shape[0]))
t0 = time()
pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
print("done in %0.3fs" % (time() - t0))

# One (h, w) image per component; h and w are defined outside this excerpt.
# NOTE(review): the ``.T`` assumes components_ is laid out as
# (n_features, n_components) -- confirm against the scikit-learn version used.
eigenfaces = pca.components_.T.reshape((n_components, h, w))

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t0))

################################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")