def dimensional(tx, ty, rx, ry, add=None):
    """Compare dimensionality-reduction algorithms over every component count.

    For each target dimensionality i = 1 .. n_features, fit PCA, ICA,
    random projection and SelectKBest on the training data, printing the
    fit+transform runtime and, where the algorithm supports it, the
    reconstruction error (L2 norm between the data and its inverse
    transform).

    Args:
        tx: training feature matrix (samples x features).
        ty: training targets; forwarded to fit() (ravelled for SelectKBest).
        rx, ry: unused here; kept so the signature matches sibling routines.
        add: score function handed to the K-best selector (e.g. chi2).
    """
    n_features = tx[1].size  # column count, read off the second sample row

    print("pca")
    for j in range(n_features):
        i = j + 1
        print("===" + str(i))
        compressor = PCA(n_components=i)
        t0 = time()
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        runtime = time() - t0
        V = compressor.components_
        print(runtime, V.shape, compressor.score(tx))
        # Reconstruction error of the i-component projection.
        distances = np.linalg.norm(tx - compressor.inverse_transform(newtx))
        print(distances)
    print("pca done")

    print("ica")
    for j in range(n_features):
        i = j + 1
        print("===" + str(i))
        # BUG FIX: the loop variable was never passed to ICA, so every
        # iteration fitted an identical full-rank model; use i components
        # like the three sibling loops do.
        compressor = ICA(n_components=i, whiten=True)
        t0 = time()
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        runtime = time() - t0
        print(newtx.shape, runtime)
        distances = np.linalg.norm(tx - compressor.inverse_transform(newtx))
        print(distances)
    print("ica done")

    print("RP")
    for j in range(n_features):
        i = j + 1
        print("===" + str(i))
        compressor = RandomProjection(n_components=i)
        t0 = time()
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        runtime = time() - t0
        # Random projections expose no inverse_transform; report shape only.
        shape = newtx.shape
        print(runtime, shape)
    print("RP done")

    print("K-best")
    for j in range(n_features):
        i = j + 1
        print("===" + str(i))
        compressor = best(add, k=i)
        t0 = time()
        # SelectKBest wants a 1-D target vector, hence ravel().
        compressor.fit(tx, y=ty.ravel())
        newtx = compressor.transform(tx)
        runtime = time() - t0
        shape = newtx.shape
        print(runtime, shape)
    print("K-best done")
def test_pipeline_transform():
    """A pipeline whose final step is a transformer must expose transform,
    fit_transform and inverse_transform, all agreeing with calling the
    wrapped estimator directly."""
    data = load_iris().data
    reducer = PCA(n_components=2)
    pipe = Pipeline([('pca', reducer)])

    # All three ways of producing the reduced data must coincide.
    reduced_a = pipe.fit(data).transform(data)
    reduced_b = pipe.fit_transform(data)
    reduced_c = reducer.fit_transform(data)
    assert_array_almost_equal(reduced_a, reduced_b)
    assert_array_almost_equal(reduced_a, reduced_c)

    # Inverse-transforming through the pipeline matches the bare estimator.
    restored_pipe = pipe.inverse_transform(reduced_a)
    restored_direct = reducer.inverse_transform(reduced_a)
    assert_array_almost_equal(restored_pipe, restored_direct)
# NOTE(review): exact duplicate of the test_pipeline_transform defined
# earlier in this file; at import time this later definition shadows the
# earlier one, so only one copy ever runs. One of the two should be removed.
def test_pipeline_transform():
    # Test whether pipeline works with a transformer at the end.
    # Also test pipeline.transform and pipeline.inverse_transform.
    iris = load_iris()
    X = iris.data
    pca = PCA(n_components=2)
    pipeline = Pipeline([('pca', pca)])
    # test transform and fit_transform:
    # all three paths to the 2-D projection must agree.
    X_trans = pipeline.fit(X).transform(X)
    X_trans2 = pipeline.fit_transform(X)
    X_trans3 = pca.fit_transform(X)
    assert_array_almost_equal(X_trans, X_trans2)
    assert_array_almost_equal(X_trans, X_trans3)
    # Round trip: pipeline.inverse_transform must match the bare estimator.
    X_back = pipeline.inverse_transform(X_trans)
    X_back2 = pca.inverse_transform(X_trans)
    assert_array_almost_equal(X_back, X_back2)
def pca(X, y, components, max_cluster, num_classes, run_nn=False):
    """Fit whitened PCA on a 70/30 train split, report the held-out
    reconstruction error and eigenvalues, then hand the reduced data to
    the clustering experiments (and, optionally, the neural network)."""
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, train_size=0.7, shuffle=True)

    reducer = PCA(n_components=components, whiten=True)
    reducer.fit(X_train, y=y_train)
    train_reduced = reducer.transform(X_train)
    test_reduced = reducer.transform(X_test)

    # Mean squared reconstruction error on the held-out split.
    reconstructed = reducer.inverse_transform(test_reduced)
    loss = ((X_test - reconstructed) ** 2).mean()
    print("Reconstruction Error " + str(loss))

    eigenvalues = reducer.explained_variance_
    print(eigenvalues)

    if run_nn:
        mlp_classifier(train_reduced, y_train, 0.3, plot=True,
                       X_test=test_reduced, y_test=y_test)

    # Recombine the reduced splits and run both clustering experiments.
    X_new = np.concatenate((train_reduced, test_reduced), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)
    kmeans(X_new, y, max_cluster, num_classes, run_nn=run_nn,
           plot_cluster=True, reduction_algo='PCA')
    expectation_max(X_new, y, max_cluster, num_classes, run_nn=run_nn,
                    plot_cluster=True, reduction_algo='PCA')
r('predictions_dl <- h2o.predict(dlmodel, test3.hex)') r('head(predictions_dl)') ## new predictions pred = r('as.matrix(predictions_dl)') return var(pred -test) ################################################################ figure() variances_table = [] for i in range(2,11,1): pca = PCA(n_components=i) der = derivatives[train_mask_TL] pca.fit(der) X = pca.transform(derivatives[test_mask]) pred_pca_temp = (pca.inverse_transform(X)) # var_fraction_pca_TL = var(pred_pca_temp-derivatives[test_mask])/var(derivatives[test_mask]) #plot([i], [var(pred_pca_temp-derivatives[test_mask])],'D') var_fraction_DL_TL = DL( derivatives[train_mask_TL], derivatives[test_mask], i)/var(derivatives[test_mask]) #plot([i], [var_DL_TL ],'Dk') pca = PCA(n_components=i) der = derivatives[train_mask_no_TL] pca.fit(der) X = pca.transform(derivatives[test_mask]) pred_pca_temp = (pca.inverse_transform(X)) var_fraction_pca_no_TL = var(pred_pca_temp-derivatives[test_mask])/var(derivatives[test_mask])
from numpy import loadtxt, genfromtxt, shape, mean, sort, savetxt, size, array, copy
from pylab import figure
from matplotlib.pyplot import plot, savefig, xlabel, ylabel, scatter, axis, xlim, fill_between, legend, text
# FIX: import PCA from the public sklearn.decomposition namespace; the
# private sklearn.decomposition.pca module path is deprecated and has been
# removed in modern scikit-learn releases.
from sklearn.decomposition import PCA

# Input/output locations for the spectra data and generated plots.
data_dir = '../data_all_types/'
out_dir = './plots/'

# Raw data: flux derivatives, fluxes, class labels, per-spectrum metadata.
der = loadtxt(data_dir + 'derivatives.dat')
flux = loadtxt(data_dir + 'fluxes_not_res.dat.gz')
labels = loadtxt(data_dir + 'labels.dat')
spectra_data = genfromtxt(data_dir + 'spectra_data.dat', dtype=None)

# PCA reconstruction of the derivatives with 4 components.
pca = PCA(n_components=4)
pca.fit(der)
X = pca.transform(der)
pred_PCA = (pca.inverse_transform(X))

# Same reconstruction with 15 components, for comparison.
pca = PCA(n_components=15)
pca.fit(der)
X = pca.transform(der)
pred_PCA_15PC = (pca.inverse_transform(X))

# Autoencoder (deep-learning) reconstruction computed offline.
pred_DL = loadtxt('out_DeepLearning/predictions_120,100,90,50,30,20,4,20,30,50,90,100,120_seed1_dl.dat')

# (disabled) alternative selections of spectra to plot
#range_to_plot=[1,2,3,4,5,6,10]
#range_to_plot=range(300)
#range_to_plot=[]
#labels_to_plot=[]
#for i in range(size(labels)):
#    if spectra_data['f3'][i]> 0.2 and spectra_data['f3'][i]<.5:
#        range_to_plot.append(i)