# Imports inferred from the names used below; ICA, RandomProjection and best are assumed
# to alias FastICA, GaussianRandomProjection and SelectKBest respectively.
from time import time

import numpy as np
from sklearn.decomposition import PCA, FastICA as ICA
from sklearn.feature_selection import SelectKBest as best
from sklearn.random_projection import GaussianRandomProjection as RandomProjection


def dimensional(tx, ty, rx, ry, add=None):
    """Sweep the number of components for PCA, ICA, random projections and k-best
    feature selection on the training data, reporting runtime and reconstruction error."""
    n_features = tx.shape[1]

    print("pca")
    for i in range(1, n_features + 1):
        print("===" + str(i))
        compressor = PCA(n_components=i)
        t0 = time()
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        runtime = time() - t0  # fit + transform time
        V = compressor.components_
        print(runtime, V.shape, compressor.score(tx))
        # Frobenius norm of the reconstruction error
        distances = np.linalg.norm(tx - compressor.inverse_transform(newtx))
        print(distances)
    print("pca done")

    print("ica")
    for i in range(1, n_features + 1):
        print("===" + str(i))
        compressor = ICA(n_components=i, whiten=True)
        t0 = time()
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        runtime = time() - t0
        print(newtx.shape, runtime)
        distances = np.linalg.norm(tx - compressor.inverse_transform(newtx))
        print(distances)
    print("ica done")

    print("RP")
    for i in range(1, n_features + 1):
        print("===" + str(i))
        compressor = RandomProjection(n_components=i)
        t0 = time()
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        runtime = time() - t0
        print(runtime, newtx.shape)
    print("RP done")

    print("K-best")
    for i in range(1, n_features + 1):
        print("===" + str(i))
        compressor = best(add, k=i)
        t0 = time()
        compressor.fit(tx, y=ty.ravel())
        newtx = compressor.transform(tx)
        runtime = time() - t0
        print(runtime, newtx.shape)
    print("K-best done")
from numpy.testing import assert_array_almost_equal
from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline


def test_pipeline_transform():
    # Test whether pipeline works with a transformer at the end.
    # Also test pipeline.transform and pipeline.inverse_transform
    iris = load_iris()
    X = iris.data
    pca = PCA(n_components=2)
    pipeline = Pipeline([('pca', pca)])

    # test transform and fit_transform:
    X_trans = pipeline.fit(X).transform(X)
    X_trans2 = pipeline.fit_transform(X)
    X_trans3 = pca.fit_transform(X)
    assert_array_almost_equal(X_trans, X_trans2)
    assert_array_almost_equal(X_trans, X_trans3)

    X_back = pipeline.inverse_transform(X_trans)
    X_back2 = pca.inverse_transform(X_trans)
    assert_array_almost_equal(X_back, X_back2)
def pca(X, y, components, max_cluster, num_classes, run_nn=False):
    X_train, X_test, y_train, y_test = train_test_split(X,
        y, test_size=0.3, train_size=0.7, shuffle=True)
    pca_compress = PCA(n_components=components, whiten=True)
    pca_compress.fit(X_train, y=y_train)
    X_train_new = pca_compress.transform(X_train)
    X_test_new = pca_compress.transform(X_test)
    X_original = pca_compress.inverse_transform(X_test_new)
    loss = ((X_test - X_original)**2).mean()
    print("Reconstruction Error " + str(loss))
    eigenvalues = pca_compress.explained_variance_
    print(eigenvalues)
    if run_nn:
        mlp_classifier(X_train_new, y_train, 0.3, plot=True, X_test=X_test_new, y_test=y_test)
    X_new = np.concatenate((X_train_new, X_test_new), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)
    kmeans(X_new, y, max_cluster, num_classes, run_nn=run_nn, plot_cluster=True,
        reduction_algo='PCA')
    expectation_max(X_new, y, max_cluster, num_classes, run_nn=run_nn,
        plot_cluster=True, reduction_algo='PCA')


# Fragment: the lines below appear to be the tail of the DL() helper called in the script
# further down (DL(train, test, i)); the start of the function is missing from the source,
# so the header and the `...` placeholder here are assumptions. The fragment pulls the
# h2o deep-learning predictions back from R via rpy2's r() and returns the residual variance.
def DL(train, test, n_components):
    ...
    r('predictions_dl <- h2o.predict(dlmodel, test3.hex)')
    r('head(predictions_dl)')
    ## new predictions
    pred = r('as.matrix(predictions_dl)')
    return var(pred - test)
################################################################

figure()
variances_table = []

for i in range(2,11,1):
    pca = PCA(n_components=i)
    der = derivatives[train_mask_TL]
    pca.fit(der)
    X = pca.transform(derivatives[test_mask])
    pred_pca_temp = (pca.inverse_transform(X))

    #
    var_fraction_pca_TL = var(pred_pca_temp-derivatives[test_mask])/var(derivatives[test_mask])
    #plot([i], [var(pred_pca_temp-derivatives[test_mask])],'D')

    var_fraction_DL_TL = DL( derivatives[train_mask_TL], derivatives[test_mask], i)/var(derivatives[test_mask])
    #plot([i], [var_DL_TL ],'Dk')

    pca = PCA(n_components=i)
    der = derivatives[train_mask_no_TL]
    pca.fit(der)
    X = pca.transform(derivatives[test_mask])
    pred_pca_temp = (pca.inverse_transform(X))

    var_fraction_pca_no_TL = var(pred_pca_temp-derivatives[test_mask])/var(derivatives[test_mask])
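
# Note on the metric above (not part of the original script): var(prediction - truth) /
# var(truth) is the fraction of the signal's variance left unexplained by the
# reconstruction; 0 means a perfect fit, 1 means no better than predicting the mean.
# A tiny self-contained illustration:
import numpy as _np
_truth = _np.array([1.0, 2.0, 3.0, 4.0])
_recon = _np.array([1.1, 1.9, 3.2, 3.8])
_unexplained = _np.var(_recon - _truth) / _np.var(_truth)  # 0.02: about 2% of the variance missed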
from numpy import loadtxt, genfromtxt, shape, mean, sort, savetxt, size, array, copy
from pylab import figure
from matplotlib.pyplot import plot, savefig, xlabel, ylabel, scatter, axis, xlim, fill_between, legend, text
from sklearn.decomposition import PCA
data_dir = '../data_all_types/'
out_dir = './plots/'

der = loadtxt(data_dir+'derivatives.dat')
flux = loadtxt(data_dir+'fluxes_not_res.dat.gz')
labels = loadtxt(data_dir+'labels.dat')
spectra_data = genfromtxt(data_dir+'spectra_data.dat',dtype=None)

pca = PCA(n_components=4)
pca.fit(der)
X = pca.transform(der)
pred_PCA = (pca.inverse_transform(X))
pca = PCA(n_components=15)
pca.fit(der)
X = pca.transform(der)
pred_PCA_15PC = (pca.inverse_transform(X))
pred_DL = loadtxt('out_DeepLearning/predictions_120,100,90,50,30,20,4,20,30,50,90,100,120_seed1_dl.dat' )
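
# A hedged sketch (not in the original script) comparing the reconstructions above with
# the same unexplained-variance fraction used earlier; var is taken from numpy and the
# predictions are assumed to have the same shape as der.
from numpy import var
for _name, _pred in (('PCA 4 PC', pred_PCA), ('PCA 15 PC', pred_PCA_15PC), ('DL', pred_DL)):
    print(_name, var(_pred - der) / var(der))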


#range_to_plot=[1,2,3,4,5,6,10]
#range_to_plot=range(300)

#range_to_plot=[]
#labels_to_plot=[]
#for i in range(size(labels)):
#    if spectra_data['f3'][i]> 0.2 and spectra_data['f3'][i]<.5:
#        range_to_plot.append(i)