Example #1
0
def Dimension_Reduction():
    """Compare a PCA embedding against the project's `reduced_dimension_data`
    ('corr' method) embedding on synthetic data.

    For each candidate component count and each sample size, the data is
    standardized, reduced with both PCA and the helper, and a linear
    regression from the PCA space to the helper's space is fit; its R^2
    score is accumulated in `r2` and the per-size mean is printed.
    """
    from Helper_func import reduced_dimension_data
    n_times = 1  # repetitions per component count (rows of r2)
    n_size = [4096, 2048, 1024, 512, 256, 128, 56, 10, 5]  # sample sizes swept
    # NOTE(review): r2 is reused (fully overwritten) across the `comp` loop,
    # so each printout reflects only the current component count.
    r2 = np.zeros((n_times, len(n_size)))
    dim_data = 50  # ambient dimensionality passed to gen_data
    from tqdm import tqdm
    n_comp = [5, 10, 15, 20]  # PCA component counts to compare
    for comp in n_comp:
        for i in tqdm(xrange(n_times)):
            dims = PCA(n_components=comp)
            for j, n in enumerate(n_size):
                N, C = gen_data(dim_data, N_size=n)

                # Transform the train data-set
                scaler = preprocessing.StandardScaler(with_mean = True,\
                    with_std = True).fit(N)
                X_train = scaler.transform(N)

                # Reduced dimensions
                Train_PCA = dims.fit_transform(X_train)
                # NOTE(review): X_train is passed as both the train and the
                # test argument of reduced_dimension_data — confirm intended.
                Train_HDR, Test_HDR = reduced_dimension_data(
                    X_train, X_train, [2, comp, 'corr'])

                # R^2 of a linear map from the PCA embedding to the HDR one
                from sklearn.linear_model import LinearRegression
                lm = LinearRegression()
                lm.fit(Train_PCA, Train_HDR)
                r2[i, j] = lm.score(Train_PCA, Train_HDR)

        print("comp", comp, "mean r2 square", r2.mean(axis=0))
Example #2
0
        # Flatten the test split to 2-D (samples x features).
        # NOTE(review): the two-element unpack above implies X_test is
        # already 2-D, making this reshape a no-op — confirm with the
        # (unseen) enclosing branch.
        d1, _ = X_test.shape
        X_test = X_test.reshape((d1, -1))

    # Standardize both splits with statistics fitted on the training set only
    scaler = preprocessing.StandardScaler(with_mean=True,
                                          with_std=True).fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Subsample 5000 rows from each split to keep the experiment tractable.
    # NOTE(review): np.random.choice samples WITH replacement by default,
    # so duplicate rows are possible — confirm this is intended.
    Arr = np.random.choice(X_train.shape[0], 5000)
    X_train = X_train[Arr, :]
    y_train = y_train[Arr, :]
    Arr = np.random.choice(X_test.shape[0], 5000)
    X_test = X_test[Arr, :]
    y_test = y_test[Arr, :]
    from Helper_func import reduced_dimension_data
    acc = np.zeros((N, 8))  # one row of 8 classifier accuracies per repetition
    n_comp = 2  # NOTE(review): not used in the visible code
    # g_size = 2
    rng = np.random.RandomState(0)  # NOTE(review): not used in the visible code
    from Helper_func import comparison
    print("y Train", y_train.shape, "y Test", y_test.shape)
    # Repeat the reduce-then-classify experiment N times
    for i in tqdm(xrange(N)):
        start = time.time()
        # Non-linear dimension reduction via the project helper
        Train, Test = reduced_dimension_data(X_train, X_test, [1000, 3, 0.99])
        print("Finished NDR time ", time.time() - start)
        print("Train", Train.shape, "Test", Test.shape)
        # comparison() evaluates the classifiers, returning names + accuracies
        names, acc[i, :] = comparison(Train, y_train, Test, y_test)
        print("########################################################")
        print("classifiers", names)
        print("Accuracies", acc[i, :])
        print("########################################################")
Example #3
0
# Reduce the data with NDR once, score classifiers on it, then visualize
# the raw data with PCA and the reduced data with NDR.
X_train = X
X_test  = X  # NOTE(review): train and test are deliberately the same array here
N = 1  # number of experiment repetitions

print("Train", X_train.shape, "Test", X_test.shape)
from Helper_func import reduced_dimension_data
sco = np.zeros((1, 9))  # per-classifier scores (9 classifiers compared)
acc = np.zeros((1, 9))  # per-classifier accuracies
n_comp = 2
g_size = 2
rng = np.random.RandomState(0)
from Helper_func import comparison

# range() iterates identically to xrange() here and is Python-3 compatible
for i in tqdm(range(N)):
    start = time.time()
    Train, Test = reduced_dimension_data(X_train, X_test, [20, 5, 0.99])
    print("Finished NDR time ", time.time() - start)
    names, sco[i, :], acc[i, :] = comparison(Train, y_train, Test, y_test)

import timeit
print('PCAwithoutNDR')
start = time.time()
PCA_visual(X_train, y, 'PCA')
print("Finished PCA time ", time.time() - start)

print('NDR')
start = time.time()
NDR_visual(Train, y, 'NDR')
# BUG FIX: this times the NDR visualization but was labelled "PCA"
# (copy-paste from the block above).
print("Finished NDR time ", time.time() - start)

# print('TSNE_without')
        # use grid search cross-validation to optimize the bandwidth
        # NOTE(review): this is the tail of a loop (header not visible here)
        # that appears to fit one KDE per value of `i`.
        temp = extract_samples(XTrain, yTrain, i)
        params = {'bandwidth': np.logspace(-1, 1, 20)}
        grid = GridSearchCV(KernelDensity(), params)
        grid.fit(temp)
        print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))
        # Keep the refit best-bandwidth KDE as the density model for this group
        kde_skl = grid.best_estimator_
        pdf_d.append(kde_skl)
    return pdf_d


from Helper_func import reduced_dimension_data

## Lets do this for all data-sets and how this works
# For each data set: reduce dimensions, fit per-class KDEs, classify the
# test split with the Bayes rule, and print the accuracy.
data_name = [
    'rolling', 'sensorless', 'notmnist', 'arcene', 'gisette', 'dexter'
]
# Per-data-set parameters handed to reduced_dimension_data; the third entry
# selects the 'corr' reduction method.
param = [[2, 5, 'corr'], [2, 24, 'corr'], [2, 392, 'corr'], [100, 100, 'corr'],
         [10, 50, 'corr'], [100, 20, 'corr']]
for i, data in enumerate(data_name):
    from Helper_func import import_pickled_data
    XTr, yTr, TTe, yTe = import_pickled_data(data)  # TTe is the test X split
    print("data", data)
    ## Lets reduce the data first
    XTr, XTe = reduced_dimension_data(XTr, TTe, param[i])
    print("Shapes", XTr.shape, XTe.shape, yTr.shape, yTe.shape)
    # One fitted density model per class
    pdf = classify_samples(XTr, yTr)
    print(len(pdf))
    predict = bayes_classifier(XTe, pdf)
    from sklearn.metrics import accuracy_score
    # FIX: use the print() function for consistency with the rest of the
    # file (this line alone used the Python 2 print statement).
    print(accuracy_score(yTe, predict, normalize=True, sample_weight=None))