def Dimension_Reduction():
    """Compare PCA projections against the custom HDR reduction.

    For several PCA component counts, generate synthetic data at a range of
    sample sizes, reduce it with both PCA and ``reduced_dimension_data``,
    fit a linear regression from the PCA projection to the HDR projection,
    and report the mean R^2 per component count.

    Relies on module-level ``np``, ``PCA``, ``preprocessing`` and
    ``gen_data`` being in scope.
    """
    # Hoist the imports out of the loops -- re-importing per iteration is
    # wasted work.
    from Helper_func import reduced_dimension_data
    from sklearn.linear_model import LinearRegression
    from tqdm import tqdm

    n_times = 1
    n_size = [4096, 2048, 1024, 512, 256, 128, 56, 10, 5]
    dim_data = 50
    n_comp = [5, 10, 15, 20]
    r2 = np.zeros((n_times, len(n_size)))

    for comp in n_comp:
        # range() instead of the Python-2-only xrange(); behaviour identical.
        for i in tqdm(range(n_times)):
            dims = PCA(n_components=comp)
            for j, n in enumerate(n_size):
                N, C = gen_data(dim_data, N_size=n)
                # Standardize the generated data before either reduction.
                scaler = preprocessing.StandardScaler(
                    with_mean=True, with_std=True).fit(N)
                X_train = scaler.transform(N)
                # PCA projection of the standardized data.
                Train_PCA = dims.fit_transform(X_train)
                # HDR projection; the train matrix is deliberately passed as
                # both train and test, and the "test" output is unused here.
                Train_HDR, _ = reduced_dimension_data(
                    X_train, X_train, [2, comp, 'corr'])
                # R^2 of predicting the HDR projection from the PCA one.
                lm = LinearRegression()
                lm.fit(Train_PCA, Train_HDR)
                r2[i, j] = lm.score(Train_PCA, Train_HDR)
        print("comp", comp, "mean r2 square", r2.mean(axis=0))
# ---- Preprocess train/test, subsample, and benchmark classifiers on the
# ---- NDR-reduced data.  Assumes X_train/X_test/y_train/y_test, N, np,
# ---- preprocessing, tqdm and time are already in scope from earlier in
# ---- the file.

# Flatten every test sample to a vector so train and test share a layout.
d1, _ = X_test.shape
X_test = X_test.reshape((d1, -1))

# Fit the scaler on the training split only, then apply it to both splits
# (fitting on the test data would leak information into the benchmark).
scaler = preprocessing.StandardScaler(with_mean=True, with_std=True).fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Subsample 5000 rows from each split to keep the benchmark tractable.
# NOTE(review): np.random.choice samples WITH replacement by default, so
# these subsets may contain duplicate rows -- confirm this is intended.
Arr = np.random.choice(X_train.shape[0], 5000)
X_train = X_train[Arr, :]
y_train = y_train[Arr, :]
Arr = np.random.choice(X_test.shape[0], 5000)
X_test = X_test[Arr, :]
y_test = y_test[Arr, :]

from Helper_func import reduced_dimension_data
from Helper_func import comparison

# One accuracy row per repetition, 8 classifiers compared.
# NOTE(review): N is defined earlier in the file -- not visible here.
acc = np.zeros((N, 8))
n_comp = 2
# g_size = 2
rng = np.random.RandomState(0)

print("y Train", y_train.shape, "y Test", y_test.shape)
# range() instead of the Python-2-only xrange(); behaviour identical.
for i in tqdm(range(N)):
    start = time.time()
    Train, Test = reduced_dimension_data(X_train, X_test, [1000, 3, 0.99])
    print("Finished NDR time ", time.time() - start)
    print("Train", Train.shape, "Test", Test.shape)
    names, acc[i, :] = comparison(Train, y_train, Test, y_test)
    print("########################################################")
    print("classifiers", names)
    print("Accuracies", acc[i, :])
    print("########################################################")
# ---- Visual comparison of PCA vs NDR embeddings on a single data-set.
# ---- Assumes X, y, y_train, y_test, np, tqdm, time, PCA_visual and
# ---- NDR_visual are already in scope from earlier in the file.
X_train = X
X_test = X
N = 1
print("Train", X_train.shape, "Test", X_test.shape)

from Helper_func import reduced_dimension_data
from Helper_func import comparison

sco = np.zeros((1, 9))
acc = np.zeros((1, 9))
n_comp = 2
g_size = 2
rng = np.random.RandomState(0)

# range() instead of the Python-2-only xrange(); behaviour identical.
for i in tqdm(range(N)):
    start = time.time()
    Train, Test = reduced_dimension_data(X_train, X_test, [20, 5, 0.99])
    print("Finished NDR time ", time.time() - start)
    names, sco[i, :], acc[i, :] = comparison(Train, y_train, Test, y_test)

import timeit

# Time the PCA-based visualization.
print('PCAwithoutNDR')
start = time.time()
PCA_visual(X_train, y, 'PCA')
print("Finished PCA time ", time.time() - start)

# Time the NDR-based visualization.
print('NDR')
start = time.time()
NDR_visual(Train, y, 'NDR')
# Fixed copy-paste label: this timing covers NDR_visual, not PCA.
print("Finished NDR time ", time.time() - start)
# print('TSNE_without')
# use grid search cross-validation to optimize the bandwidth temp = extract_samples(XTrain, yTrain, i) params = {'bandwidth': np.logspace(-1, 1, 20)} grid = GridSearchCV(KernelDensity(), params) grid.fit(temp) print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth)) kde_skl = grid.best_estimator_ pdf_d.append(kde_skl) return pdf_d from Helper_func import reduced_dimension_data ## Lets do this for all data-sets and how this works data_name = [ 'rolling', 'sensorless', 'notmnist', 'arcene', 'gisette', 'dexter' ] param = [[2, 5, 'corr'], [2, 24, 'corr'], [2, 392, 'corr'], [100, 100, 'corr'], [10, 50, 'corr'], [100, 20, 'corr']] for i, data in enumerate(data_name): from Helper_func import import_pickled_data XTr, yTr, TTe, yTe = import_pickled_data(data) print("data", data) XTr, XTe = reduced_dimension_data(XTr, TTe, param[i]) ## Lets reduce the data first print("Shapes", XTr.shape, XTe.shape, yTr.shape, yTe.shape) pdf = classify_samples(XTr, yTr) print(len(pdf)) predict = bayes_classifier(XTe, pdf) from sklearn.metrics import accuracy_score print accuracy_score(yTe, predict, normalize=True, sample_weight=None)