def rbf_sampler_exp(): fourier_score_list = [] for i in range(100): data_train, targets_train = data_parser(num_in_samples=150) data_test, targets_test = data_parser(num_in_samples=1000) feature_map_fourier = RBFSampler(gamma=.2, random_state=10) # feature_map_nystroem = Nystroem(gamma=.2, random_state=1) fourier_approx_gp = pipeline.Pipeline([("feature_map", feature_map_fourier), ("GP", GaussianProcess(corr='squared_exponential', theta0=1e-2, thetaL=1e-4, thetaU=1e-1, random_start=100, nugget = 3.00e-12))]) fourier_approx_gp.set_params(feature_map__n_components=2) fourier_approx_gp.fit(data_train, targets_train) fourier_score = fourier_approx_gp.score(data_test, targets_test) # print "Start of fitting RBF" # feature_map_fourier.fit(data_train, targets_train) # x = feature_map_fourier.transform(data_train) # # print feature_map_fourier # print "data_train = ", len(data_train) # print "x = ", len(x) print fourier_score fourier_score_list.append(fourier_score) plt.figure(figsize=(15.0, 11.0)) plt.boxplot(fourier_score_list) plt.ylabel("R2 Score", fontsize=20) plt.xlabel("Number of sampling points", fontsize=20) plt.show()
def nn_gp(): for dimension in [6]: print "Current Dimension = ", dimension X, y = data_parser(num_in_samples=18000, file_name="all_outputs_"+str(dimension)+"d.txt") X= np.array(X).astype(np.float) y= np.array(y).astype(np.float) data_test, targets_test = data_parser(num_in_samples=1000, file_name="all_outputs_"+str(dimension)+"d.txt") parameters = dict(algorithm = ['l-bfgs', 'sgd'], hidden_layer_sizes = [60, 100], activation = ['logistic', 'tanh', 'relu'], alpha = [0.00001], max_iter = [2000], learning_rate = ['invscaling']) # clf = MultilayerPerceptronRegressor(algorithm='l-bfgs', hidden_layer_sizes=60, activation='tanh', alpha=0.00001, max_iter=2000, learning_rate='invscaling') # l-bfgs sgd mlp = MultilayerPerceptronRegressor() clf = grid_search.GridSearchCV(mlp, parameters) clf.fit(X, y) data_test = np.array(data_test).astype(np.float) targets_test = np.array(targets_test).astype(np.float) # mlp.fit(X, y) # y_compute = mlp.predict(X) # mlp_score = mlp.score(data_test, targets_test) grid_score = clf.score(data_test, targets_test) print grid_score print(grid_search.best_estimator_)
def nn_gp(): for dimension in [6]: print "Current Dimension = ", dimension X, y = data_parser(num_in_samples=18000, file_name="all_outputs_" + str(dimension) + "d.txt") X = np.array(X).astype(np.float) y = np.array(y).astype(np.float) data_test, targets_test = data_parser(num_in_samples=1000, file_name="all_outputs_" + str(dimension) + "d.txt") parameters = dict(algorithm=['l-bfgs', 'sgd'], hidden_layer_sizes=[60, 100], activation=['logistic', 'tanh', 'relu'], alpha=[0.00001], max_iter=[2000], learning_rate=['invscaling']) # clf = MultilayerPerceptronRegressor(algorithm='l-bfgs', hidden_layer_sizes=60, activation='tanh', alpha=0.00001, max_iter=2000, learning_rate='invscaling') # l-bfgs sgd mlp = MultilayerPerceptronRegressor() clf = grid_search.GridSearchCV(mlp, parameters) clf.fit(X, y) data_test = np.array(data_test).astype(np.float) targets_test = np.array(targets_test).astype(np.float) # mlp.fit(X, y) # y_compute = mlp.predict(X) # mlp_score = mlp.score(data_test, targets_test) grid_score = clf.score(data_test, targets_test) print grid_score print(grid_search.best_estimator_)
def svm_gp():
    """Hybrid SVM + Gaussian-process experiment.

    Fits a NuSVR on a large training sample, then grows a small GP training
    pool one randomly drawn point per iteration (refitting the GP each time,
    with a text progress bar), and finally prints R2 scores for several
    ad-hoc combinations of the SVM and GP predictions on a held-out test set.
    """
    final_results = {}
    # for dimension in [2, 3, 4, 5, 6, 12]:
    for dimension in [6]:
        print "Current Dimension = ", dimension
        X, y = data_parser(num_in_samples=15000, file_name="all_outputs_"+str(dimension)+"d.txt")
        gp_data, gp_target = data_parser(num_in_samples=10, file_name="all_outputs_"+str(dimension)+"d.txt")
        data_test, targets_test = data_parser(num_in_samples=1000, file_name="all_outputs_"+str(dimension)+"d.txt")
        # Fit regression model
        svm_non_linear = svm.NuSVR()
        # svm_non_linear = svm.SVR(C=10, kernel='poly', epsilon=0.02, degree=5, tol=1e-5)
        y_non_linear = svm_non_linear.fit(X, y)
        # GP training pool as a dict mapping target value -> input point.
        # NOTE(review): duplicate target values silently overwrite earlier
        # points in this dict — confirm that is intended.
        gp_data_base = {}
        for i in range(len(gp_target)):
            gp_data_base[gp_target[i]] = gp_data[i]
        # Console progress bar for the 100 GP refit iterations below.
        toolbar_width = 100
        sys.stdout.write("[%s]" % (" " * toolbar_width))
        sys.stdout.flush()
        sys.stdout.write("\b" * (toolbar_width+1))  # return to start of line, after '['
        for i in range(100):
            gp = GaussianProcess(corr='squared_exponential', theta0=1e-2,
                                 thetaL=1e-4, thetaU=1e-1, random_start=100,
                                 nugget=3.00e-14)
            # Pick a random sample
            # Refit the GP on the accumulated pool: values are the input
            # points, keys are the corresponding targets.
            gp.fit(gp_data_base.values(), gp_data_base.keys())
            random_data, random_target = data_parser(num_in_samples=1, file_name="all_outputs_"+str(dimension)+"d.txt")
            gp_data_base[random_target[0]] = random_data[0]
            # Get the score for this point from
            # model_0 = svm_non_linear.predict(random_data) + gp.predict(random_data)
            # model_1 = svm_non_linear.predict(random_data) * gp.predict(random_data)
            # y_pred, MSE = gp.predict(data_test, eval_MSE=True)
            # print MSE
            sys.stdout.write("-")
            sys.stdout.flush()
        sys.stdout.write("\n")
        # model_0 = svm_non_linear.predict(data_test) + gp.predict(data_test)
        # model_1 adds the GP's predictive MSE (element [1] of the
        # eval_MSE=True return) to the SVM prediction; model_2 is the
        # negated product of the two predictions.
        final_results[dimension] = {"model_0": svm_non_linear.predict(data_test),
                                    "model_1": svm_non_linear.predict(data_test) + gp.predict(data_test, eval_MSE=True)[1],
                                    "model_2": -1 * svm_non_linear.predict(data_test) * gp.predict(data_test)}
        model_0 = svm_non_linear.predict(data_test)
        model_1 = svm_non_linear.predict(data_test) + gp.predict(data_test, eval_MSE=True)[1]
        model_2 = -1 * svm_non_linear.predict(data_test) * gp.predict(data_test)
        model_3 = (svm_non_linear.predict(data_test) + gp.predict(data_test)) * 0.5
        print "model_0 score = ", repr(r2_score(targets_test, model_0))
        print "model_1 score = ", repr(r2_score(targets_test, model_1))
        print "model_2 score = ", repr(r2_score(targets_test, model_2))
        print "model_3 score = ", repr(r2_score(targets_test, model_3))
        print "---------------------------"
def pca_analysis():
    """Fit PCA with 1..12 components and print per-sample log-likelihoods."""
    for n_comp in range(1, 13):
        samples, _targets = data_parser(num_in_samples=20000)
        model = PCA(n_components=n_comp)
        model.fit(samples)
        # print(model.explained_variance_ratio_)
        print(model.score_samples(samples))
def pca_analysis():
    """Sweep the PCA component count from 1 to 12, printing score_samples each time."""
    for component_count in range(1, 13):
        train_inputs, train_outputs = data_parser(num_in_samples=20000)
        decomposer = PCA(n_components=component_count)
        decomposer.fit(train_inputs)
        # print(decomposer.explained_variance_ratio_)
        print(decomposer.score_samples(train_inputs))
def rbf_sampler_exp(): fourier_score_list = [] for i in range(100): data_train, targets_train = data_parser(num_in_samples=150) data_test, targets_test = data_parser(num_in_samples=1000) feature_map_fourier = RBFSampler(gamma=.2, random_state=10) # feature_map_nystroem = Nystroem(gamma=.2, random_state=1) fourier_approx_gp = pipeline.Pipeline([ ("feature_map", feature_map_fourier), ("GP", GaussianProcess(corr='squared_exponential', theta0=1e-2, thetaL=1e-4, thetaU=1e-1, random_start=100, nugget=3.00e-12)) ]) fourier_approx_gp.set_params(feature_map__n_components=2) fourier_approx_gp.fit(data_train, targets_train) fourier_score = fourier_approx_gp.score(data_test, targets_test) # print "Start of fitting RBF" # feature_map_fourier.fit(data_train, targets_train) # x = feature_map_fourier.transform(data_train) # # print feature_map_fourier # print "data_train = ", len(data_train) # print "x = ", len(x) print fourier_score fourier_score_list.append(fourier_score) plt.figure(figsize=(15.0, 11.0)) plt.boxplot(fourier_score_list) plt.ylabel("R2 Score", fontsize=20) plt.xlabel("Number of sampling points", fontsize=20) plt.show()
from utilities import * import data_parser_mathew num_iteration = [10, 100, 1000, 10000, 50000, 100000, 200000] # num_iteration = [10, 20, 30] score_list = [] for i in num_iteration: input_data, output_data = data_parser_mathew.data_parser( num_in_samples=70000) test_input_data, test_output_data = data_parser_mathew.data_parser( num_in_samples=1000) svm_model = DecisionTreeRegressor(max_depth=20) rbm = BernoulliRBM(random_state=0, verbose=True) classifier = Pipeline(steps=[('rbm', rbm), ('svm', svm_model)]) rbm.learning_rate = 0.001 rbm.n_iter = i # More components tend to give better prediction performance, but larger # fitting time rbm.n_components = 8 classifier.fit(input_data, output_data) model_score = repr( r2_score(test_output_data, classifier.predict(test_input_data))) print model_score score_list.append(model_score) score_list = map(float, score_list) plt.figure()
from utilities import * import data_parser_mathew num_iteration = [10, 100, 1000, 10000, 50000, 100000, 200000] # num_iteration = [10, 20, 30] score_list = [] for i in num_iteration: input_data, output_data = data_parser_mathew.data_parser(num_in_samples=70000) test_input_data, test_output_data = data_parser_mathew.data_parser(num_in_samples=1000) svm_model = DecisionTreeRegressor(max_depth=20) rbm = BernoulliRBM(random_state=0, verbose=True) classifier = Pipeline(steps=[('rbm', rbm), ('svm', svm_model)]) rbm.learning_rate = 0.001 rbm.n_iter = i # More components tend to give better prediction performance, but larger # fitting time rbm.n_components = 8 classifier.fit(input_data, output_data) model_score = repr(r2_score(test_output_data, classifier.predict(test_input_data))) print model_score score_list.append(model_score) score_list = map(float, score_list) plt.figure() plt.plot(num_iteration, score_list) plt.ylabel("R2 score") plt.xlabel("Number of iteration")
def svm_gp():
    """SVM + GP combination experiment (formatted copy).

    Trains a NuSVR on 15000 samples, incrementally enlarges a GP training
    pool by one random sample per iteration for 100 iterations (refitting
    the GP each time behind a console progress bar), then prints R2 scores
    for four combinations of SVM and GP test-set predictions.
    """
    final_results = {}
    # for dimension in [2, 3, 4, 5, 6, 12]:
    for dimension in [6]:
        print "Current Dimension = ", dimension
        X, y = data_parser(num_in_samples=15000,
                           file_name="all_outputs_" + str(dimension) + "d.txt")
        gp_data, gp_target = data_parser(num_in_samples=10,
                                         file_name="all_outputs_" + str(dimension) + "d.txt")
        data_test, targets_test = data_parser(num_in_samples=1000,
                                              file_name="all_outputs_" + str(dimension) + "d.txt")
        # Fit regression model
        svm_non_linear = svm.NuSVR()
        # svm_non_linear = svm.SVR(C=10, kernel='poly', epsilon=0.02, degree=5, tol=1e-5)
        y_non_linear = svm_non_linear.fit(X, y)
        # Pool for the GP, keyed target -> input point.
        # NOTE(review): a repeated target value overwrites its earlier entry.
        gp_data_base = {}
        for i in range(len(gp_target)):
            gp_data_base[gp_target[i]] = gp_data[i]
        # Draw the 100-character progress-bar frame.
        toolbar_width = 100
        sys.stdout.write("[%s]" % (" " * toolbar_width))
        sys.stdout.flush()
        sys.stdout.write(
            "\b" * (toolbar_width + 1))  # return to start of line, after '['
        for i in range(100):
            gp = GaussianProcess(corr='squared_exponential', theta0=1e-2,
                                 thetaL=1e-4, thetaU=1e-1, random_start=100,
                                 nugget=3.00e-14)
            # Pick a random sample
            # Fit on the pool so far: inputs are the dict values,
            # targets are the dict keys.
            gp.fit(gp_data_base.values(), gp_data_base.keys())
            random_data, random_target = data_parser(num_in_samples=1,
                                                     file_name="all_outputs_" + str(dimension) + "d.txt")
            gp_data_base[random_target[0]] = random_data[0]
            # Get the score for this point from
            # model_0 = svm_non_linear.predict(random_data) + gp.predict(random_data)
            # model_1 = svm_non_linear.predict(random_data) * gp.predict(random_data)
            # y_pred, MSE = gp.predict(data_test, eval_MSE=True)
            # print MSE
            sys.stdout.write("-")
            sys.stdout.flush()
        sys.stdout.write("\n")
        # model_0 = svm_non_linear.predict(data_test) + gp.predict(data_test)
        # model_1 = SVM prediction plus GP predictive MSE
        # (index [1] of the eval_MSE=True return pair).
        final_results[dimension] = {
            "model_0": svm_non_linear.predict(data_test),
            "model_1": svm_non_linear.predict(data_test) + gp.predict(data_test, eval_MSE=True)[1],
            "model_2": -1 * svm_non_linear.predict(data_test) * gp.predict(data_test)
        }
        model_0 = svm_non_linear.predict(data_test)
        model_1 = svm_non_linear.predict(data_test) + gp.predict(
            data_test, eval_MSE=True)[1]
        model_2 = -1 * svm_non_linear.predict(data_test) * gp.predict(
            data_test)
        model_3 = (svm_non_linear.predict(data_test) + gp.predict(data_test)) * 0.5
        print "model_0 score = ", repr(r2_score(targets_test, model_0))
        print "model_1 score = ", repr(r2_score(targets_test, model_1))
        print "model_2 score = ", repr(r2_score(targets_test, model_2))
        print "model_3 score = ", repr(r2_score(targets_test, model_3))
        print "---------------------------"