def main():
    """Run every regularization example in sequence, reporting progress.

    Fix: the original used Python-2-only ``print`` statements; with a single
    argument, ``print(...)`` behaves identically on Python 2 and is valid
    Python 3, matching the print-function style used elsewhere in the project.
    """
    # Use four OpenMP threads for all pysgpp computations below.
    sg.omp_set_num_threads(4)
    lasso()
    print("Finished lasso")
    en()
    print("Finished en")
    group_lasso()
    print("Finished group lasso")
    lasso_heat()
    print("Finished lasso-heat")
    grp_heat()
    print("Finished grp-heat")
## function is sufficiently smooth and then imposes a multivariate Gaussian with the ## covariance matrix ## \f$\mathbf{\Gamma}_{i,i} = 0.25^{\vert \mathbf{l} \vert_1 - d}\f$, where ## \f$ d \f$ corresponds to the dimension of the grid and ## \f$ \mathbf{\vert \mathbf{l} \vert_1} \f$ to the level sum of the ith grid point. import requests as r import numpy as np import pandas as pd import sklearn.preprocessing as pre from sklearn.cross_validation import KFold from scipy import stats from zipfile import ZipFile import io import pysgpp as sg sg.omp_set_num_threads(4) ## This function scales all predictors so that they are suitable for sparse grids. def scale(df, scaler=None): Y = df.ix[:, -1] # save Y (don't need to transform it/useless for cat. data!) X = df.values if scaler: X = scaler.transform(X) else: scaler = pre.MinMaxScaler() X = scaler.fit_transform(X) index = df.index columns = df.columns df = pd.DataFrame(data=X, index=index, columns=columns)
"""Example objective function from the title of my Master's thesis.""" def __init__(self): super(ExampleFunction, self).__init__(2) def eval(self, x): """Evaluates the function.""" return math.sin(8.0 * x[0]) + math.sin(7.0 * x[1]) def printLine(): print("----------------------------------------" + \ "----------------------------------------") ## We have to disable OpenMP within pysgpp since it interferes with SWIG's director feature. pysgpp.omp_set_num_threads(1) print("sgpp::optimization example program started.\n") # increase verbosity of the output pysgpp.OptPrinter.getInstance().setVerbosity(2) ## Here, we define some parameters: objective function, dimensionality, ## B-spline degree, maximal number of grid points, and adaptivity. # objective function f = ExampleFunction() # dimension of domain d = f.getNumberOfParameters() # B-spline degree p = 3 # maximal number of grid points N = 30
#!/usr/bin/python
# Copyright (C) 2008-today The SG++ project
# This file is part of the SG++ project. For conditions of distribution and
# use, please see the copyright notice provided with SG++ or at
# sgpp.sparsegrids.org

## \page example_interactionExample_py Interaction Terms Aware Sparse Grids
##
## This example compares standard sparse grids with sparse grids that only contain
## a subset of all possible interaction terms.
## It uses the optical digits dataset as an example.
import numpy as np
import pysgpp as sg; sg.omp_set_num_threads(4)
import pandas as pd
import sklearn.preprocessing as pre


## This function scales all predictors so that they are suitable for sparse grids.
def scale(df, scaler=None):
    """Scale every column of ``df`` to the unit interval, keeping the target.

    The last column is treated as the response: its original values are saved
    before the transform and written back afterwards, so effectively only the
    predictors are rescaled (scaling the target is useless for categorical
    data).

    Fix: ``DataFrame.ix`` was deprecated and later removed from pandas; the
    purely positional accesses here are replaced by the equivalent
    ``DataFrame.iloc``.

    :param df:     DataFrame whose last column is the target variable.
    :param scaler: optional already-fitted scaler exposing ``transform``;
                   when ``None``, a new ``MinMaxScaler`` is fitted on ``df``.
    :return:       tuple ``(scaler, df)`` of the (possibly newly fitted)
                   scaler and the rescaled DataFrame.
    """
    Y = df.iloc[:, -1]  # save Y (don't need to transform it/useless for cat. data!)
    X = df.values
    if scaler:
        X = scaler.transform(X)
    else:
        scaler = pre.MinMaxScaler()
        X = scaler.fit_transform(X)
    index = df.index
    columns = df.columns
    df = pd.DataFrame(data=X, index=index, columns=columns)
    df.iloc[:, -1] = Y  # restore the untouched target column
    return scaler, df
def main(level, num, T, dataset, _log):
    """Run one grid-search regularization experiment and store it in the DB.

    Loads the requested dataset, builds SG++ grid/solver/regularization
    configurations, fits an ``SGClassificationLearner`` (optdigits) or
    ``SGRegressionLearner`` (everything else) over a lambda grid via
    cross-validated grid search, and commits one ``model.Result`` row per
    evaluated parameter set. The best learner is additionally retrained on
    the full training set and scored on the test set.

    :param level:   sparse-grid level for ``model.GridConfig``.
    :param num:     intended size of the lambda grid (currently unused — the
                    ``np.logspace`` line below is commented out).
    :param T:       grid parameter ``T`` passed to ``model.GridConfig``.
    :param dataset: dataset name; names containing 'optdigits' switch to
                    classification and to the pre-split train/test files.
    :param _log:    logger used for progress messages.
    """
    sg.omp_set_num_threads(4)
    # optdigits ships pre-split; other datasets are split here.
    if 'optdigits' in dataset:
        if 'sub' in dataset:
            df_train = get_dataset('optdigits_sub_train')
            df_test = get_dataset('optdigits_sub_test')
        else:
            df_train = get_dataset('optdigits_train')
            df_test = get_dataset('optdigits_test')
        X_train, y_train = get_xy(df_train)
        X_test, y_test = get_xy(df_test)
    else:
        df = get_dataset(dataset)
        train, test = split(df)
        X_train, y_train = get_xy(train)
        X_test, y_test = get_xy(test)
    _log.debug("Read file.")
    session = model.make_session()
    _log.debug("Created SQL session.")
    grid_config = model.GridConfig(type=6, level=level, T=T)
    # No refinement steps are performed (num_refinements=0).
    # NOTE(review): 'treshold' (sic) matches the model's field spelling.
    adaptivity_config = model.AdaptivityConfig(num_refinements=0, no_points=3, treshold=0.0, percent=0.0)
    # CG is stopped at machine precision.
    # NOTE(review): np.float is removed in NumPy >= 1.24; should become float.
    epsilon = np.sqrt(np.finfo(np.float).eps)
    solver_type = sg.SLESolverType_CG
    # NOTE(review): 10e-5 == 1e-4 and 10e-6 == 1e-5 — confirm these
    # thresholds are intended and not typos for 1e-5 / 1e-6.
    solver_config = model.SolverConfig(type=solver_type, max_iterations=70, epsilon=epsilon, threshold=10e-5)
    final_solver_config = model.SolverConfig(type=solver_type, max_iterations=150, epsilon=epsilon, threshold=10e-6)
    # Alternative FISTA solver setup, kept for experimentation:
    # solver_type = sg.SLESolverType_FISTA
    # solver_config = model.SolverConfig(type=solver_type, max_iterations=200, epsilon=0.0, threshold=10e-5)
    # final_solver_config = model.SolverConfig(type=solver_type, max_iterations=400, epsilon=0.0, threshold=10e-8)
    regularization_type = sg.RegularizationType_Identity
    #regularization_type = sg.RegularizationType_ElasticNet
    regularization_config = model.RegularizationConfig(
        type=regularization_type, l1_ratio=0.05, exponent_base=1.0)
    experiment = model.Experiment(dataset=dataset)
    session.add(grid_config)
    session.add(adaptivity_config)
    session.add(solver_config)
    session.add(final_solver_config)
    session.add(experiment)
    _log.debug("Created configurations.")
    # None means: use a standard sparse grid with all interaction terms.
    interactions = None
    #interactions = mnist_interactions(l2_distance, 2*np.sqrt(2), level)
    grid_config.interactions = str(interactions)
    if 'optdigits' in dataset:
        estimator = SGClassificationLearner(grid_config, regularization_config, solver_config, final_solver_config,
                                            adaptivity_config, interactions)
    else:
        estimator = SGRegressionLearner(grid_config, regularization_config, solver_config, final_solver_config,
                                        adaptivity_config, interactions)
    cv = get_cv(dataset, y_train)
    experiment.cv = str(cv)
    #lambda_grid = np.logspace(-6, -1, num=num)
    lambda_grid = [0.1]
    parameters = {'regularization_config__lambda_reg': lambda_grid}
    grid_search = GridSearch(estimator, parameters, cv)
    _log.info("Start learning.")
    grid_search.fit(X_train, y_train)
    _log.info("Finished learning.")
    first = True
    # Iterate scores best-first; classification scores count positive,
    # regression scores are negated MSEs, hence error_mult.
    for score in sorted(grid_search.grid_scores_, key=itemgetter(1), reverse=True):
        if 'optdigits' in dataset:
            error_mult = 1
        else:
            error_mult = -1
        validation_mse = error_mult * score.mean_validation_score
        validation_std = np.std(np.abs(score.cv_validation_scores))
        validation_grid_sizes = score.cv_grid_sizes
        params = estimator.get_params()
        params.update(score.parameters)
        regularization_config = model.RegularizationConfig(
            type=params['regularization_config__type'],
            lambda_reg=params['regularization_config__lambda_reg'],
            exponent_base=params['regularization_config__exponent_base'],
            l1_ratio=params['regularization_config__l1_ratio'])
        session.add(regularization_config)
        result = model.Result(
            validation_mse=validation_mse,
            validation_std=validation_std,
            grid_config=grid_config,
            adaptivity_config=adaptivity_config,
            solver_config=solver_config,
            final_solver_config=final_solver_config,
            regularization_config=regularization_config,
            experiment=experiment,
            validation_grid_points_mean=np.mean(validation_grid_sizes),
            validation_grid_points_stdev=np.std(validation_grid_sizes))
        # Retrain best learner (first = best, due to best-first sort) and
        # validate on the test set:
        if first:
            first = False
            estimator.set_params(**params)
            estimator.fit(X_train, y_train)
            result.train_grid_points = estimator.get_grid_size()
            result.train_mse = error_mult * estimator.score(X_train, y_train)
            result.test_mse = error_mult * estimator.score(X_test, y_test)
            # R^2 only makes sense for the regression datasets.
            if 'optdigits' not in dataset:
                result.train_r2 = get_r_squared(estimator, X_train,
                                                y_train)
                result.test_r2 = get_r_squared(estimator, X_test, y_test)
        session.add(result)
    _log.debug("Pushing to database.")
    session.commit()
    _log.info("Finished experiment.")
def main(level, num, num_init, T, dataset, _log):
    """Run one Bayesian-optimization regularization experiment, store in DB.

    Loads the requested dataset, builds SG++ grid/solver/regularization
    configurations, tunes ``lambda_reg`` with ``BayesOptReg`` over an
    ``SGRegressionLearner``, retrains with the best parameters, scores on
    train and test sets, and commits a single ``model.Result`` row.

    :param level:    sparse-grid level for ``model.GridConfig``.
    :param num:      number of Bayesian-optimization iterations.
    :param num_init: number of initial samples for the optimizer.
    :param T:        grid parameter ``T`` passed to ``model.GridConfig``.
    :param dataset:  dataset name; names containing 'optdigits' use the
                     pre-split train/test files.
    :param _log:     logger used for progress messages.
    """
    sg.omp_set_num_threads(4)
    # optdigits ships pre-split; other datasets are split here.
    if 'optdigits' in dataset:
        df_train = get_dataset('optdigits_train')
        df_test = get_dataset('optdigits_test')
        X_train, y_train = get_xy(df_train)
        X_test, y_test = get_xy(df_test)
    else:
        df = get_dataset(dataset)
        train, test = split(df)
        X_train, y_train = get_xy(train)
        X_test, y_test = get_xy(test)
    _log.debug("Read file.")
    session = model.make_session()
    _log.debug("Created SQL session.")
    grid_config = model.GridConfig(type=6, level=level, T=T)
    # No refinement steps are performed (num_refinements=0).
    # NOTE(review): 'treshold' (sic) matches the model's field spelling.
    adaptivity_config = model.AdaptivityConfig(num_refinements=0, no_points=0, treshold=0.0, percent=0.0)
    # CG is stopped at machine precision.
    # NOTE(review): np.float is removed in NumPy >= 1.24; should become float.
    epsilon = np.sqrt(np.finfo(np.float).eps)
    solver_type = sg.SLESolverType_CG
    # NOTE(review): 10e-5 == 1e-4 and 10e-6 == 1e-5 — confirm these
    # thresholds are intended and not typos for 1e-5 / 1e-6.
    solver_config = model.SolverConfig(type=solver_type, max_iterations=70, epsilon=epsilon, threshold=10e-5)
    final_solver_config = model.SolverConfig(type=solver_type, max_iterations=250, epsilon=epsilon, threshold=10e-6)
    # Alternative FISTA solver setup, kept for experimentation:
    # solver_type = sg.SLESolverType_FISTA
    # solver_config = model.SolverConfig(type=solver_type, max_iterations=200, epsilon=0.0, threshold=10e-5)
    # final_solver_config = model.SolverConfig(type=solver_type, max_iterations=400, epsilon=0.0, threshold=10e-6)
    regularization_type = sg.RegularizationType_Identity
    regularization_config = model.RegularizationConfig(type=regularization_type, l1_ratio=1.0, exponent_base=1.0)
    experiment = model.Experiment(dataset=dataset)
    _log.debug("Created configurations.")
    # Restrict the grid to the pairwise term (0,1) plus all univariate
    # terms for the first 11 predictors.
    interactions = [[0,1]] + [[i] for i in range(0,11)]
    #interactions = None
    grid_config.interactions = str(interactions)
    estimator = SGRegressionLearner(grid_config, regularization_config, solver_config, final_solver_config,
                                    adaptivity_config, interactions)
    cv = get_cv(dataset, X_train)
    experiment.cv = str(cv)
    # Search space: lambda_reg in [0.0, 0.3].
    params = [Hyp_param('regularization_config__lambda_reg', 0.0, 0.3)]
    #Hyp_param('regularization_config__exponent_base', 3.0, 7.0)]
    bayes_search = BayesOptReg(estimator, cv, X_train, y_train, params, num, n_init_samples=num_init)
    _log.info("Start learning.")
    validation_score, best_params, cv_grid_sizes = bayes_search.optimize()
    _log.info("Finished learning.")
    _log.info("Best CV-MSE was {}. With params {}".format(validation_score, best_params))
    validation_mse = validation_score
    validation_grid_sizes = cv_grid_sizes
    #Retrain estimator with best parameters
    params = estimator.get_params()
    params.update(best_params)
    estimator.set_params(**params)
    estimator.fit(X_train, y_train)
    result = model.Result(validation_mse=validation_mse,
                          grid_config=grid_config,
                          adaptivity_config=adaptivity_config,
                          solver_config=solver_config,
                          final_solver_config=final_solver_config,
                          regularization_config=regularization_config,
                          experiment=experiment,
                          validation_grid_points_mean=np.mean(validation_grid_sizes),
                          validation_grid_points_stdev=np.std(validation_grid_sizes))
    result.train_grid_points = estimator.get_grid_size()
    # score() returns negated MSE for the regression learner, hence the sign flip.
    result.train_mse = -estimator.score(X_train, y_train)
    result.train_r2 = get_r_squared(estimator, X_train, y_train)
    result.test_mse = -estimator.score(X_test, y_test)
    result.test_r2 = get_r_squared(estimator, X_test, y_test)
    regularization_config = model.RegularizationConfig(
        type=params['regularization_config__type'],
        lambda_reg=params['regularization_config__lambda_reg'],
        exponent_base=params['regularization_config__exponent_base'])
    session.add(grid_config)
    session.add(adaptivity_config)
    session.add(solver_config)
    session.add(final_solver_config)
    session.add(experiment)
    session.add(regularization_config)
    session.add(result)
    _log.debug("Pushing to database.")
    session.commit()
    _log.info("Finished experiment.")
super(ExampleFunction, self).__init__(2) def eval(self, x): """Evaluates the function.""" return math.sin(8.0 * x[0]) + math.sin(7.0 * x[1]) def printLine(): print "----------------------------------------" + \ "----------------------------------------" # disable multi-threading pysgpp.omp_set_num_threads(1) # increase output verbosity pysgpp.OptPrinter.getInstance().setVerbosity(2) print "SGPP::optimization example program started.\n" # objective function f = ExampleFunction() # dimension of domain d = f.getNumberOfParameters() # B-spline degree p = 3 # maximal number of grid points N = 30 # adaptivity of grid generation gamma = 0.95