def main():
    """Run every experiment in sequence, reporting progress after each.

    Relies on the module-level helpers (lasso, en, group_lasso, ...) and on
    pysgpp being imported as ``sg``.

    Uses print() calls (not Python-2 print statements) so the function is
    valid under Python 3, consistent with the rest of the file.
    """
    sg.omp_set_num_threads(4)  # cap OpenMP threads for reproducible timing
    lasso()
    print("Finished lasso")
    en()
    print("Finished en")
    group_lasso()
    print("Finished group lasso")
    lasso_heat()
    print("Finished lasso-heat")
    grp_heat()
    print("Finished grp-heat")
## function is sufficiently smooth and then imposes a multivariate Gaussian with the
## covariance matrix
## \f$\mathbf{\Gamma}_{i,i} = 0.25^{\vert \mathbf{l} \vert_1 - d}\f$, where
## \f$ d \f$ corresponds to the dimension of the grid and
## \f$ \mathbf{\vert \mathbf{l} \vert_1} \f$ to the level sum of the ith grid point.

import requests as r
import numpy as np
import pandas as pd
import sklearn.preprocessing as pre
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# KFold now lives in sklearn.model_selection — confirm the pinned version.
from sklearn.cross_validation import KFold
from scipy import stats
from zipfile import ZipFile
import io
import pysgpp as sg
# Limit OpenMP threads at import time so every entry point shares the setting.
sg.omp_set_num_threads(4)


## This function scales all predictors so that they are suitable for sparse grids.
def scale(df, scaler=None):
    """Scale all predictor columns of *df* into [0, 1] for sparse grids.

    The last column (the target) is saved beforehand and restored unscaled,
    since transforming it is unnecessary (and meaningless for categorical
    targets).

    Args:
        df: DataFrame whose last column is the target.
        scaler: optional pre-fitted scaler with a ``transform`` method;
            when None, a fresh MinMaxScaler is fitted.

    Returns:
        (scaler, df): the scaler used and the scaled DataFrame.
    """
    # .iloc replaces the DataFrame.ix accessor removed in pandas 1.0;
    # the integer -1 index was positional in the original usage as well.
    Y = df.iloc[:, -1]  # save Y (don't need to transform it)
    X = df.values
    if scaler:
        X = scaler.transform(X)
    else:
        scaler = pre.MinMaxScaler()
        X = scaler.fit_transform(X)
    index = df.index
    columns = df.columns
    df = pd.DataFrame(data=X, index=index, columns=columns)
    # Restore the untouched target and return; the original block was
    # truncated here and returned None, unlike the complete version of this
    # helper found elsewhere in the file.
    df.iloc[:, -1] = Y
    return scaler, df
# Exemple #3  (scraping artifact: example separator, not code)
# 0
    """Example objective function from the title of my Master's thesis."""
    def __init__(self):
        super(ExampleFunction, self).__init__(2)

    def eval(self, x):
        """Return the objective value sin(8*x[0]) + sin(7*x[1]) at point x."""
        frequencies = (8.0, 7.0)
        return sum(math.sin(freq * coord)
                   for freq, coord in zip(frequencies, x))


def printLine():
    """Print an 80-character horizontal separator line."""
    half = "----------------------------------------"
    print(half + half)


## Example-driver setup: configure pysgpp and define the parameters used by
## the optimization example below.
## We have to disable OpenMP within pysgpp since it interferes with SWIG's director feature.
pysgpp.omp_set_num_threads(1)

print("sgpp::optimization example program started.\n")
# increase verbosity of the output
pysgpp.OptPrinter.getInstance().setVerbosity(2)

## Here, we define some parameters: objective function, dimensionality,
## B-spline degree, maximal number of grid points, and adaptivity.
# objective function (2-D example defined above)
f = ExampleFunction()
# dimension of domain
d = f.getNumberOfParameters()
# B-spline degree (cubic)
p = 3
# maximal number of grid points
N = 30
# Exemple #4  (scraping artifact: example separator, not code)
# 0
#!/usr/bin/python
# Copyright (C) 2008-today The SG++ project
# This file is part of the SG++ project. For conditions of distribution and
# use, please see the copyright notice provided with SG++ or at
# sgpp.sparsegrids.org

## \page example_interactionExample_py Interaction Terms Aware Sparse Grids
##
## This example compares standard sparse grids with sparse grids that only contain
## a subset of all possible interaction terms.
## It uses the optical digits dataset as an example.

import numpy as np
import pysgpp as sg; sg.omp_set_num_threads(4)
import pandas as pd
import sklearn.preprocessing as pre

## This function scales all predictors so that they are suitable for sparse grids.
def scale(df, scaler=None):
    """Scale all predictor columns of *df* into [0, 1] for sparse grids.

    The last column (the target) is saved beforehand and restored unscaled,
    since transforming it is unnecessary (and meaningless for categorical
    targets).

    Args:
        df: DataFrame whose last column is the target.
        scaler: optional pre-fitted scaler with a ``transform`` method;
            when None, a fresh MinMaxScaler is fitted.

    Returns:
        (scaler, df): the scaler used and the scaled DataFrame.
    """
    # .iloc replaces the DataFrame.ix accessor removed in pandas 1.0; the
    # integer -1 index was positional under .ix's fallback rules as well.
    Y = df.iloc[:, -1]  # save Y (don't need to transform it)
    X = df.values
    if scaler:
        X = scaler.transform(X)
    else:
        scaler = pre.MinMaxScaler()
        X = scaler.fit_transform(X)
    index = df.index
    columns = df.columns
    df = pd.DataFrame(data=X, index=index, columns=columns)
    df.iloc[:, -1] = Y  # restore the untouched target column
    return scaler, df
def main(level, num, T, dataset, _log):
    """Run one grid-search experiment and persist all results.

    Trains a sparse-grid classifier (optdigits variants) or regressor (any
    other dataset) for each regularization strength in ``lambda_grid``,
    retrains the best configuration, scores it on the held-out test set,
    and commits every configuration and score to the SQL database.

    Args:
        level: sparse-grid level.
        num: intended number of lambda values (currently unused — the
            logspace grid below is commented out).
        T: grid parameter forwarded to GridConfig.
        dataset: dataset name; names containing 'optdigits' select the
            pre-split classification files.
        _log: logger instance.
    """
    sg.omp_set_num_threads(4)
    # optdigits ships pre-split; every other dataset is split here.
    if 'optdigits' in dataset:
        if 'sub' in dataset:
            df_train = get_dataset('optdigits_sub_train')
            df_test = get_dataset('optdigits_sub_test')
        else:
            df_train = get_dataset('optdigits_train')
            df_test = get_dataset('optdigits_test')
        X_train, y_train = get_xy(df_train)
        X_test, y_test = get_xy(df_test)
    else:
        df = get_dataset(dataset)
        train, test = split(df)
        X_train, y_train = get_xy(train)
        X_test, y_test = get_xy(test)
    _log.debug("Read file.")

    session = model.make_session()
    _log.debug("Created SQL session.")

    grid_config = model.GridConfig(type=6, level=level, T=T)
    # 'treshold' is the (misspelled) keyword the project API expects.
    adaptivity_config = model.AdaptivityConfig(num_refinements=0,
                                               no_points=3,
                                               treshold=0.0,
                                               percent=0.0)
    # np.float64 replaces the np.float alias removed in NumPy 1.24;
    # the value (machine epsilon of float64) is unchanged.
    epsilon = np.sqrt(np.finfo(np.float64).eps)
    solver_type = sg.SLESolverType_CG
    # NOTE(review): 10e-5 == 1e-4 and 10e-6 == 1e-5 — confirm the intended
    # solver thresholds.
    solver_config = model.SolverConfig(type=solver_type,
                                       max_iterations=70,
                                       epsilon=epsilon,
                                       threshold=10e-5)
    final_solver_config = model.SolverConfig(type=solver_type,
                                             max_iterations=150,
                                             epsilon=epsilon,
                                             threshold=10e-6)

    # solver_type = sg.SLESolverType_FISTA
    # solver_config = model.SolverConfig(type=solver_type, max_iterations=200, epsilon=0.0, threshold=10e-5)
    # final_solver_config = model.SolverConfig(type=solver_type, max_iterations=400, epsilon=0.0, threshold=10e-8)
    regularization_type = sg.RegularizationType_Identity
    #regularization_type = sg.RegularizationType_ElasticNet
    regularization_config = model.RegularizationConfig(
        type=regularization_type, l1_ratio=0.05, exponent_base=1.0)
    experiment = model.Experiment(dataset=dataset)

    session.add(grid_config)
    session.add(adaptivity_config)
    session.add(solver_config)
    session.add(final_solver_config)
    session.add(experiment)

    _log.debug("Created configurations.")

    interactions = None
    #interactions = mnist_interactions(l2_distance, 2*np.sqrt(2), level)
    grid_config.interactions = str(interactions)
    if 'optdigits' in dataset:
        estimator = SGClassificationLearner(grid_config, regularization_config,
                                            solver_config, final_solver_config,
                                            adaptivity_config, interactions)
    else:
        estimator = SGRegressionLearner(grid_config, regularization_config,
                                        solver_config, final_solver_config,
                                        adaptivity_config, interactions)
    cv = get_cv(dataset, y_train)
    experiment.cv = str(cv)
    #lambda_grid = np.logspace(-6, -1, num=num)
    lambda_grid = [0.1]
    parameters = {'regularization_config__lambda_reg': lambda_grid}
    grid_search = GridSearch(estimator, parameters, cv)
    _log.info("Start learning.")
    grid_search.fit(X_train, y_train)
    _log.info("Finished learning.")

    # Walk CV results best-first; only the best entry (the first) is
    # retrained and evaluated on the test set.
    first = True
    for score in sorted(grid_search.grid_scores_,
                        key=itemgetter(1),
                        reverse=True):
        # Regression scores are sign-flipped back into errors; presumably
        # the learner follows the "higher is better" scoring convention.
        if 'optdigits' in dataset:
            error_mult = 1
        else:
            error_mult = -1
        validation_mse = error_mult * score.mean_validation_score
        validation_std = np.std(np.abs(score.cv_validation_scores))
        validation_grid_sizes = score.cv_grid_sizes
        params = estimator.get_params()
        params.update(score.parameters)
        regularization_config = model.RegularizationConfig(
            type=params['regularization_config__type'],
            lambda_reg=params['regularization_config__lambda_reg'],
            exponent_base=params['regularization_config__exponent_base'],
            l1_ratio=params['regularization_config__l1_ratio'])
        session.add(regularization_config)
        result = model.Result(
            validation_mse=validation_mse,
            validation_std=validation_std,
            grid_config=grid_config,
            adaptivity_config=adaptivity_config,
            solver_config=solver_config,
            final_solver_config=final_solver_config,
            regularization_config=regularization_config,
            experiment=experiment,
            validation_grid_points_mean=np.mean(validation_grid_sizes),
            validation_grid_points_stdev=np.std(validation_grid_sizes))
        # Retrain best learner and validate test set:
        if first:
            first = False
            estimator.set_params(**params)
            estimator.fit(X_train, y_train)
            result.train_grid_points = estimator.get_grid_size()
            result.train_mse = error_mult * estimator.score(X_train, y_train)
            result.test_mse = error_mult * estimator.score(X_test, y_test)
            # R^2 only makes sense for the regression datasets.
            if 'optdigits' not in dataset:
                result.train_r2 = get_r_squared(estimator, X_train, y_train)
                result.test_r2 = get_r_squared(estimator, X_test, y_test)

        session.add(result)

    _log.debug("Pushing to database.")
    session.commit()

    _log.info("Finished experiment.")
def main(level, num, num_init, T, dataset, _log):
    """Run one Bayesian-optimization experiment and persist the result.

    Optimizes the regularization strength with BayesOptReg, retrains the
    sparse-grid regressor with the best parameters found, and commits all
    configurations plus train/test scores to the SQL database.

    Args:
        level: sparse-grid level.
        num: number of Bayesian-optimization iterations.
        num_init: number of initial samples for the optimizer.
        T: grid parameter forwarded to GridConfig.
        dataset: dataset name; 'optdigits' selects the pre-split files.
        _log: logger instance.
    """
    sg.omp_set_num_threads(4)
    # optdigits ships pre-split; every other dataset is split here.
    if 'optdigits' in dataset:
        df_train = get_dataset('optdigits_train')
        df_test = get_dataset('optdigits_test')
        X_train, y_train = get_xy(df_train)
        X_test, y_test = get_xy(df_test)
    else:
        df = get_dataset(dataset)
        train, test = split(df)
        X_train, y_train = get_xy(train)
        X_test, y_test = get_xy(test)
    _log.debug("Read file.")

    session = model.make_session()
    _log.debug("Created SQL session.")

    grid_config = model.GridConfig(type=6, level=level, T=T)
    # 'treshold' is the (misspelled) keyword the project API expects.
    adaptivity_config = model.AdaptivityConfig(num_refinements=0, no_points=0, treshold=0.0, percent=0.0)
    # np.float64 replaces the np.float alias removed in NumPy 1.24;
    # the value (machine epsilon of float64) is unchanged.
    epsilon = np.sqrt(np.finfo(np.float64).eps)
    solver_type = sg.SLESolverType_CG
    solver_config = model.SolverConfig(type=solver_type, max_iterations=70, epsilon=epsilon, threshold=10e-5)
    final_solver_config = model.SolverConfig(type=solver_type, max_iterations=250, epsilon=epsilon, threshold=10e-6)
    # solver_type = sg.SLESolverType_FISTA
    # solver_config = model.SolverConfig(type=solver_type, max_iterations=200, epsilon=0.0, threshold=10e-5)
    # final_solver_config = model.SolverConfig(type=solver_type, max_iterations=400, epsilon=0.0, threshold=10e-6)
    regularization_type = sg.RegularizationType_Identity
    regularization_config = model.RegularizationConfig(type=regularization_type, l1_ratio=1.0, exponent_base=1.0)
    experiment = model.Experiment(dataset=dataset)

    _log.debug("Created configurations.")

    # The pair term [0,1] plus every single-variable term for indices 0..10.
    interactions = [[0,1]] + [[i] for i in range(0,11)]
    #interactions = None
    grid_config.interactions = str(interactions)
    estimator = SGRegressionLearner(grid_config, regularization_config, solver_config,
                                    final_solver_config, adaptivity_config, interactions)

    # NOTE(review): the grid-search variant of main passes y_train to
    # get_cv — confirm X_train is intended here.
    cv = get_cv(dataset, X_train)
    experiment.cv = str(cv)
    params = [Hyp_param('regularization_config__lambda_reg', 0.0, 0.3)]
              #Hyp_param('regularization_config__exponent_base', 3.0, 7.0)]

    bayes_search = BayesOptReg(estimator, cv, X_train, y_train,
                               params, num, n_init_samples=num_init)

    _log.info("Start learning.")
    validation_score, best_params, cv_grid_sizes = bayes_search.optimize()
    _log.info("Finished learning.")
    _log.info("Best CV-MSE was {}. With params {}".format(validation_score, best_params))
    validation_mse = validation_score
    validation_grid_sizes = cv_grid_sizes

    #Retrain estimator with best parameters
    params = estimator.get_params()
    params.update(best_params)
    estimator.set_params(**params)

    estimator.fit(X_train, y_train)

    result = model.Result(validation_mse=validation_mse,
                            grid_config=grid_config,
                            adaptivity_config=adaptivity_config,
                            solver_config=solver_config,
                            final_solver_config=final_solver_config,
                            regularization_config=regularization_config,
                            experiment=experiment,
                            validation_grid_points_mean=np.mean(validation_grid_sizes),
                            validation_grid_points_stdev=np.std(validation_grid_sizes))

    result.train_grid_points = estimator.get_grid_size()
    # Scores are negated back into MSEs; presumably the learner follows the
    # "higher is better" scoring convention.
    result.train_mse = -estimator.score(X_train, y_train)
    result.train_r2 = get_r_squared(estimator, X_train, y_train)
    result.test_mse = -estimator.score(X_test, y_test)
    result.test_r2 = get_r_squared(estimator, X_test, y_test)

    regularization_config = model.RegularizationConfig(
                            type=params['regularization_config__type'],
                            lambda_reg=params['regularization_config__lambda_reg'],
                            exponent_base=params['regularization_config__exponent_base'])

    session.add(grid_config)
    session.add(adaptivity_config)
    session.add(solver_config)
    session.add(final_solver_config)
    session.add(experiment)
    session.add(regularization_config)
    session.add(result)

    _log.debug("Pushing to database.")
    session.commit()

    _log.info("Finished experiment.")
        super(ExampleFunction, self).__init__(2)
    
    def eval(self, x):
        """Return the objective value sin(8*x[0]) + sin(7*x[1]) at point x."""
        frequencies = (8.0, 7.0)
        return sum(math.sin(freq * coord)
                   for freq, coord in zip(frequencies, x))



def printLine():
    """Print an 80-character horizontal separator line.

    Uses the print() function (not the Python-2 print statement) so this is
    valid under Python 3 and consistent with the other printLine definition
    in this file.
    """
    print("----------------------------------------" +
          "----------------------------------------")



# Example-driver setup: configure pysgpp and define the optimization
# parameters used by the rest of the script.
# disable multi-threading
pysgpp.omp_set_num_threads(1)
# increase output verbosity
pysgpp.OptPrinter.getInstance().setVerbosity(2)

# Use the print() function so this also parses under Python 3 (the
# statement form is a syntax error there); output is unchanged.
print("SGPP::optimization example program started.\n")

# objective function (2-D example defined above)
f = ExampleFunction()
# dimension of domain
d = f.getNumberOfParameters()
# B-spline degree (cubic)
p = 3
# maximal number of grid points
N = 30
# adaptivity of grid generation
gamma = 0.95