Example #1
def evaluation_meansquarederror_modular(ground_truth, predicted):
    from shogun.Features import RegressionLabels
    from shogun.Evaluation import MeanSquaredError

    ground_truth_labels = RegressionLabels(ground_truth)
    predicted_labels = RegressionLabels(predicted)

    evaluator = MeanSquaredError()
    mse = evaluator.evaluate(predicted_labels, ground_truth_labels)

    return mse
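A minimal usage sketch with hypothetical toy arrays (any two equal-length 1-D numpy arrays work):

from numpy import array

# hypothetical toy data: four predictions against four targets
ground_truth = array([1.0, 2.0, 3.0, 4.0])
predicted = array([1.1, 1.9, 3.2, 3.8])

mse = evaluation_meansquarederror_modular(ground_truth, predicted)
print(mse)  # mean of the squared residuals, 0.025 for these values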
def regression_gaussian_process_modular(
    traindata_real=traindat, testdata_real=testdat, trainlab=label_traindat, width=2.1
):
    from shogun.Features import RealFeatures, RegressionLabels
    from shogun.Kernel import GaussianKernel

    try:
        from shogun.Regression import GaussianLikelihood, ZeroMean, ExactInferenceMethod, GaussianProcessRegression
    except ImportError:
        print "Eigen3 needed for Gaussian Processes"
        return

    labels = RegressionLabels(trainlab)

    feats_train = RealFeatures(traindata_real)
    feats_test = RealFeatures(testdata_real)
    kernel = GaussianKernel(feats_train, feats_train, width)
    zmean = ZeroMean()
    lik = GaussianLikelihood()
    inf = ExactInferenceMethod(kernel, feats_train, zmean, labels, lik)
    gp = GaussianProcessRegression(inf, feats_train, labels)

    alpha = inf.get_alpha()
    diagonal = inf.get_diagonal_vector()
    cholesky = inf.get_cholesky()
    gp.set_return_type(GaussianProcessRegression.GP_RETURN_COV)

    covariance = gp.apply_regression(feats_test)

    gp.set_return_type(GaussianProcessRegression.GP_RETURN_MEANS)

    predictions = gp.apply_regression()

    print ("Alpha Vector")
    print (alpha)

    print ("Labels")
    print (labels.get_labels())

    print ("sW Matrix")
    print (diagonal)

    print ("Covariances")
    print (covariance.get_labels())

    print ("Mean Predictions")
    print (predictions.get_labels())

    print ("Cholesky Matrix L")
    print (cholesky)
    return gp, alpha, labels, diagonal, covariance, predictions, cholesky
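The names traindat, testdat, and label_traindat are module-level fixtures not shown in this snippet. A hedged call sketch with random stand-ins, assuming RealFeatures takes a matrix with one column per example:

from numpy import random

traindat = random.rand(2, 10)      # 2 features x 10 training examples
testdat = random.rand(2, 5)        # 2 features x 5 test examples
label_traindat = random.rand(10)   # one regression target per training example

results = regression_gaussian_process_modular(traindat, testdat, label_traindat, width=2.1)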
def regression_gaussian_process_modular (n=100,n_test=100, \
  x_range=6,x_range_test=10,noise_var=0.5,width=1, seed=1):

    from numpy import array, sin, random
    from shogun.Features import RealFeatures, RegressionLabels
    from shogun.Kernel import GaussianKernel
    try:
        from shogun.Regression import GaussianLikelihood, ZeroMean, \
          ExactInferenceMethod, GaussianProcessRegression
    except ImportError:
        print("Eigen3 needed for Gaussian Processes")
        return

    # reproducible results
    random.seed(seed)

    # easy regression data: one dimensional noisy sine wave
    X = random.rand(1, n) * x_range
    X = random.rand(1, n) * x_range

    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    # GP specification
    width = 1
    shogun_width = width * width * 2
    kernel = GaussianKernel(10, shogun_width)
    zmean = ZeroMean()
    lik = GaussianLikelihood()
    inf = ExactInferenceMethod(kernel, feats_train, zmean, labels, lik)
    gp = GaussianProcessRegression(inf, feats_train, labels)

    # some things we can do
    alpha = inf.get_alpha()
    diagonal = inf.get_diagonal_vector()
    cholesky = inf.get_cholesky()

    # inference
    gp.set_return_type(GaussianProcessRegression.GP_RETURN_MEANS)
    mean = gp.apply_regression(feats_test)
    gp.set_return_type(GaussianProcessRegression.GP_RETURN_COV)
    covariance = gp.apply_regression(feats_test)

    # plot results
    #plot(X[0],Y[0],'x') # training observations
    #plot(X_test[0],Y_test[0],'-') # ground truth of test
    #plot(X_test[0],mean.get_labels(), '-') # mean predictions of test
    #legend(["training", "ground truth", "mean predictions"])

    #show()

    return gp, alpha, labels, diagonal, covariance, mean, cholesky
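The shogun_width = width * width * 2 line reflects the Gaussian kernel parametrization used here, where the constructor argument equals 2 * sigma^2 so that k(x, y) = exp(-||x - y||^2 / shogun_width). A small numpy sketch of that assumed relationship:

from numpy import array, exp, linalg

sigma = 1.0
shogun_width = 2 * sigma * sigma   # the value passed to GaussianKernel
x, y = array([0.0]), array([1.0])

# kernel value under the assumed parametrization
k = exp(-linalg.norm(x - y) ** 2 / shogun_width)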
Example #5
def regression_svrlight_modular(fm_train=traindat,fm_test=testdat,label_train=label_traindat, \
        width=1.2,C=1,epsilon=1e-5,tube_epsilon=1e-2,num_threads=3):

    from shogun.Features import RegressionLabels, RealFeatures
    from shogun.Kernel import GaussianKernel
    try:
        from shogun.Regression import SVRLight
    except ImportError:
        print('No support for SVRLight available.')
        return

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = RegressionLabels(label_train)

    svr = SVRLight(C, epsilon, kernel, labels)
    svr.set_tube_epsilon(tube_epsilon)
    svr.parallel.set_num_threads(num_threads)
    svr.train()

    kernel.init(feats_train, feats_test)
    out = svr.apply().get_labels()

    return out, kernel
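The returned predictions can be scored with evaluation_meansquarederror_modular from Example #1; label_testdat below is a hypothetical held-out label vector:

# hypothetical: score SVRLight predictions against held-out labels
out, kernel = regression_svrlight_modular()
mse = evaluation_meansquarederror_modular(label_testdat, out)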
Example #6
        def RunLinearRegressionShogun(q):
            totalTimer = Timer()

            # Load input dataset.
            # If the dataset contains two files then the second file is the responses
            # file.
            try:
                Log.Info("Loading dataset", self.verbose)
                if len(self.dataset) == 2:
                    X = np.genfromtxt(self.dataset[0], delimiter=',')
                    y = np.genfromtxt(self.dataset[1], delimiter=',')
                else:
                    X = np.genfromtxt(self.dataset, delimiter=',')
                    y = X[:, (X.shape[1] - 1)]
                    X = X[:, :-1]

                with totalTimer:
                    # Perform linear regression.
                    model = LeastSquaresRegression(RealFeatures(X.T),
                                                   RegressionLabels(y))
                    model.train()
                    b = model.get_w()
            except Exception as e:
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
Example #7
        def RunLARSShogun(q):
            totalTimer = Timer()

            # Load input dataset.
            try:
                Log.Info("Loading dataset", self.verbose)
                inputData = np.genfromtxt(self.dataset[0], delimiter=',')
                responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
                inputFeat = RealFeatures(inputData.T)
                responsesFeat = RegressionLabels(responsesData)

                # Get all the parameters.
                lambda1 = re.search(r"-l (\d+)", options)
                lambda1 = 0.0 if not lambda1 else float(lambda1.group(1))

                with totalTimer:
                    # Perform LARS.
                    model = LeastAngleRegression(False)
                    model.set_max_l1_norm(lambda1)
                    model.set_labels(responsesFeat)
                    model.train(inputFeat)
                    model.get_w(model.get_path_size() - 1)
            except Exception as e:
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
Example #8
def get_labels(raw=False, type='binary'):
	from numpy import concatenate, array, ones, double
	from shogun.Features import BinaryLabels, RegressionLabels

	data = concatenate(array(
		(-ones(NUM_EXAMPLES, dtype=double), ones(NUM_EXAMPLES, dtype=double))
	))
	if raw:
		return data
	else:
		if type == 'binary':
			return BinaryLabels(data)
		if type == 'regression':
			return RegressionLabels(data)
		return None
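A usage sketch, assuming NUM_EXAMPLES is a module-level constant (hypothetical value below):

NUM_EXAMPLES = 5

raw_values = get_labels(raw=True)            # plain numpy array of -1/+1 values
bin_labels = get_labels()                    # wrapped as BinaryLabels
reg_labels = get_labels(type='regression')   # wrapped as RegressionLabels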
def regression_linear_ridge_modular(fm_train=traindat,
                                    fm_test=testdat,
                                    label_train=label_traindat,
                                    tau=1e-6):

    from shogun.Features import RegressionLabels, RealFeatures
    from shogun.Regression import LinearRidgeRegression

    rr = LinearRidgeRegression(tau, RealFeatures(fm_train),
                               RegressionLabels(label_train))
    rr.train()
    out = rr.apply(RealFeatures(fm_test)).get_labels()
    return out, rr
def krr_short():
    print('KRR_short')
    from shogun.Features import RegressionLabels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import KernelRidgeRegression

    width = 0.8
    tau = 1e-6
    krr = KernelRidgeRegression(tau, GaussianKernel(0, width),
                                RegressionLabels(label_train))
    krr.train(RealFeatures(fm_train))
    out = krr.apply(RealFeatures(fm_test)).get_labels()

    return krr, out
Example #11
def regression_least_squares_modular(fm_train=traindat,
                                     fm_test=testdat,
                                     label_train=label_traindat,
                                     tau=1e-6):

    from shogun.Features import RegressionLabels, RealFeatures
    from shogun.Regression import LeastSquaresRegression

    ls = LeastSquaresRegression(RealFeatures(fm_train),
                                RegressionLabels(label_train))
    ls.train()
    out = ls.apply(RealFeatures(fm_test)).get_labels()
    return out, ls
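As a sanity check, a closed-form fit on the same data can be sketched with numpy (whether Shogun fits an intercept is not shown here, so small differences are possible):

import numpy as np

# hypothetical fixtures in Shogun's layout: features x examples
traindat = np.random.rand(2, 20)
label_traindat = np.random.rand(20)

# ordinary least squares on the transposed (examples x features) matrix
w, residuals, rank, sv = np.linalg.lstsq(traindat.T, label_traindat, rcond=None)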
def evaluation_cross_validation_regression(fm_train=traindat,
                                           fm_test=testdat,
                                           label_train=label_traindat,
                                           width=0.8,
                                           tau=1e-6):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import MeanSquaredError
    from shogun.Evaluation import CrossValidationSplitting
    from shogun.Features import RegressionLabels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import KernelRidgeRegression

    # training data
    features = RealFeatures(fm_train)
    labels = RegressionLabels(label_train)

    # kernel and predictor
    kernel = GaussianKernel()
    predictor = KernelRidgeRegression(tau, kernel, labels)

    # splitting strategy for 5-fold cross-validation (for classification it is
    # better to use "StratifiedCrossValidationSplitting", but the standard
    # splitting is used here)
    splitting_strategy = CrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = MeanSquaredError()

    # cross-validation instance
    cross_validation = CrossValidation(predictor, features, labels,
                                       splitting_strategy,
                                       evaluation_criterium)

    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually needed
    # for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # (optional) tell machine to precompute kernel matrix. speeds up. may not work
    predictor.data_lock(labels, features)

    # perform cross-validation and return the result
    result = cross_validation.evaluate()
    return result
def regression_libsvr_modular (svm_c=1, svr_param=0.1, n=100,n_test=100, \
		x_range=6,x_range_test=10,noise_var=0.5,width=1, seed=1):

	from numpy import array, sin, random
	from shogun.Features import RegressionLabels, RealFeatures
	from shogun.Kernel import GaussianKernel
	from shogun.Regression import LibSVR, LIBSVR_NU_SVR, LIBSVR_EPSILON_SVR

	# reproducible results
	random.seed(seed)

	# easy regression data: one dimensional noisy sine wave
	X=random.rand(1,n)*x_range

	X_test=array([[float(i)/n_test*x_range_test for i in range(n_test)]])
	Y_test=sin(X_test)
	Y=sin(X)+random.randn(n)*noise_var
	
	# shogun representation
	labels=RegressionLabels(Y[0])
	feats_train=RealFeatures(X)
	feats_test=RealFeatures(X_test)

	kernel=GaussianKernel(feats_train, feats_train, width)
	
	# two svr models: epsilon and nu
	svr_epsilon=LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
	svr_epsilon.train()
	svr_nu=LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
	svr_nu.train()

	# predictions
	kernel.init(feats_train, feats_test)
	out1_epsilon=svr_epsilon.apply().get_labels()
	out2_epsilon=svr_epsilon.apply(feats_test).get_labels()
	out1_nu=svr_nu.apply().get_labels()
	out2_nu=svr_nu.apply(feats_test).get_labels()

	return out1_epsilon, out2_epsilon, out1_nu, out2_nu, kernel
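Note that svr_param plays a different role in each model: the epsilon-tube width for LIBSVR_EPSILON_SVR and the nu parameter (a bound on the fraction of support vectors, in (0, 1]) for LIBSVR_NU_SVR. A hedged call sketch:

# compare epsilon-SVR and nu-SVR predictions on the synthetic sine data
out1_eps, out2_eps, out1_nu, out2_nu, kernel = regression_libsvr_modular(svm_c=1, svr_param=0.1)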
Example #14
def get_labels (num, ltype='twoclass'):
	"""Return labels used for classification.

	@param num Number of labels
	@param ltype Type of labels, either twoclass or series.
	@return Tuple to contain the labels as numbers in a tuple and labels as objects digestable for Shogun.
	"""

	import numpy
	from numpy import random
	from shogun.Features import BinaryLabels, RegressionLabels

	labels=[]
	if ltype=='twoclass':
		labels.append(random.rand(num).round()*2-1)
		# essential to wrap in array(), will segfault sometimes otherwise
		labels.append(BinaryLabels(numpy.array(labels[0])))
	elif ltype=='series':
		labels.append([numpy.double(x) for x in range(num)])
		# essential to wrap in array(), will segfault sometimes otherwise
		labels.append(RegressionLabels(numpy.array(labels[0])))
	else:
		return [None, None]

	return labels
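A usage sketch for the returned pair (raw values first, the Shogun label object second):

values, shogun_labels = get_labels(10)                   # twoclass by default
series_values, series_labels = get_labels(10, 'series')  # RegressionLabels over a series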
def regression_libsvr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
           width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):

    from shogun.Features import RegressionLabels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LibSVR

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    kernel = GaussianKernel(feats_train, feats_train, width)
    labels = RegressionLabels(label_train)

    svr = LibSVR(C, tube_epsilon, kernel, labels)
    svr.set_epsilon(epsilon)
    svr.train()

    kernel.init(feats_train, feats_test)
    out1 = svr.apply().get_labels()
    out2 = svr.apply(feats_test).get_labels()

    return out1, out2, kernel
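The two outputs exercise equivalent prediction paths: out1 reuses the kernel after kernel.init(feats_train, feats_test), while out2 passes the test features directly, so the vectors should match. A sketch:

from numpy import allclose

out1, out2, kernel = regression_libsvr_modular()
assert allclose(out1, out2)   # both paths evaluate the SVR on feats_test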
def regression_kernel_ridge_modular(fm_train=traindat,
                                    fm_test=testdat,
                                    label_train=label_traindat,
                                    width=0.8,
                                    tau=1e-6):

    from shogun.Features import RegressionLabels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import KernelRidgeRegression

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = RegressionLabels(label_train)

    krr = KernelRidgeRegression(tau, kernel, labels)
    krr.train(feats_train)

    kernel.init(feats_train, feats_test)
    out = krr.apply().get_labels()
    return out, kernel, krr
def regression_gaussian_process_modelselection (n=100,n_test=100, \
		x_range=6,x_range_test=10,noise_var=0.5,width=1, seed=1):
		
	from numpy import array, sin, random
	from pylab import plot, legend, show
	from shogun.Features import RealFeatures, RegressionLabels
	from shogun.Kernel import GaussianKernel
	from shogun.ModelSelection import GradientModelSelection, ModelSelectionParameters, R_LINEAR
	from shogun.Regression import GaussianLikelihood, ZeroMean, \
				ExactInferenceMethod, GaussianProcessRegression, GradientCriterion, \
				GradientEvaluation
		
	# Reproducible results
	random.seed(seed)
	
	# Easy regression data: one dimensional noisy sine wave
	X_train=random.rand(1,n)*x_range
	X_test=array([[float(i)/n_test*x_range_test for i in range(n_test)]])
	Y_test=sin(X_test)
	Y_train=sin(X_train)+random.randn(n)*noise_var
	
	# shogun representation
	labels=RegressionLabels(Y_train[0])
	feats_train=RealFeatures(X_train)
	feats_test=RealFeatures(X_test)
		
	# GP specification
	width=1
	shogun_width=width*width*2
	kernel=GaussianKernel(10,shogun_width)
	kernel.init(feats_train,feats_train)
	zmean = ZeroMean()
	likelihood = GaussianLikelihood()
	inf = ExactInferenceMethod(kernel, feats_train, zmean, labels, likelihood)
	gp = GaussianProcessRegression(inf, feats_train, labels)
	
	# Parameter tree for model selection
	root = ModelSelectionParameters() 
	c1 = ModelSelectionParameters("inference_method", inf)
	root.append_child(c1)

	c2 = ModelSelectionParameters("scale")
	c1.append_child(c2)
	c2.build_values(0.01, 4.0, R_LINEAR)

	c3 = ModelSelectionParameters("likelihood_model", likelihood)
	c1.append_child(c3)

	c4 = ModelSelectionParameters("sigma")
	c3.append_child(c4) 
	c4.build_values(0.001, 4.0, R_LINEAR) 

	c5 = ModelSelectionParameters("kernel", kernel) 
	c1.append_child(c5) 

	c6 = ModelSelectionParameters("width") 
	c5.append_child(c6) 
	c6.build_values(0.001, 4.0, R_LINEAR) 

	# Criterion for Gradient Search
	crit = GradientCriterion()
	
	# Evaluate our inference method for its derivatives
	grad = GradientEvaluation(gp, feats_train, labels, crit)
 
	grad.set_function(inf) 

	gp.print_modsel_params() 

	root.print_tree() 

	# Handles all of the above structures in memory
	grad_search = GradientModelSelection(root, grad) 

	# Set autolocking to false to get rid of warnings	
	grad.set_autolock(False) 

	# Search for best parameters
	best_combination = grad_search.select_model(True)

	# Outputs all result and information
	best_combination.print_tree() 
	best_combination.apply_to_machine(gp)

	result = grad.evaluate()
	result.print_result()
    
	#inference
	gp.set_return_type(GaussianProcessRegression.GP_RETURN_COV) 
	covariance = gp.apply_regression(feats_test) 
	covariance = covariance.get_labels() 
    
	gp.set_return_type(GaussianProcessRegression.GP_RETURN_MEANS) 
	mean = gp.apply_regression(feats_test) 
	mean = mean.get_labels() 

	# some things we can do
	alpha = inf.get_alpha()
	diagonal = inf.get_diagonal_vector()
	cholesky = inf.get_cholesky()
	
	# plot results
	plot(X_train[0],Y_train[0],'x') # training observations
	plot(X_test[0],Y_test[0],'-') # ground truth of test
	plot(X_test[0],mean, '-') # mean predictions of test
	
	legend(["training", "ground truth", "mean predictions"])
	
	show()

	return gp, alpha, labels, diagonal, covariance, mean, cholesky
Example #18
    prefix = 'kernel_'
    feats = util.get_features(indata, prefix)
    kargs = util.get_args(indata, prefix)
    fun = eval(indata[prefix + 'name'] + 'Kernel')
    kernel = fun(feats['train'], feats['train'], *kargs)

    prefix = 'regression_'
    kernel.parallel.set_num_threads(indata[prefix + 'num_threads'])

    try:
        rfun = eval(indata[prefix + 'name'])
    except NameError:
        print("%s is disabled/unavailable!" % indata[prefix + 'name'])
        return False

    labels = RegressionLabels(double(indata[prefix + 'labels']))
    if indata[prefix + 'type'] == 'svm':
        regression = rfun(indata[prefix + 'C'], indata[prefix + 'epsilon'],
                          kernel, labels)
    elif indata[prefix + 'type'] == 'kernelmachine':
        regression = rfun(indata[prefix + 'tau'], kernel, labels)
    else:
        return False

    regression.parallel.set_num_threads(indata[prefix + 'num_threads'])
    if prefix + 'tube_epsilon' in indata:
        regression.set_tube_epsilon(indata[prefix + 'tube_epsilon'])

    regression.train()

    alphas = 0
Example #19
X = Xall[0:ntrain, :]
y = yall[0:ntrain]

Xtest = Xall[ntrain:, :]
ytest = yall[ntrain:]

# preprocess data
for i in range(p):
    X[:, i] -= np.mean(X[:, i])
    X[:, i] /= np.linalg.norm(X[:, i])
y -= np.mean(y)

# train LASSO (instance renamed so it does not shadow the class)
lars = LeastAngleRegression()
lars.set_labels(RegressionLabels(y))
lars.train(RealFeatures(X.T))

# train ordinary LSR
if use_ridge:
    lsr = LinearRidgeRegression(0.01, RealFeatures(X.T), RegressionLabels(y))
    lsr.train()
else:
    lsr = LeastSquaresRegression()
    lsr.set_labels(RegressionLabels(y))
    lsr.train(RealFeatures(X.T))

# gather LASSO path
path = np.zeros((p, lars.get_path_size()))
for i in range(path.shape[1]):
    path[:, i] = lars.get_w(i)
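The path matrix holds one weight vector per LARS step, so plotting its rows against the step index gives the usual LASSO regularization path. A matplotlib sketch:

import matplotlib.pyplot as plt

# one curve per feature: weight value as the l1 constraint relaxes
plt.plot(path.T)
plt.xlabel("LARS step")
plt.ylabel("weight")
plt.show()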
Example #20
def modelselection_grid_search_krr_modular(fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
           width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import MeanSquaredError
    from shogun.Evaluation import CrossValidationSplitting
    from shogun.Features import RegressionLabels
    from shogun.Features import RealFeatures
    from shogun.Regression import KernelRidgeRegression
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters

    # training data
    features_train = RealFeatures(fm_train)
    features_test = RealFeatures(fm_test)

    # labels
    labels = RegressionLabels(label_train)

    # predictor (tau is chosen by model selection below, so no value is set here)
    predictor = KernelRidgeRegression()

    # splitting strategy for 5-fold cross-validation (for classification it is
    # better to use "StratifiedCrossValidationSplitting", but the standard
    # splitting is used here)
    splitting_strategy = CrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = MeanSquaredError()

    # cross-validation instance
    cross_validation = CrossValidation(predictor, features_train, labels,
                                       splitting_strategy,
                                       evaluation_criterium)

    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually needed
    # for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # print all parameters available for model selection
    # don't worry if yours is not included; write to the mailing list
    #predictor.print_modsel_params()

    # build parameter tree to select regularization parameter
    param_tree_root = create_param_tree()

    # model selection instance
    model_selection = GridSearchModelSelection(param_tree_root,
                                               cross_validation)

    # perform model selection with selected methods
    #print "performing model selection of"
    #print "parameter tree:"
    #param_tree_root.print_tree()

    #print "starting model selection"
    # print the current parameter combination, if no parameter nothing is printed
    print_state = False

    best_parameters = model_selection.select_model(print_state)

    # print best parameters
    #print "best parameters:"
    #best_parameters.print_tree()

    # apply them and evaluate
    best_parameters.apply_to_machine(predictor)
    result = cross_validation.evaluate()
    return result
Example #21
def modelselection_grid_search_libsvr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
           width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import MeanSquaredError
    from shogun.Evaluation import CrossValidationSplitting
    from shogun.Features import RegressionLabels
    from shogun.Features import RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LibSVR
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination

    # training data
    features_train = RealFeatures(fm_train)

    # kernel
    kernel = GaussianKernel(features_train, features_train, width)

    # print all parameters available for model selection
    # don't worry if yours is not included; write to the mailing list
    #kernel.print_modsel_params()

    labels = RegressionLabels(label_train)

    # predictor
    predictor = LibSVR(C, tube_epsilon, kernel, labels)
    predictor.set_epsilon(epsilon)

    # splitting strategy for 5-fold cross-validation (for classification it is
    # better to use "StratifiedCrossValidationSplitting", but the standard
    # splitting is used here)
    splitting_strategy = CrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = MeanSquaredError()

    # cross-validation instance
    cross_validation = CrossValidation(predictor, features_train, labels,
                                       splitting_strategy,
                                       evaluation_criterium)

    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually needed
    # for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # print all parameters available for model selection
    # don't worry if yours is not included; write to the mailing list
    #predictor.print_modsel_params()

    # build parameter tree to select C1 and C2
    param_tree_root = ModelSelectionParameters()
    c1 = ModelSelectionParameters("C1")
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP)

    c2 = ModelSelectionParameters("C2")
    param_tree_root.append_child(c2)
    c2.build_values(-2.0, 2.0, R_EXP)

    # model selection instance
    model_selection = GridSearchModelSelection(param_tree_root,
                                               cross_validation)

    # perform model selection with selected methods
    #print "performing model selection of"
    #print "parameter tree"
    #param_tree_root.print_tree()

    #print "starting model selection"
    # print the current parameter combination, if no parameter nothing is printed
    print_state = False
    # lock data before since model selection will not change the kernel matrix
    # (use with care) This avoids that the kernel matrix is recomputed in every
    # iteration of the model search
    predictor.data_lock(labels, features_train)
    best_parameters = model_selection.select_model(print_state)

    # print best parameters
    #print "best parameters:"
    #best_parameters.print_tree()

    # apply them and evaluate
    best_parameters.apply_to_machine(predictor)
    result = cross_validation.evaluate()
    return result