# %%
ot.RandomGenerator.SetSeed(0)
distribution = ot.Normal(2)
distribution.setDescription(["x", "y"])
func = ot.SymbolicFunction(
    ["x", "y"],
    ["2 * x - y + 3 + 0.05 * sin(0.8*x)"],
)
# The inputs are sampled before the noise: both draws consume the same random
# generator, so their order determines the generated values.
input_sample = distribution.getSample(30)
noise_distribution = ot.Normal(0, 0.1)
epsilon = noise_distribution.getSample(30)
noise_free_output = func(input_sample)
output_sample = noise_free_output + epsilon

# %%
# We now fit a linear model to the data with the `LinearModelAlgorithm` class
# and retrieve the associated result:

# %%
algo = ot.LinearModelAlgorithm(input_sample, output_sample)
fitted = algo.getResult()
result = ot.LinearModelResult(fitted)

# %%
# We now have the result structure. Since the underlying model is a
# regression, it assumes a noise distribution associated with the residuals.
# Let us get it:

# %%
noise = result.getNoiseDistribution()
print(noise)

# %%
# We can also get the residuals:

# %%
residuals = result.getSampleResiduals()
print(residuals)
from math import sin

ot.TESTPREAMBLE()

# First case: build a deterministic one-dimensional data set and fit a linear
# model on it. The small sin(x) term plays the role of the noise.
print("Fit y ~ 3 - 2 x + 0.05 * sin(x) model using 20 points (sin(x) ~ noise)")
size = 20
oneSample = ot.Sample(size, 1)
twoSample = ot.Sample(size, 1)
for i in range(size):
    # Compute the abscissa once and reuse it for both the input and the output.
    abscissa = 7.0 * sin(-3.5 + (6.5 * i) / (size - 1.0)) + 2.0
    oneSample[i, 0] = abscissa
    twoSample[i, 0] = -2.0 * abscissa + 3.0 + 0.05 * sin(abscissa)

test = ot.LinearModelAlgorithm(oneSample, twoSample)
result = ot.LinearModelResult(test.getResult())
print("trend coefficients = ", result.getCoefficients())

# Second case: quadratic model on a regular grid.
print("Fit y ~ 1 + 0.1 x + 10 x^2 model using 100 points")
ot.RandomGenerator.SetSeed(0)
size = 100
# Define a linspace from 0 to 10 with `size` points.
# A Box experiment is used for that; 2 is subtracted from the requested level
# to account for the end points 0 and 1 (presumably Box adds them itself, which
# is what the loop over range(size) below relies on).
experiment = ot.Box([size - 2])
X = experiment.generate()
# X is defined in [0, 1]: rescale it to [0, 10].
X *= [10]
# X2 starts as a copy of X and is overwritten with the squared abscissas.
X2 = ot.Sample(X)
for i in range(size):
    x_i = X[i, 0]
    X2[i, 0] = x_i * x_i
# ---------------------
#
# We consider a linear model whose purpose is to predict the aerial biomass as a function of the soil physicochemical properties,
# and we wish to identify the predictive variables that yield the simplest and most precise linear regression model.
#
# We start by creating a linear model that takes into account all of the physicochemical variables present in the Linthurst data set.
#
# Let us consider the following linear model: :math:`\tilde{Y} = a_0 + \sum_{i = 1}^{d} a_i X_i + \epsilon`. If all of the predictive variables
# are considered, the regression can be performed with the help of the `LinearModelAlgorithm` class.

# %%
# Column 0 of `sample` holds the output; columns 1..dimension hold the inputs.
output_sample = sample[:, 0]
input_sample = sample[:, 1:dimension + 1]
algo_full = ot.LinearModelAlgorithm(input_sample, output_sample)
algo_full.run()
result_full = ot.LinearModelResult(algo_full.getResult())
r_squared = result_full.getRSquared()
print('R-squared = ', r_squared)
adjusted_r_squared = result_full.getAdjustedRSquared()
print('Adjusted R-squared = ', adjusted_r_squared)

# %%
# Forward stepwise regression
# ---------------------------
#
# We now wish to select the most important predictive variables with a stepwise algorithm.
#
# It is first necessary to define a suitable function basis for the regression. Each variable is associated with a univariate basis function,
# and an additional basis function is used to represent the constant term :math:`a_0`.

# %%
# The basis starts with the constant function, which carries :math:`a_0`.
functions = [ot.SymbolicFunction(input_description, ['1.0'])]