Beispiel #1
0
    def fit(self, X, y, **fit_params):
        """Fit the linear regression model.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Training data.
        y : array-like, shape = (n_samples, [n_output_dims])
            Target values. A flat sequence of length n_samples is promoted
            to a single-column 2-d array before the algorithm is built.
        **fit_params : dict
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        self : returns an instance of self.

        """
        # Function-scope import so the method does not rely on a module-level
        # numpy import being present.
        import numpy as np

        # The documented shape makes the output dimension optional, but the
        # algorithm is built from 2-d samples, so a flat target vector is
        # turned into one column. 2-d inputs are passed through untouched.
        if np.ndim(y) == 1:
            y = np.asarray(y).reshape(-1, 1)
        algo = ot.LinearModelAlgorithm(X, y)
        algo.run()
        # Keep the fitted result on the estimator for later use.
        self.result_ = algo.getResult()
        return self
Beispiel #2
0
    sampleZ[i, 0] = sampleY[i, 0]**2
# Print the p-values of the Fisher and residual-mean linear model tests
# computed between sampleY and sampleZ.
print("LinearModelFisher pvalue=%1.2g" %
      ot.LinearModelTest.LinearModelFisher(sampleY, sampleZ).getPValue())
print("LinearModelResidualMean pvalue=%1.2g" %
      ot.LinearModelTest.LinearModelResidualMean(sampleY, sampleZ).getPValue())

# Durbin Watson
# Fix the seed before drawing the noise sample below.
ot.RandomGenerator.SetSeed(5415)
# Additive noise and the cubic polynomial used to generate the observations.
eps = ot.Normal(0, 20)
f = ot.SymbolicFunction('x', '5+2*x+x^2-0.1*x^3')
N = 15
# N abscissas ranging from 0 to 20.
x = ot.Sample([[0], [1.42857], [2.85714], [4.28571], [5.71429], [7.14286],
               [8.57143], [10], [11.4286], [12.8571], [14.2857], [15.7143],
               [17.1429], [18.5714], [20]])
y = f(x) + eps.getSample(N)
# Coefficients of the linear fit of y on x.
# NOTE(review): linmodel is not passed to the test call below - confirm
# whether it is still needed.
linmodel = ot.LinearModelAlgorithm(x, y).getResult().getCoefficients()
dwTest = ot.LinearModelTest.LinearModelDurbinWatson(x, y)
print('Durbin Watson = ', dwTest)

# Index set of size 5; fill() presumably sets it to 0..4 - TODO confirm.
selection = ot.Indices(5)
selection.fill()

# Index set of size 1 holding the value 0, and the first marginal of sampleX.
selection2 = ot.Indices(1, 0)
sampleX0 = sampleX.getMarginal(0)

# Regression test between 2 samples : firstSample of dimension n and
# secondSample of dimension 1. If firstSample[i] is the numerical sample
# extracted from firstSample (ith coordinate of each point of the
# numerical sample), PartialRegression performs the Regression test
# simultaneously on all firstSample[i] and secondSample, for i in the
# selection. The Regression test tests if the regression model between two
Beispiel #3
0
import openturns.testing as ott
from math import sin

ot.TESTPREAMBLE()

# First case: build a linear model on 20 deterministic points.
print("Fit y ~ 3 - 2 x + 0.05 * sin(x) model using 20 points (sin(x) ~ noise)")
size = 20
oneSample = ot.Sample(size, 1)
twoSample = ot.Sample(size, 1)
for i in range(size):
    # Input: 2 + 7 * sin(t), with t going linearly from -3.5 to 3.0.
    oneSample[i, 0] = 7.0 * sin(-3.5 + (6.5 * i) / (size - 1.0)) + 2.0
    # Output: linear trend 3 - 2 x plus a small sine perturbation.
    twoSample[i,
              0] = -2.0 * oneSample[i, 0] + 3.0 + 0.05 * sin(oneSample[i, 0])

# Fit the model and print the estimated coefficients.
test = ot.LinearModelAlgorithm(oneSample, twoSample)
result = ot.LinearModelResult(test.getResult())
print("trend coefficients = ", result.getCoefficients())

# Second case: quadratic model on 100 points.
print("Fit y ~ 1 + 0.1 x + 10 x^2 model using 100 points")
ot.RandomGenerator.SetSeed(0)
size = 100
# Define a linspace from 0 to 10 with size points.
# A Box experiment is used with size - 2 as its level, to account for the
# 0 and 1 end points (presumably so that size points are generated).
experiment = ot.Box([size - 2])
X = experiment.generate()
# X is defined in [0,1]; rescale it to [0,10].
X *= [10]
# X2 starts as a copy of X.
X2 = ot.Sample(X)
for i in range(size):
# Fix the seed before drawing the samples below.
ot.RandomGenerator.SetSeed(0)
# Input distribution with two components named 'x' and 'y'.
distribution = ot.Normal(2)
distribution.setDescription(['x', 'y'])
# Model: linear in x and y, plus a small sine perturbation in x.
func = ot.SymbolicFunction(['x', 'y'], ['2 * x - y + 3 + 0.05 * sin(0.8*x)'])
input_sample = distribution.getSample(30)
# Additive observation noise, one value per input point.
epsilon = ot.Normal(0, 0.1).getSample(30)
output_sample = func(input_sample) + epsilon

# %%
# Linear regression
# -----------------
#
# Let us run the linear model algorithm using the `LinearModelAlgorithm` class and get the associated results:

# %%
algo = ot.LinearModelAlgorithm(input_sample, output_sample)
result = algo.getResult()

# %%
# Residuals analysis
# ------------------
#
# We can now analyse the residuals of the regression on the training data.
# For clarity purposes, only the first 5 residual values are printed.

# %%
residuals = result.getSampleResiduals()
print(residuals[:5])

# %%
# Alternatively, the `standardized` or `studentized` residuals can be used:
Beispiel #5
0
import openturns.testing as ott
from math import sin

ot.TESTPREAMBLE()

# First case: build a linear model on 20 deterministic points.
print("Fit y ~ 3 - 2 x + 0.05 * sin(x) model using 20 points (sin(x) ~ noise)")
size = 20
oneSample = ot.Sample(size, 1)
twoSample = ot.Sample(size, 1)
for i in range(size):
    # Input: 2 + 7 * sin(t), with t going linearly from -3.5 to 3.0.
    oneSample[i, 0] = 7.0 * sin(-3.5 + (6.5 * i) / (size - 1.0)) + 2.0
    # Output: linear trend 3 - 2 x plus a small sine perturbation.
    twoSample[i,
              0] = -2.0 * oneSample[i, 0] + 3.0 + 0.05 * sin(oneSample[i, 0])

# Fit the model, wrap the result in an analysis object and print it.
test = ot.LinearModelAlgorithm(oneSample, twoSample)
result = ot.LinearModelResult(test.getResult())
analysis = ot.LinearModelAnalysis(result)
print(analysis)
# Confidence level (95%) used for the coefficient estimates.
alpha = 0.95
# Confidence interval of the coefficients at level alpha.
interval = analysis.getCoefficientsConfidenceInterval(alpha)
print("confidence intervals with level=%1.2f : %s" % (alpha, interval))

# Three blank lines separate the two cases in the printed output.
print("")
print("")
print("")
# Second case: quadratic model on 100 points.
print("Fit y ~ 1 + 0.1 x + 10 x^2 model using 100 points")
ot.RandomGenerator.SetSeed(0)
size = 100
Beispiel #6
0
# LinearModel tests
# Draw a 2-d Normal sample with mean 3, standard deviation 2 and a 0.8
# correlation between the two components.
dimension = 2
R = ot.CorrelationMatrix(dimension)
R[0, 1] = 0.8
distribution = ot.Normal(
    ot.Point(dimension, 3.0), ot.Point(dimension, 2.0), R)
size = 100
sample2D = distribution.getSample(size)
# Split the 2-d sample into two 1-d samples, one per component.
firstSample = ot.Sample(size, 1)
secondSample = ot.Sample(size, 1)
for i in range(size):
    firstSample[i] = ot.Point(1, sample2D[i, 0])
    secondSample[i] = ot.Point(1, sample2D[i, 1])

# Fit secondSample on firstSample and print the linear model graph.
lmtest = ot.LinearModelAlgorithm(firstSample, secondSample).getResult()
drawLinearModelVTest = ot.VisualTest.DrawLinearModel(lmtest)
print("LinearModelV = ", drawLinearModelVTest)

# Same fitted model, residual graph.
drawLinearModelResidualTest = ot.VisualTest.DrawLinearModelResidual(lmtest)
print("LinearModelR = ", drawLinearModelResidualTest)

# CobWeb tests
# Input sample of dimension 6 with variables named X0..X5.
size = 100
inputDimension = 6
inputSample = ot.Normal(inputDimension).getSample(size)
inputVar = ["X" + str(i) for i in range(inputDimension)]
# One-element description and an empty expression string, both filled by
# the code that follows.
formula = ot.Description(1)
expression = ""
for i in range(inputDimension):
    if i > 0:
Beispiel #7
0
from matplotlib import pylab as plt
# Set the log display flags to NONE.
ot.Log.Show(ot.Log.NONE)

# %%
# Generate X,Y samples
N = 1000
Xsample = ot.Triangular(1.0, 5.0, 10.0).getSample(N)
# Y = 3 * X plus Normal(0.5, 1.0) noise.
Ysample = Xsample * 3.0 + ot.Normal(0.5, 1.0).getSample(N)

# %%
# Generate a particular scalar sampleX
particularXSample = ot.Triangular(1.0, 5.0, 10.0).getSample(N)

# %%
# Create the linear model from Y,X samples
result = ot.LinearModelAlgorithm(Xsample, Ysample).getResult()

# Get the coefficients ai
print("coefficients of the linear regression model = ",
      result.getCoefficients())

# Get the confidence intervals of the ai coefficients (level 0.9)
print("confidence intervals of the coefficients = ",
      ot.LinearModelAnalysis(result).getCoefficientsConfidenceInterval(0.9))

# %%
# Validate the model with a visual test
graph = ot.VisualTest.DrawLinearModel(Xsample, Ysample, result)
view = viewer.View(graph)

# %%
Beispiel #8
0
 def fit(self, X, y, **fit_params):
     """Fit a linear model of y on X and store the result.

     Parameters
     ----------
     X : 2-d array-like
         Training inputs, passed unchanged to the algorithm.
     y : array with a ``reshape`` method
         Target values; reshaped to a single column before fitting.
     **fit_params : dict
         Accepted for interface compatibility; not used by this method.

     Returns
     -------
     self : the fitted estimator; the result is kept in ``self._result``.
     """
     # The target is handed over as one column.
     algo = ot.LinearModelAlgorithm(X, y.reshape(-1, 1))
     algo.run()
     self._result = algo.getResult()
     return self
Beispiel #9
0
import openturns as ot
from openturns.viewer import View

N = 1000
# Create a sample X.
dist = ot.Triangular(1.0, 5.0, 10.0)
# Create a Y sample: Y = exp(X/2) + eps.
eps = ot.Normal(0.0, 1.0)
# Draw X and eps jointly, then evaluate the model on each (x, eps) pair.
sample = ot.ComposedDistribution([dist, eps]).getSample(N)
f = ot.SymbolicFunction(['x', 'eps'], ['exp(0.5*x)+eps'])
sampleY = f(sample)
# Keep only the X component as the regression input.
sampleX = sample.getMarginal(0)
sampleX.setName('X')
# Fit a linear model of Y on X and draw its residual graph.
regressionModel = ot.LinearModelAlgorithm(sampleX, sampleY).getResult()
graph = ot.VisualTest.DrawLinearModelResidual(sampleX, sampleY,
                                              regressionModel)
# Restyle the first drawable with the 'times' marker and put it back.
cloud = graph.getDrawable(0)
cloud.setPointStyle('times')
graph.setDrawable(cloud, 0)
# Drop the graph title before displaying it.
graph.setTitle('')
View(graph)
Beispiel #10
0
try:

    # First case: build a linear model on 20 deterministic points.
    print(
        "Fit y ~ 3 - 2 x + 0.05 * sin(x) model using 20 points (sin(x) ~ noise)"
    )
    size = 20
    oneSample = ot.Sample(size, 1)
    twoSample = ot.Sample(size, 1)
    for i in range(size):
        # Input: 2 + 7 * sin(t), with t going linearly from -3.5 to 3.0.
        oneSample[i, 0] = 7.0 * sin(-3.5 + (6.5 * i) / (size - 1.0)) + 2.0
        # Output: linear trend 3 - 2 x plus a small sine perturbation.
        twoSample[
            i, 0] = -2.0 * oneSample[i, 0] + 3.0 + 0.05 * sin(oneSample[i, 0])

    # Fit the model and print the estimated coefficients.
    # NOTE(review): other copies of this script call getCoefficients() -
    # confirm which accessor the targeted OpenTURNS version provides.
    test = ot.LinearModelAlgorithm(oneSample, twoSample)
    result = ot.LinearModelResult(test.getResult())
    print("trend coefficients = ", result.getTrendCoefficients())

    # Second case: quadratic model on 100 points.
    print("Fit y ~ 1 + 0.1 x + 10 x^2 model using 100 points")
    ot.RandomGenerator.SetSeed(0)
    size = 100
    # Define a linspace from 0 to 10 with size points.
    # A Box experiment is used with size - 2 as its level, to account for
    # the 0 and 1 end points (presumably so that size points are generated).
    experiment = ot.Box([size - 2])
    X = experiment.generate()
    # X is defined in [0,1]; rescale it to [0,10].
    X *= [10]
    # X2 starts as a copy of X.
    X2 = ot.Sample(X)
    for i in range(size):
# Wrap the raw data values in an OpenTURNS sample. ot.Sample is the class
# name used by the rest of this file; NumericalSample is its former name.
Sample = ot.Sample(data.values)
Sample.setName("LifeCycleSavings")
Sample.setDescription(["sr","pop15","pop75","dpi","ddpi"])

# One single-column sample per variable, in description order.
sr    = Sample[:,0]
pop15 = Sample[:,1]
pop75 = Sample[:,2]
dpi   = Sample[:,3]
ddpi  = Sample[:,4]

# model1: regress sr (column 0) on the four remaining columns.
outputSample = Sample[:,0]
inputSample = Sample[:,1:5]

algo1 = ot.LinearModelAlgorithm(inputSample, outputSample)
result1 = algo1.getResult()
# Reuse the result fetched above rather than asking the algorithm again.
analysis1 = ot.LinearModelAnalysis(result1)

# Render each diagnostic graph to a file named after the drawing method.
for plot in ["drawResidualsVsFitted", "drawScaleLocation", "drawQQplot", "drawCookDistance", "drawResidualsVsLeverages", "drawCookVsLeverages"]:
    graph = getattr(analysis1, plot)()
    graph.draw("model1-"+plot, 640, 480)

# plot of residuals versus fitted values
graph = analysis1.drawResidualsVsFitted()
View(graph)

# scale-location plot of sqrt(|residuals|) versus fitted values
graph = analysis1.drawScaleLocation()
View(graph)
ot.TESTPREAMBLE()


# First case: build a linear model on 20 deterministic points.
print(
    "Fit y ~ 3 - 2 x + 0.05 * sin(x) model using 20 points (sin(x) ~ noise)")
size = 20
oneSample = ot.Sample(size, 1)
twoSample = ot.Sample(size, 1)
for i in range(size):
    # Input: 2 + 7 * sin(t), with t going linearly from -3.5 to 3.0.
    oneSample[i, 0] = 7.0 * sin(-3.5 + (6.5 * i) / (size - 1.0)) + 2.0
    # Output: linear trend 3 - 2 x plus a small sine perturbation.
    twoSample[i, 0] = -2.0 * oneSample[
        i, 0] + 3.0 + 0.05 * sin(oneSample[i, 0])

# Fit the model and print the estimated coefficients.
test = ot.LinearModelAlgorithm(oneSample, twoSample)
result = ot.LinearModelResult(test.getResult())
print ("trend coefficients = ", result.getCoefficients())

# Second case: quadratic model on 100 points.
print("Fit y ~ 1 + 0.1 x + 10 x^2 model using 100 points")
ot.RandomGenerator.SetSeed(0)
size = 100
# Define a linspace from 0 to 10 with size points.
# A Box experiment is used with size - 2 as its level, to account for the
# 0 and 1 end points (presumably so that size points are generated).
experiment = ot.Box([size - 2])
X = experiment.generate()
# X is defined in [0,1]; rescale it to [0,10].
X *= [10]
# X2 starts as a copy of X.
X2 = ot.Sample(X)
for i in range(size):
# OpenTURNS version of the model.
# Build the two-column input sample by stacking pop15 and pop75 onto an
# initially zero-column sample.
X = ot.Sample(len(pop15), 0)
P = ot.Sample.BuildFromPoint(pop15)
X.stack(P)
P = ot.Sample.BuildFromPoint(pop75)
X.stack(P)
X.setDescription(["pop15", "pop75"])
Y = ot.Sample.BuildFromPoint(sr)
# Model with pop15 + pop75 without intercept
# We do not run the model with pop15 only for the moment
f = ot.SymbolicFunction(["pop15", "pop75"], ["pop15"])
g = ot.SymbolicFunction(["pop15", "pop75"], ["pop75"])
# Constant (intercept) function; not part of the basis built below.
# NOTE(review): h is unused in the visible code - confirm it is needed.
h = ot.SymbolicFunction(["pop15", "pop75"], ["1"])
basis = ot.Basis([f, g])
model = ot.LinearModelAlgorithm(X, basis, Y)
model.run()
result = model.getResult()
analysis = ot.LinearModelAnalysis(result)

# Compare the OpenTURNS results with the reference values r2, ar2 and ftest
# (defined outside this excerpt) to 14 decimals.
np.testing.assert_almost_equal(result.getRSquared(), r2, 14)
np.testing.assert_almost_equal(result.getAdjustedRSquared(), ar2, 14)
np.testing.assert_almost_equal(analysis.getFisherScore(), ftest, 14)

#---------------------------------------------------------------------
# Model 2 : 2 parameters + intercept

# Reference fit of sr on pop75 and pop15 through Formula / stats.lm
# (presumably R called from Python - TODO confirm against the imports).
formula = Formula('sr ~ pop75 + pop15')
fit = stats.lm(formula)
summary = stats.summary_lm(fit)