Example #1
0
    def __init__(self,
                 inputSample,
                 outputSample,
                 noiseThres=None,
                 saturationThres=None,
                 resDistFact=None,
                 boxCox=False):
        """Store the data and options, check them, then run the analysis.

        Parameters
        ----------
        inputSample : sequence
            Input observations. They are stacked with ``np.vstack`` and wrapped
            in an ``ot.NumericalSample``; the result must have dimension 1.
        outputSample : sequence
            Output observations, converted the same way. Must have the same
            size as ``inputSample`` and dimension 1.
        noiseThres : optional
            Noise threshold, stored unchanged. Censoring is enabled as soon as
            this or ``saturationThres`` is not None.
        saturationThres : optional
            Saturation threshold, stored unchanged.
        resDistFact : optional
            Stored as the residual distribution factory; defaults to
            ``ot.NormalFactory()`` when None.
        boxCox : bool or float, optional
            A float enables the Box Cox transformation with that value stored
            as lambda. Any other value is stored unchanged as the Box Cox flag
            and lambda is set to None.

        Raises
        ------
        AssertionError
            If there are fewer than 3 observations, if the two samples differ
            in size, or if either sample is not of dimension 1.
        """
        self._inputSample = ot.NumericalSample(np.vstack(inputSample))
        self._outputSample = ot.NumericalSample(np.vstack(outputSample))
        self._noiseThres = noiseThres
        self._saturationThres = saturationThres

        # Flag telling whether censored data must be taken into account.
        self._censored = (noiseThres is not None
                          or saturationThres is not None)
        if self._censored:
            # A dedicated results instance is only needed with censoring.
            self._resultsCens = _Results()
        # The uncensored results instance is created in both cases.
        self._resultsUnc = _Results()

        # Default is NormalFactory.
        self._resDistFact = (ot.NormalFactory() if resDistFact is None
                             else resDistFact)

        # A float enables the transformation with the given value.
        # isinstance (rather than an exact type comparison) also accepts float
        # subclasses such as numpy.float64, whose value would otherwise be
        # stored as the flag and never used as lambda. bool is not a float, so
        # True/False keep acting as a plain on/off flag.
        if isinstance(boxCox, float):
            self._lambdaBoxCox = boxCox
            self._boxCox = True
        else:
            self._lambdaBoxCox = None
            self._boxCox = boxCox

        self._size = self._inputSample.getSize()
        self._dim = self._inputSample.getDimension()

        # Assertions on parameters
        assert (self._size >= 3), "Not enough observations."
        assert (self._size == self._outputSample.getSize()), \
                "InputSample and outputSample must have the same size."
        assert (self._dim == 1), "Dimension of inputSample must be 1."
        assert (self._outputSample.getDimension() == 1
                ), "Dimension of outputSample must be 1."

        # run the analysis
        self._run()
        # print warnings
        self._printWarnings()
    def fit_to_normal_distribution(self, parameter, showQQ=False):
        """Estimate a normal distribution from the sample of one parameter.

        Parameters
        ----------
        parameter: string
            Key into ``self.sampledict`` selecting the sample to fit.
        showQQ: bool, optional
            When true, display a QQ plot of the sample against the fit.

        Returns
        -------
        :class:`openturns.Distribution`
        """
        data = self.sampledict[parameter]
        fitted = ot.NormalFactory().build(data)
        logger.debug(fitted)
        if not showQQ:
            return fitted
        # Visual check of the fitted distribution against the data.
        View(ot.VisualTest.DrawQQplot(data, fitted)).show()
        return fitted
import openturns as ot
from matplotlib import pyplot as plt
from openturns.viewer import View
# Reset the OpenTURNS random generator so the sampled points are repeatable.
ot.RandomGenerator.SetSeed(0)
factory = ot.NormalFactory()
# build() is called without a sample: presumably this yields the factory's
# default distribution, used below as the reference to sample from
# (TODO confirm against the OpenTURNS documentation).
ref = factory.build()
dimension = ref.getDimension()
# Only distributions of dimension 1 or 2 are drawn.
if dimension <= 2:
    sample = ref.getSample(50)
    # Distribution estimated from the 50 points drawn from the reference.
    distribution = factory.build(sample)
    if dimension == 1:
        distribution.setDescription(['$t$'])
        pdf_graph = distribution.drawPDF(256)
        # The cloud pairs the sample with a second sample of the same size and
        # dimension 1 (presumably zeros, i.e. points laid on the x axis --
        # TODO confirm).
        cloud = ot.Cloud(sample, ot.NumericalSample(sample.getSize(), 1))
        cloud.setColor('blue')
        cloud.setPointStyle('fcircle')
        pdf_graph.add(cloud)
        fig = plt.figure(figsize=(10, 4))
        plt.suptitle(str(distribution))
        pdf_axis = fig.add_subplot(111)
        View(pdf_graph, figure=fig, axes=[pdf_axis], add_legend=False)
    else:
        # NOTE(review): the sample is redrawn here (500 points) after the fit
        # above, so the plotted cloud is not the data the distribution was
        # estimated from.
        sample = ref.getSample(500)
        distribution.setDescription(['$t_0$', '$t_1$'])
        pdf_graph = distribution.drawPDF([256]*2)
        cloud = ot.Cloud(sample)
        cloud.setColor('red')
        cloud.setPointStyle('fcircle')
        pdf_graph.add(cloud)
        fig = plt.figure(figsize=(10, 4))
        plt.suptitle(str(distribution))
Example #4
0
# Note however that the 0.98 selected/cumulated variance ratio actually means it is very good.
# Build one validation graph per marginal of x, titled with its ratio (in %).
# x, kl_results and ratios are defined outside this fragment.
graphs = []
for i in range(x.getDimension()):
    validation = ot.KarhunenLoeveValidation(x.getMarginal(i), kl_results[i])
    # Keep the graph at position (0, 0) of the validation drawing.
    graph = validation.drawValidation().getGraph(0, 0)
    graph.setTitle(
        f'KL validation - marginal #{i} ratio={100.0 * ratios[i]:.2f} %')
    View(graph)
    graphs.append(graph)

# %%
# On the 2nd marginal we can filter out the points inside the 99% level-set
# to see that only a few points out of N are actually outliers.
graph = graphs[1]
# Points carried by the drawable at index 1 of that graph.
data = graph.getDrawable(1).getData()
# Fit a normal distribution to those points and evaluate its log-PDF on them.
normal = ot.NormalFactory().build(data)
log_pdf = normal.computeLogPDF(data).asPoint()
# Pair each point with its log-density, then sort by ascending log-density so
# the least likely points come first.
l_pair = [(log_pdf[i], data[i]) for i in range(len(data))]
l_pair.sort(key=lambda t: t[0])
index_bad = int(0.01 * len(data))  # here 0.01 = (100-99)%
# Log-density at the 1% rank; used below as the level of the contour.
beta = l_pair[index_bad][0]
gnorm = normal.drawLogPDF(data.getMin(), data.getMax())
# The index_bad + 1 lowest-density points are kept as the outliers.
bad = [l_pair[i][1] for i in range(index_bad + 1)]
c = ot.Cloud(bad)
c.setPointStyle("bullet")
c.setColor("blue")
# Replace the drawable at index 1 with the cloud of outliers only.
graph.setDrawable(c, 1)
# Turn the first drawable of the log-PDF graph into a single-level contour.
dr = gnorm.getDrawable(0)
dr.setLevels([beta])
dr.setColor("red")
dr.setLegend("99% level-set")
Example #5
0
# Reset the OpenTURNS random generator so the generated data are repeatable.
ot.RandomGenerator.SetSeed(0)
# Build a 4-column sample of size 300: the columns of a 3-dimensional normal
# followed by one Gumbel column, named X0..X3.
sample = ot.Normal(3).getSample(300)
sample.stack(ot.Gumbel().getSample(300))
sample.setDescription(['X0', 'X1', 'X2', 'X3'])
# NOTE(review): the sample is written to `filename` (defined outside this
# fragment) while the data model below reads "data1.csv" -- presumably the
# same file; confirm.
sample.exportToCSVFile(filename, ',')
# Column indices handed to the data model.
columns = [0, 2, 3]

model = persalys.DataModel('myDataModel', "data1.csv", columns)
myStudy.add(model)
print(model)

# Inference analysis ##
analysis = persalys.InferenceAnalysis('analysis', model)
variables = ["X0", "X3"]
analysis.setInterestVariables(variables)
# Candidate factories are set explicitly for X3 only.
factories = [ot.NormalFactory(), ot.GumbelFactory()]
analysis.setDistributionsFactories("X3", factories)
analysis.setLevel(0.1)
myStudy.add(analysis)
print(analysis)

analysis.run()

result = analysis.getResult()
print("result=", result)
print(result.getFittingTestResultForVariable('X3'))

# script
# NOTE(review): exec() runs the Python script generated by the study itself;
# this must never be fed a script coming from an untrusted source.
script = myStudy.getPythonScript()
print(script)
exec(script)
Example #6
0
        i].getSample(size)
    continuousSampleCollection[i].setName(
        continuousDistributionCollection[i].getName())
    sampleCollection[i] = continuousSampleCollection[i]
# Draw one sample per discrete distribution, name it after its distribution,
# and store it after the continuous samples in sampleCollection.
for i in range(discreteDistributionNumber):
    discreteSampleCollection[i] = discreteDistributionCollection[i].getSample(
        size)
    discreteSampleCollection[i].setName(
        discreteDistributionCollection[i].getName())
    sampleCollection[continuousDistributionNumber +
                     i] = discreteSampleCollection[i]

# Three candidate factories competing on a sample drawn from Uniform(-1.5, 2.5).
factoryCollection = ot.DistributionFactoryCollection(3)
factoryCollection[0] = ot.UniformFactory()
factoryCollection[1] = ot.BetaFactory()
factoryCollection[2] = ot.NormalFactory()
aSample = ot.Uniform(-1.5, 2.5).getSample(size)
# Best candidate according to the BIC criterion.
model, best_bic = ot.FittingTest.BestModelBIC(aSample, factoryCollection)
print("best model BIC=", repr(model))
# Best candidate according to the Kolmogorov test; `model` is overwritten.
model, best_result = ot.FittingTest.BestModelKolmogorov(
    aSample, factoryCollection)
print("best model Kolmogorov=", repr(model))

# BIC adequation
# Entry (i, j) holds the BIC of sample i against distribution j.
# The trailing 0 is presumably the number of estimated parameters --
# TODO confirm against the FittingTest.BIC documentation.
resultBIC = ot.SquareMatrix(distributionNumber)
for i in range(distributionNumber):
    for j in range(distributionNumber):
        value = ot.FittingTest.BIC(sampleCollection[i],
                                   distributionCollection[j], 0)
        resultBIC[i, j] = value
print("resultBIC=", repr(resultBIC))
Example #7
0
# coll and weights are started outside this fragment; each weight added here
# goes with a distribution added to coll.
weights.add(-2.5)
coll.add(ot.Gamma(3.0, 4.0, -2.0))
weights.add(2.5)
distribution = ot.RandomMixture(coll, weights)
print("distribution=", repr(distribution))
print("distribution=", distribution)
mu = distribution.getMean()[0]
sigma = distribution.getStandardDeviation()[0]
# Evaluate the PDF at 10 points evenly spaced over [mu - 3*sigma, mu + 3*sigma].
for i in range(10):
    x = mu + (-3.0 + 6.0 * i / 9.0) * sigma
    print("pdf( %.6f )=%.6f" % (x, distribution.computePDF(x)))

# Tests of the projection mechanism
# Candidate factories handed to project().
collFactories = [
    ot.UniformFactory(),
    ot.NormalFactory(),
    ot.TriangularFactory(),
    ot.ExponentialFactory(),
    ot.GammaFactory()
]
#, TrapezoidalFactory()
# project() returns two values, printed below as projections and norms.
result, norms = distribution.project(collFactories)
print("projections=", result)
print("norms=", norms)
# ------------------------------ Multivariate tests ------------------------------#
# 2D RandomMixture
# Three references to the same standard normal distribution object.
collection = [ot.Normal(0.0, 1.0)] * 3

# 2 x 3 weight matrix, filled entry by entry.
weightMatrix = ot.Matrix(2, 3)
weightMatrix[0, 0] = 1.0
weightMatrix[0, 1] = -2.0
def best_fit_distribution(sample):
    """Select the continuous univariate model with the lowest BIC for *sample*.

    Every factory returned by
    ``ot.DistributionFactory.GetContinuousUniVariateFactories()`` is a
    candidate, except those whose string form starts with one of the excluded
    prefixes listed in the body.

    Parameters
    ----------
    sample :
        Data passed unchanged to ``ot.FittingTest.BIC``.

    Returns
    -------
    tuple
        ``(best_distribution, best_BIC)``. If no candidate yields a BIC below
        ``np.inf``, the initial placeholders ``(ot.NormalFactory(), np.inf)``
        are returned; the first item is then a factory, not a fitted
        distribution.
    """
    # Prefixes of the factories left out of the search. A single tuple passed
    # to str.startswith replaces the former chain of one test per prefix.
    excluded_prefixes = (
        'Histogram',  # ~ non-parametric
        # All the following were marked "Error" by the original author.
        'Burr',
        'ChiSquare',
        'Dirichlet',
        'FisherSnedecor',
        'GeneralizedPareto',
        'InverseNormal',
        'LogUniform',
        'MeixnerDistribution',
        'Pareto',
        'Rice',
        'Triangular',
        'Frechet',
        'Student',
        'Trapezoidal',
        'TruncatedNormal',
        'WeibullMax',
        'WeibullMin',
    )

    # Admissible factories recorded by the original author: Arcsine, Beta,
    # Chi, Exponential, Gamma, Gumbel, Laplace, Logistic, LogNormal, Normal,
    # Rayleigh, Uniform. (That record also named Rice, which the exclusion
    # list above removes.)
    marginal_factories = [
        factory
        for factory in ot.DistributionFactory.GetContinuousUniVariateFactories()
        if not str(factory).startswith(excluded_prefixes)
    ]

    # Best holders: placeholders until a candidate beats an infinite BIC.
    best_distribution = ot.NormalFactory()
    best_bic = np.inf
    for factory in marginal_factories:
        # Bayesian information criterion of the candidate fitted on the sample.
        candidate, bic = ot.FittingTest.BIC(sample, factory)
        # Strict comparison: on a tie, the first candidate found is kept.
        if best_bic > bic:
            best_distribution = candidate
            best_bic = bic
    return (best_distribution, best_bic)
Example #9
0
# Case 24: POD built from a pre-computed analysis object.
# defects, signals, detection, noiseThres, saturationThres and the first
# resDistFact are defined outside this fragment.
ot.RandomGenerator.SetSeed(0)
analysis = otpod.UnivariateLinearModelAnalysis(defects, signals, noiseThres, saturationThres, resDistFact=resDistFact, boxCox=False)
# The seed is reset before building the POD so its results are repeatable.
ot.RandomGenerator.SetSeed(0)
POD24 = otpod.UnivariateLinearModelPOD(analysis=analysis, detection=detection)
POD24.setSimulationSize(100)
POD24.run()
# Two values are read from the result below: index 0 and index 1.
detectionSize24 = POD24.computeDetectionSize(0.9, 0.95)
def test_24_a90():
    """Check the first detection size of case 24 against its reference value."""
    np.testing.assert_almost_equal(detectionSize24[0], 0.332304242822)
def test_24_a9095():
    """Check the second detection size of case 24 against its reference value."""
    np.testing.assert_almost_equal(detectionSize24[1], 0.345249438995)



######### Test with the Linear regression and Normal factory #################
resDistFact = ot.NormalFactory()
# Test with Box Cox
# Both the numpy and the OpenTURNS generators are reseeded before each case.
np.random.seed(0)
ot.RandomGenerator.SetSeed(0)
POD25 = otpod.UnivariateLinearModelPOD(defects, signals, detection, resDistFact=resDistFact, boxCox=True)
POD25.run()
detectionSize25 = POD25.computeDetectionSize(0.9, 0.95)
def test_25_a90():
    """Check the first detection size of case 25 against its reference value."""
    np.testing.assert_almost_equal(detectionSize25[0], 0.313117629683)
def test_25_a9095():
    """Check the second detection size of case 25 against its reference value."""
    np.testing.assert_almost_equal(detectionSize25[1], 0.324672954397)

# Test with censored data and box cox
np.random.seed(0)
ot.RandomGenerator.SetSeed(0)
POD26 = otpod.UnivariateLinearModelPOD(defects, signals, detection, noiseThres, saturationThres, resDistFact=resDistFact, boxCox=True)
Example #10
0
# Build a first mixture from coll/weights (both started outside this
# fragment), then add that mixture itself to coll as a new atom.
rm = ot.RandomMixture(coll, weights)
coll.add(rm)
weights.add(-2.5)
coll.add(ot.Gamma(3.0, 4.0, -2.0))
weights.add(2.5)
# Mixture rebuilt from the extended collection and weights.
distribution = ot.RandomMixture(coll, weights)
print("distribution=", repr(distribution))
print("distribution=", distribution)
mu = distribution.getMean()[0]
sigma = distribution.getStandardDeviation()[0]
# Evaluate the PDF at 10 points evenly spaced over [mu - 3*sigma, mu + 3*sigma].
for i in range(10):
    x = mu + (-3.0 + 6.0 * i / 9.0) * sigma
    print("pdf( %.6f )=%.6f" % (x, distribution.computePDF(x)))

# Tests of the projection mechanism
# Candidate factories handed to project().
collFactories = [ot.UniformFactory(), ot.NormalFactory(
), ot.TriangularFactory(), ot.ExponentialFactory(), ot.GammaFactory()]
# , TrapezoidalFactory()
# project() returns two values, printed below as projections and norms.
result, norms = distribution.project(collFactories)
print("projections=", result)
print("norms=", norms)
# ------------------------------ Multivariate tests ------------------------------#
# 2D RandomMixture
# Three references to the same standard normal distribution object.
collection = [ot.Normal(0.0, 1.0)] * 3

# 2 x 3 weight matrix, filled entry by entry.
weightMatrix = ot.Matrix(2, 3)
weightMatrix[0, 0] = 1.0
weightMatrix[0, 1] = -2.0
weightMatrix[0, 2] = 1.0
weightMatrix[1, 0] = 1.0
weightMatrix[1, 1] = 1.0
weightMatrix[1, 2] = -3.0
Example #11
0
#! /usr/bin/env python

from __future__ import print_function
import openturns as ot

# Presumably limits the number of digits used when printing values, to keep
# the printed output stable -- TODO confirm.
ot.PlatformInfo.SetNumericalPrecision(3)
inner_factory = ot.NormalFactory()
# 1000 points drawn from the distribution built by the factory without data.
sample = inner_factory.build().getSample(1000)
# Wrap the normal factory in a maximum-likelihood factory and fit the sample.
factory = ot.MaximumLikelihoodFactory(inner_factory)
distribution = factory.build(sample)
print(distribution)
ot.Log.Show(ot.Log.NONE)

# %%
# The Normal distribution
# -----------------------
#
# The parameters are estimated by the method of moments.
#

# %%
# We consider a sample, here created from a standard normal distribution :
sample = ot.Normal().getSample(1000)

# %%
# We can estimate a normal distribution with `NormalFactory` :
distribution = ot.NormalFactory().build(sample)

# %%
# We take a look at the estimated parameters with the `getParameter` method :
print(distribution.getParameter())

# %%
# We draw the fitted distribution.
# NOTE(review): `viewer` is not imported in the visible part of this file;
# presumably `from openturns import viewer` appears elsewhere -- confirm.
graph = distribution.drawPDF()
graph.setTitle("Fitted Normal distribution")
view = viewer.View(graph)

# %%
# The Student distribution
# ------------------------
#
Example #13
0
 # For each alpha: learn a DAG on the training rows, build a continuous
 # Bayesian network on it, then average its log-PDF over the test rows.
 # alphas, data_ref, train and test are defined outside this fragment.
 bic_curve = []
 for alpha in alphas:
     ot.Log.Show(ot.Log.NONE)
     print("\tLearning with alpha={}".format(alpha))
     learner = otagr.ContinuousMIIC(
         data_ref.select(train))  # Using CMIIC algorithm
     learner.setAlpha(alpha)
     cmiic_dag = learner.learnDAG()  # Learning DAG
     ot.Log.Show(ot.Log.NONE)
     # NOTE(review): hard-coded switch -- only the first branch (kernel
     # smoothing marginals with Bernstein copulas) can run; the else branch
     # (normal marginals with normal copulas) is dead code as written.
     if True:
         cmiic_cbn = otagr.ContinuousBayesianNetworkFactory(
             ot.KernelSmoothing(ot.Normal()), ot.BernsteinCopulaFactory(),
             cmiic_dag, 0.05, 4, False).build(data_ref.select(train))
     else:
         cmiic_cbn = otagr.ContinuousBayesianNetworkFactory(
             ot.NormalFactory(), ot.NormalCopulaFactory(), cmiic_dag, 0.05,
             4, False).build(data_ref.select(train))
     # sampled = cmiic_cbn.getSample(1000)
     # sampled = (sampled.rank() +1)/(sampled.getSize()+2)
     # pairs(sampled, figure_path.joinpath('pairs_test.pdf')
     # ll accumulates the log-PDF of the accepted test points, s counts them.
     ll = 0
     s = 0
     for point in data_ref.select(test):
         point_ll = cmiic_cbn.computeLogPDF(point)
         # Points whose log-PDF magnitude exceeds the bound are reported and
         # left out of the average. NOTE(review): 10e20 equals 1e21 -- confirm
         # that this is the intended bound.
         if np.abs(point_ll) <= 10e20:
             s += 1
             ll += point_ll
         else:
             print("pb point=", point, "log pdf=", point_ll)
     # NOTE(review): raises ZeroDivisionError when no test point was accepted.
     ll /= s
     n_arc = cmiic_dag.getDAG().sizeArcs()
Example #14
0

def test_24_a90():
    """Check the first detection size of case 24 to 5 decimals."""
    np.testing.assert_almost_equal(detectionSize24[0],
                                   0.3323508693323901,
                                   decimal=5)


def test_24_a9095():
    """Check the second detection size of case 24 to 5 decimals."""
    np.testing.assert_almost_equal(detectionSize24[1],
                                   0.34262733110886456,
                                   decimal=5)


######### Test with the Linear regression and Normal factory #################
resDistFact = ot.NormalFactory()
# Test with Box Cox
# Both the numpy and the OpenTURNS generators are reseeded before the case.
# defects, signals and detection are defined outside this fragment.
np.random.seed(0)
ot.RandomGenerator.SetSeed(0)
POD25 = otpod.UnivariateLinearModelPOD(defects,
                                       signals,
                                       detection,
                                       resDistFact=resDistFact,
                                       boxCox=True)
POD25.run()
# Two values are read from the result: index 0 here, index 1 presumably in a
# test beyond this fragment.
detectionSize25 = POD25.computeDetectionSize(0.9, 0.95)


def test_25_a90():
    """Check the first detection size of case 25 to 5 decimals."""
    np.testing.assert_almost_equal(detectionSize25[0], 0.31310, decimal=5)
Example #15
0
    def run(self):
        """
        Launch the analysis, then every POD method flagged as active.

        The univariate linear model analysis is always performed. Each POD
        study is built only when its entry in ``self._activeMethods`` is true
        (kriging additionally requires ``self._dim > 1``), configured from the
        instance settings and run immediately. The univariate linear and
        quantile regression studies receive only the first column of the
        input sample; polynomial chaos and kriging receive the whole sample.
        """
        verbose = self._verbose
        active = self._activeMethods
        detection = self._detection
        noise = self._noiseThres
        saturation = self._saturationThres
        boxCox = self._boxCox

        def announce(message):
            # Progress messages are only printed in verbose mode.
            if verbose:
                print(message)

        # Linear model analysis under the Gaussian residuals hypothesis.
        announce("\nStart univariate linear model analysis...")
        self._analysis = UnivariateLinearModelAnalysis(
            self._inputSample[:, 0], self._signals, noise, saturation,
            ot.NormalFactory(), boxCox)

        # Linear model POD, Gaussian residuals.
        if active['LinearGauss']:
            announce("\nStart univariate linear model POD with Gaussian residuals...")
            self._PODgauss = gauss = UnivariateLinearModelPOD(
                self._inputSample[:, 0], self._signals, detection, noise,
                saturation, ot.NormalFactory(), boxCox)
            gauss.setVerbose(verbose)
            gauss.setSimulationSize(self._simulationSize)
            gauss.run()

        # Linear model POD, no hypothesis on the residuals (no factory given).
        if active['LinearBinomial']:
            announce("\nStart univariate linear model POD with no hypothesis on the residuals...")
            self._PODbin = binomial = UnivariateLinearModelPOD(
                self._inputSample[:, 0], self._signals, detection, noise,
                saturation, None, boxCox)
            binomial.setVerbose(verbose)
            binomial.run()

        # Linear model POD, kernel smoothing on the residuals.
        if active['LinearKernelSmoothing']:
            announce("\nStart univariate linear model POD with kernel smoothing on the residuals...")
            self._PODks = smoothing = UnivariateLinearModelPOD(
                self._inputSample[:, 0], self._signals, detection, noise,
                saturation, ot.KernelSmoothing(), boxCox)
            smoothing.setVerbose(verbose)
            smoothing.setSimulationSize(self._simulationSize)
            smoothing.run()

        # Quantile regression POD.
        if active['QuantileRegression']:
            announce("\nStart quantile regression POD...")
            self._PODqr = quantile = QuantileRegressionPOD(
                self._inputSample[:, 0], self._signals, detection, noise,
                saturation, boxCox)
            quantile.setVerbose(verbose)
            quantile.setSimulationSize(self._simulationSize)
            quantile.run()

        # Polynomial chaos POD, on the full input sample.
        if active['PolynomialChaos']:
            announce("\nStart polynomial chaos POD...")
            self._PODchaos = chaos = PolynomialChaosPOD(
                self._inputSample, self._signals, detection, noise,
                saturation, boxCox)
            chaos.setVerbose(verbose)
            chaos.setSimulationSize(self._simulationSize)
            chaos.setSamplingSize(self._samplingSize)
            chaos.run()

        # Kriging POD, on the full input sample; skipped when the dimension
        # is 1 (the 'Kriging' flag is then not even looked up).
        if self._dim > 1 and active['Kriging']:
            announce("\nStart kriging POD...")
            self._PODkriging = kriging = KrigingPOD(
                self._inputSample, self._signals, detection, noise,
                saturation, boxCox)
            kriging.setVerbose(verbose)
            kriging.setSimulationSize(self._simulationSize)
            kriging.setSamplingSize(self._samplingSize)
            kriging.run()
Example #16
0
# %%
result.getStatistic()

# %%
# Case 2 : the distribution parameters are estimated from the sample.
# -------------------------------------------------------------------
#
# In the case where the parameters of the distribution are estimated from the
# sample, we must use the `Lilliefors` static method and the distribution
# factory to be tested.

# %%
# Set the `FittingTest-LillieforsMaximumSamplingSize` entry of the ResourceMap
# to 1000 (judging by its name, it bounds the sampling done by the test).
ot.ResourceMap.SetAsUnsignedInteger(
    "FittingTest-LillieforsMaximumSamplingSize", 1000)

# %%
distributionFactory = ot.NormalFactory()

# %%
# The call returns two values, used below as the fitted distribution and the
# test result. The last argument (0.01) is presumably the test level -- see
# the `FittingTest.Lilliefors` documentation.
dist, result = ot.FittingTest.Lilliefors(sample, distributionFactory, 0.01)
print('Conclusion=', result.getBinaryQualityMeasure(), 'P-value=',
      result.getPValue())

# %%
dist

# %%
# Test succeeded ?

# %%
result.getBinaryQualityMeasure()
from matplotlib import pylab as plt
ot.Log.Show(ot.Log.NONE)

# %%
# Set the random generator seed
ot.RandomGenerator.SetSeed(0)

# %%
# The standard normal
# -------------------
# The parameters of the standard normal distribution are estimated by the method of moments.
# Thus the asymptotic parameters distribution is normal and estimated by bootstrap on the initial data.
#
distribution = ot.Normal(0.0, 1.0)
sample = distribution.getSample(50)
estimated = ot.NormalFactory().build(sample)

# %%
# We take a look at the estimated parameters :
print(estimated.getParameter())

# %%
# The `buildEstimator` method gives the asymptotic parameters distribution.
#
fittedRes = ot.NormalFactory().buildEstimator(sample)
paramDist = fittedRes.getParameterDistribution()

# %%
# We draw the 2D-PDF of the parameters
graph = paramDist.drawPDF()
graph.setXTitle(r"$\mu$")