def __init__(self, inputSample, outputSample, noiseThres=None,
             saturationThres=None, resDistFact=None, boxCox=False):
    """Store the data and options, check them, then run the analysis.

    Parameters
    ----------
    inputSample : 2-d sequence of float
        Input values. They are stacked with ``np.vstack`` and wrapped in an
        ``ot.NumericalSample``; the resulting dimension must be 1.
    outputSample : 2-d sequence of float
        Output values, converted the same way. Must have the same size as
        ``inputSample`` and dimension 1.
    noiseThres : optional
        Stored as given. Censoring is enabled when this or
        ``saturationThres`` is not None.
    saturationThres : optional
        Stored as given. See ``noiseThres``.
    resDistFact : optional
        Factory stored in ``self._resDistFact``. Defaults to
        ``ot.NormalFactory()`` when None.
    boxCox : bool or float, optional
        A float enables the Box Cox transformation with that value as
        lambda. Any other value is stored as the enabling flag, with no
        lambda.

    Raises
    ------
    AssertionError
        If fewer than 3 observations are given, if the sample sizes differ
        or if one of the samples is not of dimension 1.
    """
    self._inputSample = ot.NumericalSample(np.vstack(inputSample))
    self._outputSample = ot.NumericalSample(np.vstack(outputSample))
    self._noiseThres = noiseThres
    self._saturationThres = saturationThres

    # Add flag to tell if censored data must be taken into account or not.
    if noiseThres is not None or saturationThres is not None:
        # flag to tell censoring is enabled
        self._censored = True
        # Results instances are created for both cases.
        self._resultsCens = _Results()
        self._resultsUnc = _Results()
    else:
        self._censored = False
        # Results instance is created only for uncensored case.
        self._resultsUnc = _Results()

    if resDistFact is None:
        # default is NormalFactory
        self._resDistFact = ot.NormalFactory()
    else:
        self._resDistFact = resDistFact

    # If Box Cox is a float the transformation is enabled with the given
    # value. isinstance (rather than an exact type comparison) also accepts
    # float subclasses such as numpy.float64; bool is not a float, so the
    # True/False flag still takes the other branch.
    if isinstance(boxCox, float):
        self._lambdaBoxCox = float(boxCox)
        self._boxCox = True
    else:
        self._lambdaBoxCox = None
        self._boxCox = boxCox

    self._size = self._inputSample.getSize()
    self._dim = self._inputSample.getDimension()

    # Assertions on parameters
    assert (self._size >= 3), "Not enough observations."
    assert (self._size == self._outputSample.getSize()), \
        "InputSample and outputSample must have the same size."
    assert (self._dim == 1), "Dimension of inputSample must be 1."
    assert (self._outputSample.getDimension() == 1), \
        "Dimension of outputSample must be 1."

    # run the analysis
    self._run()
    # print warnings
    self._printWarnings()
def fit_to_normal_distribution(self, parameter, showQQ=False):
    """Fit a normal distribution to the stored sample of a parameter.

    Parameters
    ----------
    parameter: string
        Key of ``self.sampledict`` selecting the sample to fit.
    showQQ: bool, optional
        When true, a QQ plot of the sample against the fitted
        distribution is drawn and shown.

    Returns
    -------
    :class:`openturns.Distribution`
    """
    data = self.sampledict[parameter]
    fitted = ot.NormalFactory().build(data)
    logger.debug(fitted)

    if not showQQ:
        return fitted

    # Visual check of the fit through a QQ plot
    View(ot.VisualTest.DrawQQplot(data, fitted)).show()
    return fitted
import openturns as ot
from matplotlib import pyplot as plt
from openturns.viewer import View

# Fix the OpenTURNS random generator seed before any sampling.
ot.RandomGenerator.SetSeed(0)

# The reference distribution is whatever the factory builds without argument.
factory = ot.NormalFactory()
ref = factory.build()
dimension = ref.getDimension()
if dimension <= 2:
    # Fit a distribution on 50 points drawn from the reference.
    sample = ref.getSample(50)
    distribution = factory.build(sample)
    if dimension == 1:
        distribution.setDescription(['$t$'])
        pdf_graph = distribution.drawPDF(256)
        # Overlay the sample on the PDF graph; the second Cloud argument is
        # a NumericalSample of the same size and dimension 1 built without
        # explicit values.
        cloud = ot.Cloud(sample, ot.NumericalSample(sample.getSize(), 1))
        cloud.setColor('blue')
        cloud.setPointStyle('fcircle')
        pdf_graph.add(cloud)
        fig = plt.figure(figsize=(10, 4))
        plt.suptitle(str(distribution))
        pdf_axis = fig.add_subplot(111)
        View(pdf_graph, figure=fig, axes=[pdf_axis], add_legend=False)
    else:
        # NOTE(review): the sample is redrawn with 500 points here, while
        # `distribution` is still the one fitted on the 50 points above.
        sample = ref.getSample(500)
        distribution.setDescription(['$t_0$', '$t_1$'])
        pdf_graph = distribution.drawPDF([256]*2)
        cloud = ot.Cloud(sample)
        cloud.setColor('red')
        cloud.setPointStyle('fcircle')
        pdf_graph.add(cloud)
        fig = plt.figure(figsize=(10, 4))
        plt.suptitle(str(distribution))
# Note however that the 0.98 selected/cumulated variance ratio actually means it is very good. graphs = [] for i in range(x.getDimension()): validation = ot.KarhunenLoeveValidation(x.getMarginal(i), kl_results[i]) graph = validation.drawValidation().getGraph(0, 0) graph.setTitle( f'KL validation - marginal #{i} ratio={100.0 * ratios[i]:.2f} %') View(graph) graphs.append(graph) # %% # On the 2nd marginal we can filter out the points inside the 99% level-set # to see that actually only a few points out of N are actually outliers. graph = graphs[1] data = graph.getDrawable(1).getData() normal = ot.NormalFactory().build(data) log_pdf = normal.computeLogPDF(data).asPoint() l_pair = [(log_pdf[i], data[i]) for i in range(len(data))] l_pair.sort(key=lambda t: t[0]) index_bad = int(0.01 * len(data)) # here 0.01 = (100-99)% beta = l_pair[index_bad][0] gnorm = normal.drawLogPDF(data.getMin(), data.getMax()) bad = [l_pair[i][1] for i in range(index_bad + 1)] c = ot.Cloud(bad) c.setPointStyle("bullet") c.setColor("blue") graph.setDrawable(c, 1) dr = gnorm.getDrawable(0) dr.setLevels([beta]) dr.setColor("red") dr.setLegend("99% level-set")
ot.RandomGenerator.SetSeed(0)
# Sample of 300 points: a Normal(3) sample with a Gumbel sample stacked
# onto it, described as X0..X3.
sample = ot.Normal(3).getSample(300)
sample.stack(ot.Gumbel().getSample(300))
sample.setDescription(['X0', 'X1', 'X2', 'X3'])
sample.exportToCSVFile(filename, ',')
columns = [0, 2, 3]
# NOTE(review): the sample is written to `filename` but the data model is
# given the literal "data1.csv" - confirm both designate the same file.
model = persalys.DataModel('myDataModel', "data1.csv", columns)
myStudy.add(model)
print(model)

# Inference analysis ##
analysis = persalys.InferenceAnalysis('analysis', model)
variables = ["X0", "X3"]
analysis.setInterestVariables(variables)
# Only X3 gets an explicit list of factories.
factories = [ot.NormalFactory(), ot.GumbelFactory()]
analysis.setDistributionsFactories("X3", factories)
analysis.setLevel(0.1)
myStudy.add(analysis)
print(analysis)

analysis.run()

result = analysis.getResult()
print("result=", result)
print(result.getFittingTestResultForVariable('X3'))

# script
script = myStudy.getPythonScript()
print(script)
# The script text comes from the study itself and is executed as Python code.
# NOTE(review): exec is only acceptable because the text is generated locally.
exec(script)
i].getSample(size) continuousSampleCollection[i].setName( continuousDistributionCollection[i].getName()) sampleCollection[i] = continuousSampleCollection[i] for i in range(discreteDistributionNumber): discreteSampleCollection[i] = discreteDistributionCollection[i].getSample( size) discreteSampleCollection[i].setName( discreteDistributionCollection[i].getName()) sampleCollection[continuousDistributionNumber + i] = discreteSampleCollection[i] factoryCollection = ot.DistributionFactoryCollection(3) factoryCollection[0] = ot.UniformFactory() factoryCollection[1] = ot.BetaFactory() factoryCollection[2] = ot.NormalFactory() aSample = ot.Uniform(-1.5, 2.5).getSample(size) model, best_bic = ot.FittingTest.BestModelBIC(aSample, factoryCollection) print("best model BIC=", repr(model)) model, best_result = ot.FittingTest.BestModelKolmogorov( aSample, factoryCollection) print("best model Kolmogorov=", repr(model)) # BIC adequation resultBIC = ot.SquareMatrix(distributionNumber) for i in range(distributionNumber): for j in range(distributionNumber): value = ot.FittingTest.BIC(sampleCollection[i], distributionCollection[j], 0) resultBIC[i, j] = value print("resultBIC=", repr(resultBIC))
weights.add(-2.5)
coll.add(ot.Gamma(3.0, 4.0, -2.0))
weights.add(2.5)
distribution = ot.RandomMixture(coll, weights)
print("distribution=", repr(distribution))
print("distribution=", distribution)
mu = distribution.getMean()[0]
sigma = distribution.getStandardDeviation()[0]
# Evaluate the PDF at 10 evenly spaced points from mu - 3 sigma to mu + 3 sigma.
for i in range(10):
    x = mu + (-3.0 + 6.0 * i / 9.0) * sigma
    print("pdf( %.6f )=%.6f" % (x, distribution.computePDF(x)))

# Tests of the projection mechanism
collFactories = [
    ot.UniformFactory(),
    ot.NormalFactory(),
    ot.TriangularFactory(),
    ot.ExponentialFactory(),
    ot.GammaFactory()
]  #, TrapezoidalFactory()
# project() is unpacked into two values, printed as projections and norms.
result, norms = distribution.project(collFactories)
print("projections=", result)
print("norms=", norms)

# ------------------------------ Multivariate tests ------------------------------#
# 2D RandomMixture
collection = [ot.Normal(0.0, 1.0)] * 3

# 2 x 3 weight matrix, filled entry by entry.
weightMatrix = ot.Matrix(2, 3)
weightMatrix[0, 0] = 1.0
weightMatrix[0, 1] = -2.0
def best_fit_distribution(sample):
    """Return the fit with the lowest BIC among OpenTURNS univariate factories.

    Every factory returned by
    ``ot.DistributionFactory.GetContinuousUniVariateFactories()`` whose
    string form does not start with one of the skipped prefixes is passed to
    ``ot.FittingTest.BIC`` together with ``sample``. The candidate with the
    smallest BIC wins.

    Parameters
    ----------
    sample : sample accepted by ``ot.FittingTest.BIC``
        Data to fit.

    Returns
    -------
    tuple
        ``(best_distribution, best_BIC)``. If no candidate has a BIC below
        ``np.inf``, the initial holders are returned unchanged: an
        ``ot.NormalFactory()`` instance (a factory, not a fitted
        distribution) and ``np.inf``.
    """
    ##Best holders##
    best_distribution = ot.NormalFactory()
    best_BIC = np.inf

    # Factories left out of the search. Except for Histogram, the original
    # code only tagged them "Error", without further detail.
    skipped_prefixes = (
        'Histogram',  # ~ non-parametric
        'Burr',
        'ChiSquare',
        'Dirichlet',
        'FisherSnedecor',
        'GeneralizedPareto',
        'InverseNormal',
        'LogUniform',
        'MeixnerDistribution',
        'Pareto',
        'Rice',
        'Triangular',
        'Frechet',
        'Student',
        'Trapezoidal',
        'TruncatedNormal',
        'WeibullMax',
        'WeibullMin',
    )

    ##Create list distributions##
    # List of admissible distributions, as recorded by the original author.
    # NOTE(review): RiceFactory is listed although 'Rice' is skipped above.
    #[class=DistributionFactory implementation=class=ArcsineFactory,
    #class=DistributionFactory implementation=class=BetaFactory,
    #class=DistributionFactory implementation=class=ChiFactory,
    #class=DistributionFactory implementation=class=ExponentialFactory,
    #class=DistributionFactory implementation=class=GammaFactory,
    #class=DistributionFactory implementation=class=GumbelFactory,
    #class=DistributionFactory implementation=class=LaplaceFactory,
    #class=DistributionFactory implementation=class=LogisticFactory,
    #class=DistributionFactory implementation=class=LogNormalFactory,
    #class=DistributionFactory implementation=class=NormalFactory,
    #class=DistributionFactory implementation=class=RayleighFactory,
    #class=DistributionFactory implementation=class=RiceFactory,
    #class=DistributionFactory implementation=class=UniformFactory]
    # str.startswith accepts a tuple, so each factory is converted to a
    # string and tested once instead of once per prefix.
    marginalFactories = [
        factory
        for factory in ot.DistributionFactory.GetContinuousUniVariateFactories()
        if not str(factory).startswith(skipped_prefixes)
    ]

    for factory in marginalFactories:
        # Calculate Bayesian information criterion
        candidate, BIC = ot.FittingTest.BIC(sample, factory)
        # identify if this distribution is better
        if best_BIC > BIC:
            best_distribution = candidate
            best_BIC = BIC

    return (best_distribution, best_BIC)
# POD24: the analysis object is built first, then handed to the POD through
# the `analysis=` keyword. The OpenTURNS seed is reset before each step.
ot.RandomGenerator.SetSeed(0)
analysis = otpod.UnivariateLinearModelAnalysis(defects, signals, noiseThres,
                                               saturationThres,
                                               resDistFact=resDistFact,
                                               boxCox=False)
ot.RandomGenerator.SetSeed(0)
POD24 = otpod.UnivariateLinearModelPOD(analysis=analysis, detection=detection)
POD24.setSimulationSize(100)
POD24.run()
detectionSize24 = POD24.computeDetectionSize(0.9, 0.95)
# First component of the detection size against its reference value.
def test_24_a90():
    np.testing.assert_almost_equal(detectionSize24[0], 0.332304242822)
# Second component of the detection size against its reference value.
def test_24_a9095():
    np.testing.assert_almost_equal(detectionSize24[1], 0.345249438995)

######### Test with the Linear regression and Normal factory #################
resDistFact = ot.NormalFactory()

# Test with Box Cox
# Both the numpy and the OpenTURNS generators are reseeded.
np.random.seed(0)
ot.RandomGenerator.SetSeed(0)
POD25 = otpod.UnivariateLinearModelPOD(defects, signals, detection,
                                       resDistFact=resDistFact, boxCox=True)
POD25.run()
detectionSize25 = POD25.computeDetectionSize(0.9, 0.95)
def test_25_a90():
    np.testing.assert_almost_equal(detectionSize25[0], 0.313117629683)
def test_25_a9095():
    np.testing.assert_almost_equal(detectionSize25[1], 0.324672954397)

# Test with censored data and box cox
np.random.seed(0)
ot.RandomGenerator.SetSeed(0)
POD26 = otpod.UnivariateLinearModelPOD(defects, signals, detection,
                                       noiseThres, saturationThres,
                                       resDistFact=resDistFact, boxCox=True)
# A RandomMixture is itself added to the collection, so the mixture built
# below has another mixture as one of its components.
rm = ot.RandomMixture(coll, weights)
coll.add(rm)
weights.add(-2.5)
coll.add(ot.Gamma(3.0, 4.0, -2.0))
weights.add(2.5)
distribution = ot.RandomMixture(coll, weights)
print("distribution=", repr(distribution))
print("distribution=", distribution)
mu = distribution.getMean()[0]
sigma = distribution.getStandardDeviation()[0]
# Evaluate the PDF at 10 evenly spaced points from mu - 3 sigma to mu + 3 sigma.
for i in range(10):
    x = mu + (-3.0 + 6.0 * i / 9.0) * sigma
    print("pdf( %.6f )=%.6f" % (x, distribution.computePDF(x)))

# Tests of the projection mechanism
collFactories = [ot.UniformFactory(), ot.NormalFactory(
), ot.TriangularFactory(), ot.ExponentialFactory(), ot.GammaFactory()]
# , TrapezoidalFactory()
# project() is unpacked into two values, printed as projections and norms.
result, norms = distribution.project(collFactories)
print("projections=", result)
print("norms=", norms)

# ------------------------------ Multivariate tests ------------------------------#
# 2D RandomMixture
collection = [ot.Normal(0.0, 1.0)] * 3

# 2 x 3 weight matrix, filled entry by entry.
weightMatrix = ot.Matrix(2, 3)
weightMatrix[0, 0] = 1.0
weightMatrix[0, 1] = -2.0
weightMatrix[0, 2] = 1.0
weightMatrix[1, 0] = 1.0
weightMatrix[1, 1] = 1.0
weightMatrix[1, 2] = -3.0
#! /usr/bin/env python from __future__ import print_function import openturns as ot ot.PlatformInfo.SetNumericalPrecision(3) inner_factory = ot.NormalFactory() sample = inner_factory.build().getSample(1000) factory = ot.MaximumLikelihoodFactory(inner_factory) distribution = factory.build(sample) print(distribution)
ot.Log.Show(ot.Log.NONE) # %% # The Normal distribution # ----------------------- # # The parameters are estimated by the method of moments. # # %% # We consider a sample, here created from a standard normal distribution : sample = ot.Normal().getSample(1000) # %% # We can estimate a normal distribution with `ǸormalFactory` : distribution = ot.NormalFactory().build(sample) # %% # We take a look at the estimated parameters with the `getParameter` method : print(distribution.getParameter()) # %% # We draw the fitted distribution graph = distribution.drawPDF() graph.setTitle("Fitted Normal distribution") view = viewer.View(graph) # %% # The Student distribution # ------------------------ #
# NOTE(review): the source line carried no indentation; the nesting below is
# reconstructed from data flow and should be checked against the original.
bic_curve = []
for alpha in alphas:
    ot.Log.Show(ot.Log.NONE)
    print("\tLearning with alpha={}".format(alpha))
    learner = otagr.ContinuousMIIC(
        data_ref.select(train))  # Using CMIIC algorithm
    learner.setAlpha(alpha)
    cmiic_dag = learner.learnDAG()  # Learning DAG
    # NOTE(review): same call as at the top of the loop body - possibly
    # redundant.
    ot.Log.Show(ot.Log.NONE)
    # Hard-coded switch: only the first branch is ever executed.
    if True:
        cmiic_cbn = otagr.ContinuousBayesianNetworkFactory(
            ot.KernelSmoothing(ot.Normal()), ot.BernsteinCopulaFactory(),
            cmiic_dag, 0.05, 4, False).build(data_ref.select(train))
    else:
        cmiic_cbn = otagr.ContinuousBayesianNetworkFactory(
            ot.NormalFactory(), ot.NormalCopulaFactory(), cmiic_dag, 0.05, 4,
            False).build(data_ref.select(train))
    # sampled = cmiic_cbn.getSample(1000)
    # sampled = (sampled.rank() +1)/(sampled.getSize()+2)
    # pairs(sampled, figure_path.joinpath('pairs_test.pdf')
    # Mean log-PDF of the test points under the learned network, ignoring
    # points whose log-PDF magnitude exceeds the bound.
    ll = 0
    s = 0
    for point in data_ref.select(test):
        point_ll = cmiic_cbn.computeLogPDF(point)
        # NOTE(review): 10e20 is 1e21 - confirm this is the intended bound.
        if np.abs(point_ll) <= 10e20:
            s += 1
            ll += point_ll
        else:
            print("pb point=", point, "log pdf=", point_ll)
    # NOTE(review): raises ZeroDivisionError when no test point passed the
    # filter above (s == 0).
    ll /= s
    n_arc = cmiic_dag.getDAG().sizeArcs()
# First component of detectionSize24 against its reference, to 5 decimals.
def test_24_a90():
    np.testing.assert_almost_equal(detectionSize24[0], 0.3323508693323901,
                                   decimal=5)
# Second component of detectionSize24 against its reference, to 5 decimals.
def test_24_a9095():
    np.testing.assert_almost_equal(detectionSize24[1], 0.34262733110886456,
                                   decimal=5)

######### Test with the Linear regression and Normal factory #################
resDistFact = ot.NormalFactory()

# Test with Box Cox
# Both the numpy and the OpenTURNS generators are reseeded.
np.random.seed(0)
ot.RandomGenerator.SetSeed(0)
POD25 = otpod.UnivariateLinearModelPOD(defects, signals, detection,
                                       resDistFact=resDistFact, boxCox=True)
POD25.run()
detectionSize25 = POD25.computeDetectionSize(0.9, 0.95)
# First component of detectionSize25 against its reference, to 5 decimals.
def test_25_a90():
    np.testing.assert_almost_equal(detectionSize25[0], 0.31310, decimal=5)
def run(self):
    """
    Run the linear model analysis, then every POD method flagged as active.

    The univariate linear model analysis is always built. Each POD study is
    then built, configured and run only if its entry in
    ``self._activeMethods`` is true. The kriging study additionally requires
    ``self._dim > 1``. Progress messages are printed when ``self._verbose``
    is true.
    """

    def announce(message):
        # Progress messages are shown in verbose mode only.
        if self._verbose:
            print(message)

    # Univariate linear model analysis, Gaussian residuals hypothesis.
    announce("\nStart univariate linear model analysis...")
    self._analysis = UnivariateLinearModelAnalysis(
        self._inputSample[:, 0], self._signals,
        self._noiseThres, self._saturationThres,
        ot.NormalFactory(), self._boxCox)

    # Univariate linear model POD, Gaussian residuals.
    if self._activeMethods['LinearGauss']:
        announce("\nStart univariate linear model POD with Gaussian residuals...")
        pod = self._PODgauss = UnivariateLinearModelPOD(
            self._inputSample[:, 0], self._signals, self._detection,
            self._noiseThres, self._saturationThres,
            ot.NormalFactory(), self._boxCox)
        pod.setVerbose(self._verbose)
        pod.setSimulationSize(self._simulationSize)
        pod.run()

    # Univariate linear model POD, no hypothesis on the residuals
    # (no residuals factory is given and no simulation size is set).
    if self._activeMethods['LinearBinomial']:
        announce("\nStart univariate linear model POD with no hypothesis on the residuals...")
        pod = self._PODbin = UnivariateLinearModelPOD(
            self._inputSample[:, 0], self._signals, self._detection,
            self._noiseThres, self._saturationThres,
            None, self._boxCox)
        pod.setVerbose(self._verbose)
        pod.run()

    # Univariate linear model POD, kernel smoothing on the residuals.
    if self._activeMethods['LinearKernelSmoothing']:
        announce("\nStart univariate linear model POD with kernel smoothing on the residuals...")
        pod = self._PODks = UnivariateLinearModelPOD(
            self._inputSample[:, 0], self._signals, self._detection,
            self._noiseThres, self._saturationThres,
            ot.KernelSmoothing(), self._boxCox)
        pod.setVerbose(self._verbose)
        pod.setSimulationSize(self._simulationSize)
        pod.run()

    # Quantile regression POD, on the first input column only.
    if self._activeMethods['QuantileRegression']:
        announce("\nStart quantile regression POD...")
        pod = self._PODqr = QuantileRegressionPOD(
            self._inputSample[:, 0], self._signals, self._detection,
            self._noiseThres, self._saturationThres, self._boxCox)
        pod.setVerbose(self._verbose)
        pod.setSimulationSize(self._simulationSize)
        pod.run()

    # Polynomial chaos POD, on the whole input sample.
    if self._activeMethods['PolynomialChaos']:
        announce("\nStart polynomial chaos POD...")
        pod = self._PODchaos = PolynomialChaosPOD(
            self._inputSample, self._signals, self._detection,
            self._noiseThres, self._saturationThres, self._boxCox)
        pod.setVerbose(self._verbose)
        pod.setSimulationSize(self._simulationSize)
        pod.setSamplingSize(self._samplingSize)
        pod.run()

    # Kriging POD, only when the input has more than one dimension.
    if self._dim > 1 and self._activeMethods['Kriging']:
        announce("\nStart kriging POD...")
        pod = self._PODkriging = KrigingPOD(
            self._inputSample, self._signals, self._detection,
            self._noiseThres, self._saturationThres, self._boxCox)
        pod.setVerbose(self._verbose)
        pod.setSimulationSize(self._simulationSize)
        pod.setSamplingSize(self._samplingSize)
        pod.run()
# %% result.getStatistic() # %% # Case 2 : the distribution parameters are estimated from the sample. # ------------------------------------------------------------------- # # In the case where the parameters of the distribution are estimated from the sample, we must use the `Lilliefors` static method and the distribution factory to be tested. # %% ot.ResourceMap.SetAsUnsignedInteger( "FittingTest-LillieforsMaximumSamplingSize", 1000) # %% distributionFactory = ot.NormalFactory() # %% dist, result = ot.FittingTest.Lilliefors(sample, distributionFactory, 0.01) print('Conclusion=', result.getBinaryQualityMeasure(), 'P-value=', result.getPValue()) # %% dist # %% # Test succeeded ? # %% result.getBinaryQualityMeasure()
from matplotlib import pylab as plt

ot.Log.Show(ot.Log.NONE)

# %%
# Set the random generator seed
ot.RandomGenerator.SetSeed(0)

# %%
# The standard normal
# -------------------
# The parameters of the standard normal distribution are estimated by the method of moments.
# Thus the asymptotic parameters distribution is normal and estimated by bootstrap on the initial data.
#
distribution = ot.Normal(0.0, 1.0)
sample = distribution.getSample(50)
estimated = ot.NormalFactory().build(sample)

# %%
# We take a look at the estimated parameters :
print(estimated.getParameter())

# %%
# The `buildEstimator` method gives the asymptotic parameters distribution.
#
fittedRes = ot.NormalFactory().buildEstimator(sample)
paramDist = fittedRes.getParameterDistribution()

# %%
# We draw the 2D-PDF of the parameters
graph = paramDist.drawPDF()
graph.setXTitle(r"$\mu$")