def __init__(self, data, sample_type=None, dist=None):
    """Set up the marginals and the linear maps used by the transformation.

    Arguments:
    data -- only ``data.shape[0]`` is read; it is stored as ``self.dim``
    sample_type -- if equal to ``'cbeta'`` the marginals, the mean vector
                   and the matrix ``S`` are derived from ``dist``; for any
                   other value two independent beta marginals are used
    dist -- only used for ``sample_type == 'cbeta'``; has to provide the
            attribute ``dist`` and the method ``corrcoeff()``
    """
    from pysgpp.extensions.datadriven.uq.dists import Uniform, Beta, SGDEdist, Normal, GaussianKDEDist
    from pysgpp.extensions.datadriven.uq.quadrature.marginalization.marginalization import doMarginalize

    # fix stochastic setting
    self.alpha, self.beta = 5., 10.
    self.lwr, self.upr = 0., 1.
    # presumably Normal(mu, sigma, lower bound, upper bound) -- TODO confirm
    self.normal = Normal(0, 1, -2, 2)
    self.uniform = Uniform(self.lwr, self.upr)
    self.b = Beta(self.alpha, self.beta, self.lwr, self.upr)
    self.dim = data.shape[0]

    if sample_type == 'cbeta':
        # marginalize the density to each of the two dimensions
        marg_op = createOperationDensityMargTo1DKDE(dist.dist)
        marginal_kdes = []
        for axis in (0, 1):
            marginal_kde = GaussianKDE()
            marg_op.margToDimX(marginal_kde, axis)
            marginal_kdes.append(marginal_kde)

        # set the marginals, the mean vector and the correlation matrix
        self.x = [GaussianKDEDist(marginal_kde.getSamples().array())
                  for marginal_kde in marginal_kdes]
        self.M = np.array([[marginal_kde.mean()
                            for marginal_kde in marginal_kdes]]).T
        self.S = dist.corrcoeff()
    else:
        # two independent beta marginals
        self.x = [self.b, self.b]
        self.M = np.array([[marginal.mean() for marginal in self.x]]).T
        self.S = np.eye(2)

    # scale S by the square roots of its diagonal entries: C = D^-1 S D^-1.
    # D is used to transform the results back to the original space
    diag_roots = np.sqrt(np.diag(self.S))
    self.D = np.diag(diag_roots)
    self.D_inverse = np.diag(1. / diag_roots)
    self.C = self.D_inverse.dot(self.S.dot(self.D_inverse))

    # compute cholesky decomposition
    # adjust it according to [Lu ...]
    # nothing needs to be done for uniform <--> uniform, L is used as is
    self.L = np.linalg.cholesky(self.C)
    self.L_inverse = np.linalg.inv(self.L)

    # sanity checks: C = L L^T and S = D L L^T D
    cholesky_residual = self.C - self.L.dot(self.L.T)
    assert abs(np.sum(cholesky_residual)) < 1e-14
    scaling_residual = self.S - self.D.dot(self.L.dot(self.L.T.dot(self.D)))
    assert abs(np.sum(scaling_residual)) < 1e-14
def test1DCDFandPPF(self):
    """Visual check of ``cdf`` and ``ppf`` of an SGDE density in 1d.

    Samples are drawn from a normal distribution, an ``SGDEdist`` is built
    from them, and three histograms are shown: the samples, the samples
    mapped through ``dist.cdf`` ("uniform space") and the result of mapping
    those back through ``dist.ppf`` ("original space").  The method makes
    no assertions; the figures have to be inspected manually.
    """
    # prepare data
    U = Normal(0.5, 0.1, 0, 1)
    train_samples = U.rvs(1000).reshape(1000, 1)

    dist = SGDEdist.byLearnerSGDEConfig(
        train_samples,
        config={"grid_level": 5,
                "grid_type": "poly",
                "refinement_numSteps": 0,
                "refinement_numPoints": 10,
                "regularization_type": "Laplace",
                "crossValidation_lambda": 0.000562341,
                "crossValidation_enable": False,
                "crossValidation_kfold": 5,
                "crossValidation_silent": True},
        bounds=U.getBounds())

    # histogram of the samples together with both densities
    # (``density=True`` replaces the ``normed`` keyword, which Matplotlib
    # deprecated in 2.1 and removed in 3.1)
    fig = plt.figure()
    plt.hist(train_samples, bins=10, density=True)
    plotDensity1d(U)
    plotDensity1d(dist)
    plt.title("original space")
    fig.show()

    # samples mapped through the estimated cdf
    transformed_samples = dist.cdf(train_samples)

    fig = plt.figure()
    plt.hist(transformed_samples, bins=10, density=True)
    plt.title("uniform space")
    fig.show()

    # ... and mapped back through the estimated ppf
    transformed_samples = dist.ppf(transformed_samples)

    fig = plt.figure()
    plt.hist(transformed_samples, bins=10, density=True)
    plotDensity1d(U)
    plotDensity1d(dist)
    plt.title("original space")
    fig.show()

    plt.show()
def test1DCDFandPPF(self):
    """Visual check of ``cdf`` and ``ppf`` of a kernel density in 1d.

    Samples are drawn from a normal distribution and a ``KDEDist`` with
    Epanechnikov kernel type is built from them.  The estimated cdf is
    plotted against the cdf of the sampling distribution, followed by
    histograms of the samples, of the samples mapped through ``dist.cdf``
    ("uniform space") and of the result of mapping those back through
    ``dist.ppf`` ("original space").  The method makes no assertions; the
    figures have to be inspected manually.
    """
    # prepare data
    U = Normal(0.5, 0.1, 0, 1)
    train_samples = U.rvs(1000).reshape(1000, 1)

    dist = KDEDist(train_samples, kernelType=KernelType_EPANECHNIKOV)

    rc('font', **{'size': 18})

    # estimated vs. analytic cdf on [0, 1]
    fig = plt.figure()
    x = np.linspace(0, 1, 1000)
    plt.plot(x, dist.cdf(x), label="estimated")
    plt.plot(x, [U.cdf(xi) for xi in x], label="analytic")
    plt.legend(loc="lower right")
    fig.show()

    # histogram of the samples together with both densities
    # (``density=True`` replaces the ``normed`` keyword, which Matplotlib
    # deprecated in 2.1 and removed in 3.1)
    fig = plt.figure()
    plt.hist(train_samples, density=True)
    plotDensity1d(U, label="analytic")
    plotDensity1d(dist, label="estimated")
    plt.title("original space")
    plt.legend()
    fig.show()

    # samples mapped through the estimated cdf
    transformed_samples = dist.cdf(train_samples)

    fig = plt.figure()
    plt.hist(transformed_samples, density=True)
    plt.title("uniform space")
    fig.show()

    # ... and mapped back through the estimated ppf
    transformed_samples = dist.ppf(transformed_samples)

    fig = plt.figure()
    plt.hist(transformed_samples, density=True)
    plotDensity1d(U, label="analytic")
    plotDensity1d(dist, label="estimated")
    plt.title("original space")
    plt.legend()
    fig.show()

    plt.show()
def testChangeBandwidths(self):
    """Plot the negative log density of a KDE for several bandwidths.

    For every bandwidth ``h`` each sample is evaluated with
    ``kde.evalSubset`` while its own index is passed in the skip vector
    (presumably a leave-one-out evaluation -- TODO confirm).  The first
    figure shows ``-log`` of these values over the sample position for
    every ``h``; the second one shows their mean over ``h``.  The method
    makes no assertions.
    """
    # dimension of domain
    d = 1
    dist = J([Normal(0.5, 0.1, 0, 1)] * d)

    # estimate a kernel density
    kde = KernelDensityEstimator(DataMatrix(dist.rvs(1000)))
    bandwidths = DataVector(d)
    kde.getBandwidths(bandwidths)

    # candidate bandwidths: 10 ** linspace(9e-4, 5e-1, 10) - 1
    hs = np.logspace(9e-4, 5e-1, num=10, endpoint=True, base=10) - 1

    fig = plt.figure()
    point = DataVector(kde.getDim())
    skipped = IndexVector(1)
    mean_neg_log = np.empty(len(hs))

    for pos, h in enumerate(hs):
        bandwidths[0] = h
        kde.setBandwidths(bandwidths)

        coords = np.empty(kde.getNsamples())
        neg_log = np.empty(kde.getNsamples())
        for idx in range(kde.getNsamples()):
            # evaluate at sample idx while skipping that very sample
            skipped[0] = idx
            kde.getSample(idx, point)
            neg_log[idx] = -np.log(kde.evalSubset(point, skipped))
            coords[idx] = point[0]

        mean_neg_log[pos] = np.mean(neg_log)

        # plot in ascending order of the sample positions
        order = np.argsort(coords)
        plt.plot(coords[order], neg_log[order], label="h=%g" % h)

    plt.legend()
    fig.show()

    fig = plt.figure()
    plt.plot(hs, mean_neg_log)
    fig.show()

    plt.show()
class NatafTransformation(object):
    """Maps between three representations of a set of random variables.

    U -- variables obtained by applying ``self.normal.cdf`` to decorrelated
         normal variables
    Z -- normal variables after applying ``D L`` and the shift ``M``
    X -- variables in the space of the marginals ``self.x``

    All ``trans_*`` methods write their result into the second argument
    and return nothing.  Variables are stored row-wise: row ``i`` holds
    all values of the i-th variable.
    """

    def __init__(self, data, sample_type=None, dist=None):
        """Set up the marginals and the linear maps of the transformation.

        Arguments:
        data -- only ``data.shape[0]`` is read; it is stored as ``self.dim``
        sample_type -- if equal to ``'cbeta'`` the marginals, the mean
                       vector and the matrix ``S`` are derived from
                       ``dist``; for any other value two independent beta
                       marginals are used
        dist -- only used for ``sample_type == 'cbeta'``; has to provide
                the attribute ``dist`` and the method ``corrcoeff()``
        """
        from pysgpp.extensions.datadriven.uq.dists import Uniform, Beta, SGDEdist, Normal, KDEDist
        from pysgpp.extensions.datadriven.uq.quadrature.marginalization.marginalization import doMarginalize

        # fix stochastic setting
        self.alpha, self.beta = 5., 10.
        self.lwr, self.upr = 0., 1.
        # presumably Normal(mu, sigma, lower, upper) -- TODO confirm
        self.normal = Normal(0, 1, -2, 2)
        self.uniform = Uniform(self.lwr, self.upr)
        self.b = Beta(self.alpha, self.beta, self.lwr, self.upr)
        self.dim = data.shape[0]

        if sample_type == 'cbeta':
            # marginalize the density to each of the two dimensions
            marg_op = createOperationDensityMargTo1DKDE(dist.dist)
            marginal_kdes = []
            for axis in (0, 1):
                marginal_kde = KernelDensityEstimator()
                marg_op.margToDimX(marginal_kde, axis)
                marginal_kdes.append(marginal_kde)

            # set the marginals, the mean vector and the correlation matrix
            self.x = [KDEDist(marginal_kde.getSamples().array())
                      for marginal_kde in marginal_kdes]
            self.M = np.array([[marginal_kde.mean()
                                for marginal_kde in marginal_kdes]]).T
            self.S = dist.corrcoeff()
        else:
            # two independent beta marginals
            self.x = [self.b, self.b]
            self.M = np.array([[marginal.mean() for marginal in self.x]]).T
            self.S = np.eye(2)

        # scale S by the square roots of its diagonal: C = D^-1 S D^-1.
        # D is used to transform the results back to the original space
        diag_roots = np.sqrt(np.diag(self.S))
        self.D = np.diag(diag_roots)
        self.D_inverse = np.diag(1. / diag_roots)
        self.C = self.D_inverse.dot(self.S.dot(self.D_inverse))

        # compute cholesky decomposition
        # adjust it according to [Lu ...]
        # nothing needs to be done for uniform <--> uniform, L is used as is
        self.L = np.linalg.cholesky(self.C)
        self.L_inverse = np.linalg.inv(self.L)

        # sanity checks: C = L L^T and S = D L L^T D
        cholesky_residual = self.C - self.L.dot(self.L.T)
        assert abs(np.sum(cholesky_residual)) < 1e-14
        scaling_residual = self.S - self.D.dot(
            self.L.dot(self.L.T.dot(self.D)))
        assert abs(np.sum(scaling_residual)) < 1e-14

    def trans_U_to_X(self, u_vars, x_vars):
        """Chain U -> Z -> X; the result is written to ``x_vars``."""
        intermediate = np.zeros(u_vars.shape)
        self.trans_U_to_Z(u_vars, intermediate)
        self.trans_Z_to_X(intermediate, x_vars)

    def trans_X_to_U(self, x_vars, u_vars):
        """Chain X -> Z -> U; the result is written to ``u_vars``."""
        intermediate = np.zeros(u_vars.shape)
        self.trans_X_to_Z(x_vars, intermediate)
        self.trans_Z_to_U(intermediate, u_vars)

    def trans_Z_to_X(self, z_vars, x_vars):
        """Map each row of ``z_vars`` through the normal cdf and the ppf of
        the matching marginal, rescaled to [lwr, upr]."""
        width = self.upr - self.lwr
        for i in range(self.dim):
            probabilities = self.normal.cdf(z_vars[i])
            count = len(probabilities)
            # the marginals are evaluated on a column of points
            quantiles = self.x[i].ppf(probabilities.reshape(count, 1))
            quantiles = quantiles.reshape(count)
            x_vars[i] = self.lwr + width * quantiles

    def trans_X_to_Z(self, x_vars, z_vars):
        """Map each row of ``x_vars`` through the cdf of the matching
        marginal and the normal ppf."""
        for i in range(self.dim):
            row = x_vars[i]
            # the marginals are evaluated on a column of points
            probabilities = self.x[i].cdf(row.reshape(len(row), 1))
            probabilities = probabilities.reshape(len(probabilities))
            z_vars[i] = self.normal.ppf(probabilities)

    def trans_Z_to_U(self, z_vars, u_vars):
        """Shift by ``M``, apply ``L^-1 D^-1`` and the normal cdf."""
        # decorrelate the variables
        centered = z_vars - self.M
        decorrelated = self.L_inverse.dot(self.D_inverse.dot(centered))

        # transform to uniform space
        for i, row in enumerate(decorrelated):
            u_vars[i] = self.normal.cdf(row)

    def trans_U_to_Z(self, u_vars, z_vars):
        """Apply the normal ppf, then ``D L`` and the shift ``M``."""
        # transform to std normal space
        for i, row in enumerate(u_vars):
            z_vars[i] = self.normal.ppf(row)

        # apply the correlation
        correlated = self.D.dot(self.L.dot(z_vars)) + self.M

        # transform to space of correlated normal
        for i, row in enumerate(correlated):
            z_vars[i] = row
def withNormalDistribution(self, mu, sigma, alpha):
    """Use the distribution created by ``Normal.by_alpha(mu, sigma, alpha)``.

    The distribution is stored in ``self._dist``.

    Arguments:
    mu, sigma, alpha -- forwarded unchanged to ``Normal.by_alpha``

    Returns ``self`` so that calls can be chained.
    """
    normal_dist = Normal.by_alpha(mu, sigma, alpha)
    self._dist = normal_dist
    return self
# Script excerpt: `parser` and `numDims` are defined outside of this section.

# command line options: the grid level and the type of the marginals
parser.add_argument('--level', default=2, type=int, help="minimum level of regular grids")
parser.add_argument('--marginalType', default="beta", type=str, help="marginals")
args = parser.parse_args()

# select the one-dimensional marginal; every value other than "uniform" and
# "beta" falls back to the normal distribution
if args.marginalType == "uniform":
    marginal = Uniform(0, 1)
elif args.marginalType == "beta":
    marginal = Beta(5, 10)
else:
    marginal = Normal(0.5, 0.1, 0, 1)

# plot pdf
# J is built from numDims copies of the marginal (presumably the joint
# distribution of independent variables -- TODO confirm)
dist = J([marginal] * numDims)
fig = plt.figure()
plotDensity2d(dist)
# the file name is derived from the marginal type; savefig is a project
# helper, presumably it appends the file extension(s) -- TODO confirm
savefig(fig, "/tmp/%s" % (args.marginalType, ))
plt.close(fig)

# wrap the pdf of the marginal so that it can be handed to pysgpp
w = pysgpp.singleFunc(marginal.pdf)

# two point hierarchies, both created by linearLeja from the same function w
# NOTE(review): two entries are hard coded here while dist uses numDims --
# confirm that numDims == 2 is intended
grids = pysgpp.AbstractPointHierarchyVector()
grids.push_back(pysgpp.CombiHierarchies.linearLeja(w))
grids.push_back(pysgpp.CombiHierarchies.linearLeja(w))

# container for the evaluators; it is filled outside of this section
evaluators = pysgpp.FloatScalarAbstractLinearEvaluatorVector()
# Copyright (C) 2008-today The SG++ project # This file is part of the SG++ project. For conditions of distribution and # use, please see the copyright notice provided with SG++ or at # sgpp.sparsegrids.org import numpy as np import matplotlib.pyplot as plt from pysgpp import DataVector, Grid, createOperationHierarchisation, createOperationEval from pysgpp.extensions.datadriven.uq.operations import hierarchize from pysgpp.extensions.datadriven.uq.plot import plotFunction3d, plotSG3d from pysgpp.extensions.datadriven.uq.dists import Normal, J from pysgpp.extensions.datadriven.uq.operations.sparse_grid import evalSGFunction U = J([Normal.by_alpha(0.5, 0.05, 0.001), Normal.by_alpha(0.5, 0.05, 0.001)]) grid = Grid.createPolyGrid(2, 2) grid.getGenerator().regular(3) gs = grid.getStorage() nodalValues = np.ndarray(gs.getSize()) p = DataVector(2) for i in range(gs.getSize()): gs.getCoordinates(gs.getPoint(i), p) nodalValues[i] = U.pdf(p.array()) alpha = hierarchize(grid, nodalValues) fig, _, _ = plotFunction3d(U.pdf)
class NatafTransformation(object):
    """Maps between three representations of a set of random variables.

    U -- variables obtained by applying ``self.normal.cdf`` to decorrelated
         normal variables
    Z -- normal variables after applying ``D L`` and the shift ``M``
    X -- variables in the space of the marginals ``self.x``

    All ``trans_*`` methods write their result into the second argument
    and return nothing.  Variables are stored row-wise: row ``i`` holds
    all values of the i-th variable.
    """

    def __init__(self, data, sample_type=None, dist=None):
        """Set up the marginals and the linear maps of the transformation.

        Arguments:
        data -- only ``data.shape[0]`` is read; it is stored as ``self.dim``
        sample_type -- if equal to ``'cbeta'`` the marginals, the mean
                       vector and the matrix ``S`` are derived from
                       ``dist``; for any other value two independent beta
                       marginals are used
        dist -- only used for ``sample_type == 'cbeta'``; has to provide
                the attribute ``dist`` and the method ``corrcoeff()``

        Raises an ``AssertionError`` if one of the two consistency checks
        on the Cholesky factor fails.
        """
        from pysgpp.extensions.datadriven.uq.dists import Uniform, Beta, SGDEdist, Normal, GaussianKDEDist
        from pysgpp.extensions.datadriven.uq.quadrature.marginalization.marginalization import doMarginalize

        # fix stochastic setting
        self.alpha, self.beta = 5., 10.
        self.lwr, self.upr = 0., 1.
        # presumably Normal(mu, sigma, lower, upper) -- TODO confirm
        self.normal = Normal(0, 1, -2, 2)
        self.uniform = Uniform(self.lwr, self.upr)
        self.b = Beta(self.alpha, self.beta, self.lwr, self.upr)
        self.dim = data.shape[0]

        if sample_type == 'cbeta':
            # marginalize the density
            opMar = createOperationDensityMargTo1DKDE(dist.dist)
            kdex = GaussianKDE()
            opMar.margToDimX(kdex, 0)
            kdey = GaussianKDE()
            opMar.margToDimX(kdey, 1)

            # set the mean vector and the correlation matrix
            self.x = [GaussianKDEDist(kdex.getSamples().array()),
                      GaussianKDEDist(kdey.getSamples().array())]
            self.M = np.array([[kdex.mean(), kdey.mean()]]).T
            self.S = dist.corrcoeff()
        else:
            # two independent beta marginals
            self.x = [self.b, self.b]
            self.M = np.array([[self.b.mean(), self.b.mean()]]).T
            self.S = np.array([[1., 0.],
                               [0., 1.]])

        # compute the correlation matrix from the covariance matrix
        # this is used to transform the results back to the original space
        self.D = np.diag(np.sqrt(np.diag(self.S)))
        # divide the diagonal by the standard deviation of the diagonal
        # elements
        self.D_inverse = np.diag(1. / np.sqrt(np.diag(self.S)))
        self.C = self.D_inverse.dot(self.S.dot(self.D_inverse))

        # compute cholesky decomposition
        # adjust it according to [Lu ...]
        # nothing needs to be done for uniform <--> uniform, so the
        # Cholesky factor is used unchanged
        self.L = np.linalg.cholesky(self.C)
        self.L_inverse = np.linalg.inv(self.L)

        # sanity checks: C = L L^T and S = D L L^T D
        # NOTE(review): the signed entries are summed up, so deviations of
        # opposite sign can cancel each other
        assert abs(np.sum(self.C - self.L.dot(self.L.T))) < 1e-14
        assert abs(np.sum(self.S - self.D.dot(self.L.dot(self.L.T.dot(self.D))))) < 1e-14

    def trans_U_to_X(self, u_vars, x_vars):
        """Chain U -> Z -> X; the result is written to ``x_vars``."""
        z_vars = np.zeros(u_vars.shape)
        self.trans_U_to_Z(u_vars, z_vars)
        self.trans_Z_to_X(z_vars, x_vars)

    def trans_X_to_U(self, x_vars, u_vars):
        """Chain X -> Z -> U; the result is written to ``u_vars``."""
        z_vars = np.zeros(u_vars.shape)
        self.trans_X_to_Z(x_vars, z_vars)
        self.trans_Z_to_U(z_vars, u_vars)

    def trans_Z_to_X(self, z_vars, x_vars):
        """Map each row of ``z_vars`` through the normal cdf and the ppf of
        the matching marginal, rescaled to [lwr, upr]."""
        # range instead of xrange: xrange does not exist in Python 3
        for i in range(self.dim):
            normcdf = self.normal.cdf(z_vars[i])
            # the marginals are evaluated on a column of points
            scaled_x = self.x[i].ppf(normcdf.reshape(len(normcdf), 1))
            scaled_x = scaled_x.reshape(len(normcdf))
            x_vars[i] = self.lwr + (self.upr - self.lwr) * scaled_x

    def trans_X_to_Z(self, x_vars, z_vars):
        """Map each row of ``x_vars`` through the cdf of the matching
        marginal and the normal ppf."""
        # range instead of xrange: xrange does not exist in Python 3
        for i in range(self.dim):
            # the marginals are evaluated on a column of points
            betacdf = self.x[i].cdf(x_vars[i].reshape(len(x_vars[i]), 1))
            betacdf = betacdf.reshape(len(betacdf))
            z_vars[i] = self.normal.ppf(betacdf)

    def trans_Z_to_U(self, z_vars, u_vars):
        """Shift by ``M``, apply ``L^-1 D^-1`` and the normal cdf."""
        # decorrelate the variables
        res = self.L_inverse.dot(self.D_inverse.dot(z_vars - self.M))

        # transform to uniform space
        for i, zi in enumerate(res):
            u_vars[i] = self.normal.cdf(zi)

    def trans_U_to_Z(self, u_vars, z_vars):
        """Apply the normal ppf, then ``D L`` and the shift ``M``."""
        # transform to std normal space
        for i, ui in enumerate(u_vars):
            z_vars[i] = self.normal.ppf(ui)

        # apply the correlation
        res = self.D.dot(self.L.dot(z_vars)) + self.M

        # transform to space of correlated normal
        for i, zi in enumerate(res):
            z_vars[i] = zi