Example #1
    def test2DCovarianceMatrix(self):
        # prepare data
        C = np.array([[0.1, 0.08, 0.02], [0.08, 0.1, 0.02], [0.02, 0.02, 0.1]]) / 10.

        U = dists.MultivariateNormal([0.5, 0.5, 0.5], C, 0, 1)
        samples = U.rvs(20000)
        dist = KDEDist(samples,
                       kernelType=KernelType_EPANECHNIKOV,
                       bounds=U.getBounds())

        # print the results
        self.assertTrue(np.linalg.norm(C - dist.cov()) < 1e-2, "KDE cov wrong")
        self.assertTrue(np.linalg.norm(np.corrcoef(samples.T) - dist.corrcoeff()) < 1e-1, "KDE corrcoef wrong")
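The assertions above compare the KDE's covariance and correlation against the training data. For reference, the same target quantities can be computed directly with NumPy; this is a minimal sketch (ignoring the [0, 1] truncation applied by dists.MultivariateNormal, and not using pysgpp):

import numpy as np

rng = np.random.default_rng(0)
C = np.array([[0.1, 0.08, 0.02], [0.08, 0.1, 0.02], [0.02, 0.02, 0.1]]) / 10.
samples = rng.multivariate_normal([0.5, 0.5, 0.5], C, size=20000)

print(np.cov(samples, rowvar=False))   # sample covariance, close to C
print(np.corrcoef(samples.T))          # sample correlation, what dist.corrcoeff() is compared against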
Example #2
    def test2DNormalMoments(self):
        mean = 0
        var = 0.5

        U = dists.J([dists.Normal(mean, var, -2, 2),
                     dists.Normal(mean, var, -2, 2)])

        trainSamples = U.rvs(10000)
        dist = KDEDist(trainSamples)

        # -----------------------------------------------
        self.assertTrue(np.abs(U.mean() - dist.mean()) < 1e-2, "KDE mean wrong")
        self.assertTrue(np.abs(U.var() - dist.var()) < 1e-2, "KDE variance wrong")
Example #3
    def test2DCDFandPPF(self):
        # prepare data
        C = np.array([[0.1, 0.08], [0.08, 0.1]]) / 10.
        U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)
        train_samples = U.rvs(1000)

        fig = plt.figure()
        plotDensity2d(U)
        plt.title('true density')
        fig.show()

        dist = KDEDist(train_samples, bounds=U.getBounds())

        fig = plt.figure()
        plotDensity2d(dist)
        plt.title('estimated KDE density')
        fig.show()

        samples = dists.J([dists.Uniform(0, 1),
                           dists.Uniform(0, 1)]).rvs(1000)

        fig = plt.figure()
        plt.plot(samples[:, 0], samples[:, 1], "o ")
        plt.title('u space')
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        fig.show()

        transformed_samples = dist.ppf(samples)

        fig = plt.figure()
        plt.plot(transformed_samples[:, 0], transformed_samples[:, 1], "o ")
        plt.title('x space (transformed)')
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        fig.show()

        samples = dist.cdf(transformed_samples)

        fig = plt.figure()
        plt.plot(samples[:, 0], samples[:, 1], "o ")
        plt.title('u space (transformed)')
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        fig.show()

        plt.show()
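The round trip in this test is the probability integral transform: ppf maps uniformly distributed points into the target distribution, and cdf maps them back to the unit square. A minimal one-dimensional sketch of the same idea, using scipy.stats.norm in place of the pysgpp KDE:

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(0)
u = rng.uniform(size=1000)                 # points in u space
x = norm.ppf(u, loc=0.5, scale=0.1)        # ppf: u space -> x space (inverse transform sampling)
u_back = norm.cdf(x, loc=0.5, scale=0.1)   # cdf: x space -> back to u space
assert np.allclose(u, u_back)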
Example #4
    def test1DNormalDist(self):
        # prepare data
        U = dists.Normal(1.85, .3, 0, 3)
        trainSamples = np.array([U.rvs(500)]).T
        testSamples = np.array([U.rvs(1000)]).T

        # build parameter set
        dist = KDEDist(trainSamples,
                       kernelType=KernelType_GAUSSIAN,
                       bandwidthOptimizationType=
                       BandwidthOptimizationType_MAXIMUMLIKELIHOOD,
                       bounds=U.getBounds())

        #         fig = plt.figure()
        #         plotDensity1d(U)
        #         plotDensity1d(dist)

        print("quad = %s" % (quad(lambda x: dist.pdf([x]), 0, 3), ))
        print("mean = %g ~ %g" % (U.mean(), dist.mean()))
        print("var = %g ~ %g" % (U.var(), dist.var()))
        print("KL = %g" % U.klDivergence(dist, testSamples, testSamples))
        print("CE = %g" % dist.crossEntropy(testSamples))
        print("MSE = %g" % dist.l2error(U, testSamples, testSamples))

        plt.show()
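The printed quantities are Monte Carlo estimates over the test samples. A rough sketch of how such estimates look in plain NumPy/SciPy, using scipy.stats.gaussian_kde as a stand-in for KDEDist and ignoring the [0, 3] truncation (the exact definitions used by klDivergence, crossEntropy and l2error in pysgpp may differ):

import numpy as np
from scipy.stats import norm, gaussian_kde

rng = np.random.default_rng(0)
train = rng.normal(1.85, 0.3, size=500)
test = rng.normal(1.85, 0.3, size=1000)

kde = gaussian_kde(train)                  # density estimate q(x)
p = norm.pdf(test, loc=1.85, scale=0.3)    # true density p(x) at the test points
q = kde(test)                              # estimated density q(x) at the test points

print("CE  = %g" % -np.mean(np.log(q)))             # cross entropy ~ -E_p[log q]
print("KL  = %g" % np.mean(np.log(p) - np.log(q)))  # KL(p || q) ~ E_p[log(p/q)]
print("MSE = %g" % np.mean((p - q) ** 2))           # squared density error at the test points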
Example #5
    def test1DCDFandPPF(self):
        # prepare data
        U = Normal(0.5, 0.1, 0, 1)
        train_samples = U.rvs(1000).reshape(1000, 1)

        dist = KDEDist(train_samples, kernelType=KernelType_EPANECHNIKOV)

        rc('font', **{'size': 18})

        fig = plt.figure()
        x = np.linspace(0, 1, 1000)
        plt.plot(x, dist.cdf(x), label="estimated")
        plt.plot(x, [U.cdf(xi) for xi in x], label="analytic")
        plt.legend(loc="lower right")
        fig.show()

        fig = plt.figure()
        plt.hist(train_samples, density=True)
        plotDensity1d(U, label="analytic")
        plotDensity1d(dist, label="estimated")
        plt.title("original space")
        plt.legend()
        fig.show()

        transformed_samples = dist.cdf(train_samples)

        fig = plt.figure()
        plt.hist(transformed_samples, density=True)
        plt.title("uniform space")
        fig.show()

        transformed_samples = dist.ppf(transformed_samples)

        fig = plt.figure()
        plt.hist(transformed_samples, density=True)
        plotDensity1d(U, label="analytic")
        plotDensity1d(dist, label="estimated")
        plt.title("original space")
        plt.legend()
        fig.show()

        plt.show()
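KernelType_EPANECHNIKOV selects the Epanechnikov kernel K(u) = 0.75 * (1 - u^2) for |u| <= 1. A minimal plain-NumPy illustration of a 1D KDE with this kernel (the bandwidth h is chosen arbitrarily here; this is not the pysgpp implementation):

import numpy as np

def epanechnikov_kde(x, samples, h):
    # pdf(x) = 1/(n*h) * sum_i K((x - x_i) / h), with K(u) = 0.75*(1 - u^2) on |u| <= 1
    u = (np.asarray(x)[:, None] - samples[None, :]) / h
    K = np.where(np.abs(u) <= 1, 0.75 * (1.0 - u ** 2), 0.0)
    return K.mean(axis=1) / h

rng = np.random.default_rng(0)
samples = rng.normal(0.5, 0.1, size=1000)
x = np.linspace(0.0, 1.0, 5)
print(epanechnikov_kde(x, samples, h=0.05))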
Example #6
    def test2DMarginalize(self):
        # prepare data
        C = np.array([[0.2, 0.08], [0.08, 0.2]]) / 10.
        U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)

        fig = plt.figure()
        plotDensity2d(U)
        plt.title('true density')
        fig.show()

        samples = U.rvs(1000)
        kde = KDEDist(samples)

        #         fig = plt.figure()
        #         plotDensity2d(kde)
        #         plt.title('estimated KDE density')
        #         fig.show()

        # marginalize

        opMarg = createOperationDensityMarginalizeKDE(kde.dist)
        kdeX = kde.marginalizeToDimX(0)
        kdeY = kde.marginalizeToDimX(1)

        fig = plt.figure()
        plotDensity1d(kdeX)
        plotDensity1d(kdeY)
        plt.title('margToDimX densities')
        fig.show()

        kdeX = kde.marginalize(1)
        kdeY = kde.marginalize(0)

        fig = plt.figure()
        plotDensity1d(kdeX)
        plotDensity1d(kdeY)
        plt.title('doMarginalize densities')
        fig.show()

        plt.show()
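For a product-kernel KDE, marginalizing the 2D density onto one axis yields the 1D KDE built from that coordinate of the samples (with the same per-dimension bandwidth). A small NumPy check of that identity, independent of the pysgpp marginalization operations (bandwidths chosen arbitrarily):

import numpy as np
from scipy.integrate import quad

rng = np.random.default_rng(0)
samples = rng.multivariate_normal([0.5, 0.5],
                                  np.array([[0.2, 0.08], [0.08, 0.2]]) / 10.,
                                  size=200)
hx, hy = 0.05, 0.05

def gauss(u, h):
    return np.exp(-0.5 * (u / h) ** 2) / (h * np.sqrt(2.0 * np.pi))

def kde2d(x, y):
    # product Gaussian kernels with per-dimension bandwidths hx, hy
    return np.mean(gauss(x - samples[:, 0], hx) * gauss(y - samples[:, 1], hy))

def kde1d_x(x):
    return np.mean(gauss(x - samples[:, 0], hx))

x0 = 0.4
marg, _ = quad(lambda y: kde2d(x0, y), -1.0, 2.0)
print(marg, kde1d_x(x0))   # the two values agree up to quadrature error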
Example #7
    def test2DPPF(self):
        # prepare data
        C = np.array([[0.1, 0.08], [0.08, 0.1]]) / 10.
        U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)

        fig = plt.figure()
        plotDensity2d(U)
        plt.title('true density')
        fig.show()

        dist = KDEDist(U.rvs(1000),
                       kernelType=KernelType_EPANECHNIKOV,
                       bounds=U.getBounds())

        fig = plt.figure()
        plotDensity2d(dist)
        plt.title('estimated KDE density')
        fig.show()

        samples = dists.J([dists.Uniform(0, 1),
                           dists.Uniform(0, 1)]).rvs(1000)

        fig = plt.figure()
        plt.plot(samples[:, 0], samples[:, 1], "o ")
        plt.title('uniformly drawn samples')
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        fig.show()

        transformed_samples = dist.ppf(samples)

        fig = plt.figure()
        plt.plot(transformed_samples[:, 0], transformed_samples[:, 1], "o ")
        plt.title('transformed samples')
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        fig.show()

        plt.show()
Example #8
def estimateKDEDensity(functionName,
                       trainSamples,
                       testSamples=None,
                       iteration=0,
                       plot=False,
                       out=True,
                       label="kde_gaussian",
                       bandwidthOptimizationTypeStr="rot"):
    print("train: %i x %i (mean=%g, var=%g)" %
          (trainSamples.shape[0], trainSamples.shape[1], np.mean(trainSamples),
           np.var(trainSamples)))
    if testSamples is not None:
        print("test : %i x %i (mean=%g, var=%g)" %
              (testSamples.shape[0], testSamples.shape[1],
               np.mean(testSamples), np.var(testSamples)))

    if "gaussian" in label:
        kernelType = KernelType_GAUSSIAN
    elif "epanechnikov" in label:
        kernelType = KernelType_EPANECHNIKOV
    else:
        raise AttributeError("label is unknown")

    bandwidthOptimizationType = strTobandwidthOptimizationType(
        bandwidthOptimizationTypeStr)
    kdeDist = KDEDist(trainSamples,
                      kernelType=kernelType,
                      bandwidthOptimizationType=bandwidthOptimizationType)
    # -----------------------------------------------------------
    cvKDE = kdeDist.crossEntropy(testSamples)

    # output directory for plots and serialized results (used when out is True)
    pathResults = os.path.join("data", label)

    if plot and kdeDist.getDim() == 2:
        fig = plt.figure()
        plotDensity2d(kdeDist)
        plt.title("log=%g" % cvKDE)
        if out:
            plt.tight_layout()
            plt.savefig(
                os.path.join(pathResults, "kde_dist.%s.i%i.jpg" %
                             (functionName, iteration)))
            plt.savefig(
                os.path.join(pathResults, "kde_dist.%s.i%i.pdf" %
                             (functionName, iteration)))
            plt.close(fig)
        else:
            plt.show()

    print("CV test = %g" % cvKDE)

    # -----------------------------------------------------------
    if out:

        # serialize cross entropies
        out_crossEntropies = os.path.join(
            pathResults,
            "kde_cross_entropies.%s.i%i.csv" % (functionName, iteration))
        fd = open(out_crossEntropies, 'w', newline='')
        file_writer = csv.writer(fd)
        file_writer.writerow(["crossEntropy"])
        file_writer.writerow([cvKDE])
        fd.close()

        # serialize samples
        np.savetxt(
            os.path.join(
                pathResults,
                "kde_train_samples.%s.i%i.csv" % (functionName, iteration)),
            trainSamples)
        np.savetxt(
            os.path.join(
                pathResults,
                "kde_test_samples.%s.i%i.csv" % (functionName, iteration)),
            testSamples)

        if plot:
            # plot density
            fig = plt.figure()
            plotDensity2d(kdeDist)
            plt.title("%s -> CV = %g" % (kdeDist.getBandwidths(), cvKDE))
            plt.savefig(
                os.path.join(pathResults,
                             "kde_pdf.%s.i%i.jpg" % (functionName, iteration)))
            plt.close(fig)

        # serialize best configuration to json
        out_bestDist = os.path.join(
            pathResults,
            "kde_best_config.%s.i%i.json" % (functionName, iteration))
        text = kdeDist.toJson()
        fd = open(out_bestDist, "w")
        fd.write(text)
        fd.close()

    # stats
    stats = {
        'config': {
            'functionName': functionName,
            'numDims': trainSamples.shape[1],
            'label': label,
            'bandwidth_optimization': bandwidthOptimizationType,
            'kernelType': kernelType,
            'iteration': iteration
        },
        'trainSamples': trainSamples,
        'testSamples': testSamples,
        'crossEntropyTrainKDE': kdeDist.crossEntropy(trainSamples),
        'crossEntropyTestKDE': cvKDE,
        'KDEDist_json': kdeDist.toJson()
    }

    return kdeDist, stats
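The bandwidthOptimizationTypeStr argument selects how the kernel bandwidth is chosen ("rot", presumably a rule-of-thumb estimate, or maximum likelihood). A minimal sketch of leave-one-out maximum-likelihood bandwidth selection for a 1D Gaussian KDE in plain NumPy, one common way such an optimizer can be implemented (not necessarily how pysgpp does it):

import numpy as np

def loo_log_likelihood(samples, h):
    # leave-one-out log-likelihood of a 1D Gaussian KDE with bandwidth h
    n = len(samples)
    d = samples[:, None] - samples[None, :]
    K = np.exp(-0.5 * (d / h) ** 2) / (h * np.sqrt(2.0 * np.pi))
    np.fill_diagonal(K, 0.0)               # leave the i-th sample out
    return np.sum(np.log(K.sum(axis=1) / (n - 1)))

rng = np.random.default_rng(0)
samples = rng.normal(0.0, 1.0, size=300)
bandwidths = np.linspace(0.05, 1.0, 40)
scores = [loo_log_likelihood(samples, h) for h in bandwidths]
print("ML bandwidth:", bandwidths[int(np.argmax(scores))])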
Example #9
    def __init__(self, data, sample_type=None, dist=None):
        from pysgpp.extensions.datadriven.uq.dists import Uniform, Beta, SGDEdist, Normal, KDEDist
        from pysgpp.extensions.datadriven.uq.quadrature.marginalization.marginalization import doMarginalize

        # fix stochastic setting
        self.alpha, self.beta = 5., 10.
        self.lwr, self.upr = 0., 1.
        self.normal = Normal(0, 1, -2, 2)
        self.uniform = Uniform(self.lwr, self.upr)
        self.b = Beta(self.alpha, self.beta, self.lwr, self.upr)
        self.dim = data.shape[0]

        if sample_type == 'cbeta':
            # marginalize the density
            opMar = createOperationDensityMargTo1DKDE(dist.dist)
            kdex = KernelDensityEstimator()
            opMar.margToDimX(kdex, 0)
            kdey = KernelDensityEstimator()
            opMar.margToDimX(kdey, 1)

            # set the mean vector and the correlation matrix
            self.x = [
                KDEDist(kdex.getSamples().array()),
                KDEDist(kdey.getSamples().array())
            ]
            self.M = np.array([[kdex.mean(), kdey.mean()]]).T
            self.S = dist.corrcoeff()
        else:
            self.x = [self.b, self.b]
            self.M = np.array([[self.b.mean(), self.b.mean()]]).T
            self.S = np.array([[1., 0.], [0., 1.]])

        # compute the correlation matrix from the covariance matrix;
        # it is used to transform the results back to the original space
        self.D = np.diag(np.sqrt(np.diag(self.S)))
        # D^{-1}: reciprocals of the standard deviations on the diagonal
        self.D_inverse = np.diag(1. / np.sqrt(np.diag(self.S)))
        self.C = self.D_inverse.dot(self.S.dot(self.D_inverse))

        #         fig = plt.figure()
        #         plotDensity1d(self.x[0])
        #         plotDensity1d(self.b)
        #         fig.show()
        #
        #         fig = plt.figure()
        #         plotDensity1d(self.x[1])
        #         plotDensity1d(self.b)
        #         fig.show()

        # compute cholesky decomposition
        self.L = np.linalg.cholesky(self.C)

        # adjust it according to [Lu ...]
        # nothing needs to be done for uniform <--> uniform
        self.L = self.L
        self.L_inverse = np.linalg.inv(self.L)

        assert abs(np.sum(self.C - self.L.dot(self.L.T))) < 1e-14
        assert abs(
            np.sum(self.S -
                   self.D.dot(self.L.dot(self.L.T.dot(self.D))))) < 1e-14
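The two assertions at the end check the standard identities between the covariance matrix S, the diagonal matrix of standard deviations D, the correlation matrix C = D^{-1} S D^{-1}, and its Cholesky factor L with L L^T = C (hence S = D L L^T D). A self-contained NumPy check of the same identities with a hypothetical covariance matrix:

import numpy as np

S = np.array([[0.04, 0.012], [0.012, 0.09]])   # example covariance matrix
D = np.diag(np.sqrt(np.diag(S)))               # standard deviations on the diagonal
D_inv = np.diag(1.0 / np.sqrt(np.diag(S)))
C = D_inv @ S @ D_inv                          # correlation matrix
L = np.linalg.cholesky(C)

assert abs(np.sum(C - L @ L.T)) < 1e-14
assert abs(np.sum(S - D @ (L @ L.T) @ D)) < 1e-14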