    def computeSystemMatrixForVarianceProjected(self, gs, gpsi, basisi, gpsj,
                                                basisj, dist, trans, dims):
        if isinstance(dist, SGDEdist):
            # project distribution on desired dimensions
            # get the objects needed for integrating
            # the current dimensions
            gpsk, basisk = project(dist.grid, list(range(len(dims))))
            # compute the bilinear form
            # -> the measure needs to be uniform, since it is already
            #    encoded in the sparse grid density
            self.trilinearForm.setDistributionAndTransformation(
                [Uniform(0, 1)] * gs.getDimension(), None)
            A_idim, erri = self.trilinearForm.computeTrilinearFormByList(
                gs, gpsk, basisk, dist.alpha, gpsi, basisi, gpsj, basisj)
        else:
            # the distribution is given analytically; handle it
            # analytically in the integration of the basis functions
            if isinstance(dist, Dist) and len(dims) > 1:
                # NOTE: results are only approximate if the random
                # variables are not independent
                # marginalize the densities and continue
                marg_dists = [None] * len(dims)
                for i, idim in enumerate(dims):
                    marg_dists[i] = dist.marginalizeToDimX(idim)
                dist = marg_dists
                trans = trans.getTransformations()

            if isinstance(dist, Dist):
                dist = [dist]
                trans = [trans]

            self.bilinearForm.setDistributionAndTransformation(dist, trans)
            A_idim, erri = self.bilinearForm.computeBilinearFormByList(
                gs, gpsi, basisi, gpsj, basisj)
        return A_idim, erri

    def computeSystemMatrixForMeanProjected(self, gs, gpsi, basisi, dist,
                                            trans, dims):
        if isinstance(dist, SGDEdist):
            # if the distribution is given as a sparse grid function we
            # need to compute the bilinear form of the grids
            # accumulate objects needed for computing the bilinear form
            assert len(dims) == dist.grid.getStorage().getDimension()
            gpsj, basisj = project(dist.grid, list(range(len(dims))))

            # compute the bilinear form
            # -> the measure needs to be uniform, since it is already
            #    encoded in the sparse grid density
            self.bilinearForm.setDistributionAndTransformation(
                [Uniform(0, 1)] * gs.getDimension(), None)
            A, erri = self.bilinearForm.computeBilinearFormByList(
                gs, gpsi, basisi, gpsj, basisj)
            # weight it with the coefficient of the density function
            tmp = A.dot(dist.alpha)
        else:
            # the distribution is given analytically; handle it
            # analytically in the integration of the basis functions
            if isinstance(dist, Dist) and len(dims) > 1:
                # NOTE: results are only approximate if the random
                # variables are not independent
                # marginalize the densities and continue
                marg_dists = [None] * len(dims)
                for i, idim in enumerate(dims):
                    marg_dists[i] = dist.marginalizeToDimX(idim)
                dist = marg_dists
                trans = trans.getTransformations()

            if isinstance(dist, Dist):
                dist = [dist]
                trans = [trans]

            self.linearForm.setDistributionAndTransformation(dist, trans)
            tmp, erri = self.linearForm.computeLinearFormByList(
                gs, gpsi, basisi)

        return tmp, erri
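

def _moments_from_projected_forms(strategy, alpha, gs, gpsi, basisi,
                                  gpsj, basisj, dist, trans, dims):
    # Usage sketch, not part of the original module: `strategy` stands for a
    # hypothetical instance of the estimation class defining the two methods
    # above, and `alpha` for the sparse grid surrogate coefficients.
    # The linear form b_i = int phi_i(x) dP(x) yields the mean, the bilinear
    # form A_ij = int phi_i(x) phi_j(x) dP(x) the second moment.
    import numpy as np

    b, _ = strategy.computeSystemMatrixForMeanProjected(
        gs, gpsi, basisi, dist, trans, dims)
    A, _ = strategy.computeSystemMatrixForVarianceProjected(
        gs, gpsi, basisi, gpsj, basisj, dist, trans, dims)

    mean = np.dot(alpha, b)                        # E[f]
    var = np.dot(alpha, A.dot(alpha)) - mean ** 2  # E[f^2] - E[f]^2
    return mean, var

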
def run_densityEstimation(
        functionName,
        method,
        kfold=20,
        numDims=2,
        numSamples=1000,
        candidates="join",
        bandwidthOptimizationType=BandwidthOptimizationType_SILVERMANSRULE,
        out=True,
        plot=False,
        tikz=False):
    if method == "sgde_zero":
        interpolation = "zero"
    else:
        interpolation = "boundaries"

    samples, bounds, natafType = load_data_set(functionName, numSamples,
                                               numDims)

    # do kfold cross validation
    crossEntropyValidation = np.zeros((kfold, 2))
    learnSamples, validationSamples = splitset(samples, splitPercentage=0.7)

    stats = {}
    for i in range(kfold):
        print("=" * 100)
        print("run (%s)= %i/%i" % (method, i + 1, kfold))
        print("=" * 100)
        print("valid: %i x %i (mean=%g, var=%g)" %
              (validationSamples.shape[0], validationSamples.shape[1],
               np.mean(validationSamples), np.var(validationSamples)))

        np.random.seed(i * 123456 + i % 2)
        trainSamples, testSamples = splitset(learnSamples,
                                             splitPercentage=1. - 1. / kfold)

        if "sgde" in method:
            dist, stats[i] = estimateSGDEDensity(functionName,
                                                 trainSamples,
                                                 testSamples,
                                                 bounds=bounds,
                                                 iteration=i,
                                                 plot=plot,
                                                 label=method,
                                                 out=out,
                                                 candidates=candidates,
                                                 interpolation=interpolation)
        elif "kde" in method:
            dist, stats[i] = estimateKDEDensity(
                functionName,
                trainSamples,
                testSamples,
                iteration=i,
                plot=plot,
                label=method,
                out=out,
                bandwidthOptimizationTypeStr=bandwidthOptimizationType)
        elif "nataf" in method:
            # estimate nataf density
            dist, stats[i] = estimateNatafDensity(functionName,
                                                  natafType,
                                                  testSamples,
                                                  iteration=i,
                                                  bounds=bounds,
                                                  plot=plot,
                                                  label=method,
                                                  out=out)
        else:
            raise AttributeError("unknown config '%s'" % method)

        # evaluate the distribution according to the validation set
        crossEntropyValidation[i, 0] = i
        crossEntropyValidation[i, 1] = dist.crossEntropy(validationSamples)
        stats[i]["crossEntropyValidation"] = crossEntropyValidation[i, 1]
        stats[i]["validationSamples"] = validationSamples
        stats[i]["samples"] = {"shuffled": {}, "not_shuffled": {}}
        stats[i]["samples"]["shuffled"]["rvs"] = dist.rvs(numSamples,
                                                          shuffle=True)
        stats[i]["samples"]["shuffled"]["uniform_validation"] = dist.cdf(
            validationSamples, shuffle=True)
        kstests = [None] * numDims

        for idim in range(numDims):
            samples1d = stats[i]["samples"]["shuffled"][
                "uniform_validation"][:, idim]
            res_test = kstest(samples1d, Uniform(0, 1).cdf)
            kstests[idim] = res_test.statistic, res_test.pvalue
            if plot:
                plt.figure()
                plt.hist(samples1d, cumulative=True, density=True)
                xs = np.linspace(0, 1, 10)
                plt.plot(xs, [Uniform(0, 1).cdf(xi) for xi in xs])
                plt.title("shuffled: %i, %s" % (idim, kstests[idim]))
        print("-" * 80)
        print("shuffled    ", kstests, np.min(kstests), np.max(kstests))
        if plot:
            plt.show()

        stats[i]["samples"]["shuffled"]["kstests"] = kstests
        stats[i]["samples"]["not_shuffled"]["rvs"] = dist.rvs(numSamples,
                                                              shuffle=False)
        stats[i]["samples"]["not_shuffled"]["uniform_validation"] = dist.cdf(
            validationSamples, shuffle=False)
        kstests = [None] * numDims
        for idim in range(numDims):
            samples1d = stats[i]["samples"]["not_shuffled"][
                "uniform_validation"][:, idim]
            res_test = kstest(samples1d, Uniform(0, 1).cdf)
            kstests[idim] = res_test.statistic, res_test.pvalue
            if plot:
                plt.figure()
                plt.hist(samples1d, cumulative=True, density=True)
                xs = np.linspace(0, 1, 1000)
                plt.plot(xs, [Uniform(0, 1).cdf(xi) for xi in xs])
                plt.title("not shuffled: %i, %s" % (idim, kstests[idim]))
        print("not shuffled", kstests, np.min(kstests), np.max(kstests))
        if plot:
            plt.show()

        stats[i]["samples"]["not_shuffled"]["kstests"] = kstests

        print("CV valid = %g" % crossEntropyValidation[i, 1])

        # write results to file
        if out:
            out_crossEntropy = os.path.join(
                "data", method, "%s.%s.validation.cross_entropies.csv" %
                (method, functionName))
            np.savetxt(out_crossEntropy, crossEntropyValidation[:i + 1, :])

            # save stats to pickle
            out_stats = os.path.join(
                "data", method,
                "%s.%s.best.stats.pkl" % (method, functionName))
            with open(out_stats, "wb") as fd:
                pkl.dump(stats, fd)
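

# Usage sketch (hypothetical entry point, not part of the original module);
# "mult_beta" is a placeholder for any function name accepted by
# load_data_set.
if __name__ == "__main__":
    run_densityEstimation("mult_beta",
                          "sgde_zero",
                          kfold=5,
                          numDims=2,
                          numSamples=1000,
                          candidates="join",
                          out=False,
                          plot=False)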