Python SGDEdist.crossEntropy Examples

Programming Language: Python

Namespace/Package Name: pysgpp.extensions.datadriven.uq.dists

Class/Type: SGDEdist

Method/Function: crossEntropy

Examples at hotexamples.com: 2

Python SGDEdist.crossEntropy – 2 examples found. These are the top-rated real-world Python examples of pysgpp.extensions.datadriven.uq.dists.SGDEdist.crossEntropy, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

byLearnerSGDEConfig(12)

SGDEdist(5)

crossEntropy(2)

mean(2)

var(2)

byConfig(1)

fromSGFunction(1)

toJson(1)

Example #1

Show file

def estimateSGDEDensity(functionName,
                        trainSamples,
                        testSamples=None,
                        bounds=None,
                        iteration=0,
                        plot=False,
                        out=True,
                        label="sgde_zero",
                        candidates="intersections",
                        interpolation="setToZero"):
    """Learn SGDE densities, make them positive and return the best one.

    For every grid level in 2..6 and every number of refinement steps in
    0..4 a density is learned with ``SGDEdist.byLearnerSGDEConfig``, a
    positive variant is built with ``createOperationMakePositive`` and the
    variant with the smallest cross entropy on ``testSamples`` is kept.

    Args:
        functionName: label that is only recorded in the returned stats.
        trainSamples: 2d numpy array (``shape[0]`` x ``shape[1]``) used to
            learn the densities.
        testSamples: samples used to compute the cross entropies.
            NOTE(review): the default ``None`` is passed straight to
            ``crossEntropy`` -- whether that is supported is not visible
            here, callers should provide test samples.
        bounds: forwarded unchanged to the ``SGDEdist`` constructors.
        iteration: index that is encoded in the names of all output files
            and recorded in the stats.
        plot: if true and the grid is two-dimensional, figures are created.
            The figures of the current best density are saved even when
            ``out`` is false.
        out: if true, stats, grids, coefficients, configurations, cross
            entropies and samples are written to ``data/<label>``.
        label: sub directory of ``data`` and prefix of the figure files.
        candidates: name of the candidate search algorithm, resolved with
            ``strToCandidateSearchAlgorithm``.
        interpolation: name of the interpolation algorithm, resolved with
            ``strToInterpolationAlgorithm``.

    Returns:
        Tuple ``(bestDist, stats)``: the positive density with the smallest
        test cross entropy (``None`` if no candidate was better than
        infinity) and a dictionary with statistics about that density.
    """
    print("train: %i x %i (mean=%g, var=%g)" %
          (trainSamples.shape[0], trainSamples.shape[1], np.mean(trainSamples),
           np.var(trainSamples)))
    if testSamples is not None:
        print("test : %i x %i (mean=%g, var=%g)" %
              (testSamples.shape[0], testSamples.shape[1],
               np.mean(testSamples), np.var(testSamples)))

    candidateSearchAlgorithm = strToCandidateSearchAlgorithm(candidates)
    interpolationAlgorithm = strToInterpolationAlgorithm(interpolation)

    results = {}
    crossEntropies = {}
    # grid_level and refinement_numSteps are overwritten in the loops below
    config = {
        "grid_level": 1,
        "grid_type": "linear",
        "grid_maxDegree": 1,
        "refinement_numSteps": 0,
        "refinement_numPoints": 3,
        "solver_threshold": 1e-10,
        "solver_verbose": False,
        "regularization_type": "Laplace",
        "crossValidation_enable": True,
        "crossValidation_kfold": 5,
        "crossValidation_silent": True,
        "sgde_makePositive": False
    }

    pathResults = os.path.join("data", label)
    key = 1
    bestCV = float("Inf")
    bestDist = None

    # stats
    stats = {
        'config': {
            'functionName': functionName,
            'numDims': 2,
            'adaptive': True,
            'refnums': 0,
            'consistentGrid': True,
            'candidateSearchAlgorithm': candidates,
            'interpolationAlgorithm': interpolation,
            'maxNumGridPoints': 0,
            'iteration': iteration
        },
        'trainSamples': trainSamples,
        'testSamples': testSamples
    }

    for level in range(2, 7):
        print("-" * 60)
        print("l=%i" % level)
        for refinementSteps in range(0, 5):
            config["grid_level"] = level
            config["refinement_numSteps"] = refinementSteps
            sgdeDist = SGDEdist.byLearnerSGDEConfig(trainSamples,
                                                    config=config,
                                                    bounds=bounds)
            # -----------------------------------------------------------
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            cvSgde = sgdeDist.crossEntropy(testSamples)

            maxLevel = grid.getStorage().getMaxLevel()
            numDims = grid.getStorage().getDimension()

            print("  " + "-" * 30)
            print("  #ref = %i: gs=%i -> CV test = %g" %
                  (refinementSteps, sgdeDist.grid.getSize(), cvSgde))
            # -----------------------------------------------------------
            # make it positive (works on copies, the learned density is kept)
            positiveGrid = grid.clone()
            positiveAlpha_vec = DataVector(alpha)
            opPositive = createOperationMakePositive(candidateSearchAlgorithm,
                                                     interpolationAlgorithm,
                                                     True, False)
            opPositive.makePositive(positiveGrid, positiveAlpha_vec, True)

            # scale to unit integrand
            positiveAlpha = positiveAlpha_vec.array()
            positiveSgdeDist = SGDEdist(positiveGrid,
                                        positiveAlpha,
                                        trainSamples,
                                        bounds=bounds)
            # -----------------------------------------------------------
            cvPositiveSgde = positiveSgdeDist.crossEntropy(testSamples)

            if plot and numDims == 2:
                fig = plt.figure()
                # NOTE(review): the learned (not the positive) density is
                # drawn here while the title reports the positive grid
                plotSG2d(grid,
                         alpha,
                         show_negative=True,
                         show_grid_points=True)
                plt.title("pos: N=%i: vol=%g, log=%g" %
                          (positiveGrid.getSize(),
                           doQuadrature(positiveGrid,
                                        positiveAlpha), cvPositiveSgde))
                plt.tight_layout()
                if out:
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_density_pos_i%i_l%i_r%i.jpg" %
                            (label, iteration, level, refinementSteps)))
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_density_pos_i%i_l%i_r%i.pdf" %
                            (label, iteration, level, refinementSteps)))
                else:
                    plt.close(fig)

            # -----------------------------------------------------------
            print("  positive: gs=%i -> CV test = %g" %
                  (positiveGrid.getSize(), cvPositiveSgde))
            # -----------------------------------------------------------
            # select the best density available based on the given criterion
            # store a snapshot of the config: the dict itself is mutated in
            # every iteration, so sharing it would make all results report
            # the level / refinement steps of the last run
            results[key] = {'config': dict(config), 'dist': positiveSgdeDist}
            crossEntropies[key] = cvPositiveSgde
            key += 1
            candidateSearch = opPositive.getCandidateSetAlgorithm()

            if cvPositiveSgde < bestCV:
                bestCV = cvPositiveSgde
                bestDist = positiveSgdeDist
                numComparisons = candidateSearch.costsComputingCandidates()

                # update the stats -> just for the current best one
                # write the stats of the current best results to the stats dict
                # M = number of negative coefficients, C[d - 2] = binom(M, d)
                C = np.zeros(numDims - 1, dtype="int")
                M = sum(1 for i in range(len(alpha)) if alpha[i] < 0)
                for d in range(2, numDims + 1):
                    C[d - 2] = binom(M, d)

                stats['config']['refnums'] = refinementSteps
                stats['config']['adaptive'] = refinementSteps > 0
                stats['negSGDE_json'] = sgdeDist.toJson()
                stats['posSGDE_json'] = positiveSgdeDist.toJson()
                stats['level'] = level
                stats['maxLevel'] = maxLevel
                stats['fullGridSize'] = (2**maxLevel - 1)**numDims
                stats['sparseGridSize'] = grid.getSize()
                stats['discretizedGridSize'] = positiveGrid.getSize()
                stats['crossEntropyTrainZeroSGDE'] = sgdeDist.crossEntropy(
                    trainSamples)
                stats['crossEntropyTrainDiscretizedSGDE'] = \
                    positiveSgdeDist.crossEntropy(trainSamples)
                stats['crossEntropyTestZeroSGDE'] = cvSgde
                stats['crossEntropyTestDiscretizedSGDE'] = cvPositiveSgde
                stats['numCandidates'] = int(candidateSearch.numCandidates())
                stats['numCandidatesPerLevel'] = np.array(
                    candidateSearch.numCandidatesPerLevel().array(),
                    dtype="int")
                stats['numCandidatesPerIteration'] = np.array(
                    candidateSearch.numCandidatesPerIteration().array(),
                    dtype="int")
                stats['costsCandidateSearch'] = \
                    candidateSearch.costsComputingCandidates()
                stats['costsCandidateSearchBinomial'] = int(C.sum())
                stats['costsCandidateSearchPerIteration'] = np.array(
                    candidateSearch.costsComputingCandidatesPerIteration(
                    ).array(),
                    dtype="int")
                stats['costsCandidateSearchPerIterationBinomial'] = C

                if plot and numDims == 2:
                    # 2d plot of the positive density; saved regardless of out
                    fig = plt.figure()
                    plotSG2d(
                        positiveGrid,
                        positiveAlpha,
                        show_negative=True,
                        show_grid_points=False,
                        colorbarLabel=
                        r"$f_{\mathcal{I}^\text{SG} \cup \mathcal{I}^\text{ext}}$"
                    )
                    plt.title(r"positive: $N=%i/%i$; \# comparisons$=%i$" %
                              (positiveGrid.getSize(),
                               (2**maxLevel - 1)**numDims, numComparisons))
                    plt.xlabel(r"$\xi_1$")
                    plt.ylabel(r"$\xi_2$")
                    plt.tight_layout()
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_pos_i%i_l%i_r%i.jpg" %
                            (label, iteration, level, refinementSteps)))
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_pos_i%i_l%i_r%i.pdf" %
                            (label, iteration, level, refinementSteps)))
                    if out:
                        plt.close(fig)

                    # 3d plot of the positive density
                    fig, ax, _ = plotSG3d(positiveGrid, positiveAlpha)
                    ax.set_zlabel(
                        r"$f_{\mathcal{I}^{\text{SG}} \cup \mathcal{I}^\text{ext}}(\xi_1, \xi_2)$",
                        fontsize=20)
                    ax.set_xlabel(r"$\xi_1$", fontsize=20)
                    ax.set_ylabel(r"$\xi_2$", fontsize=20)

                    plt.tight_layout()
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_pos_i%i_l%i_r%i_3d.jpg" %
                            (label, iteration, level, refinementSteps)))
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_pos_i%i_l%i_r%i_3d.pdf" %
                            (label, iteration, level, refinementSteps)))
                    if out:
                        plt.close(fig)

            if plot and numDims == 2 and not out:
                plt.show()

    if out:
        # save stats
        filename = os.path.join(
            "data", label, "stats_d%i_a%i_r%i_i%i_%s_%s.pkl" %
            (numDims, 1, refinementSteps, iteration, candidates,
             interpolation))
        # pickle produces bytes, so the file has to be opened in binary mode
        with open(filename, "wb") as fd:
            pkl.dump(stats, fd)
        print("stats saved to -> '%s'" % filename)

        # dictionary that stores the information on the estimated densities
        myjson = {
            "Grid": {
                "dimNames": ["phi", "log(K_A)"],
                "matrixEntries": ["phi", "log(K_A)"]
            },
            "Set": {
                "path": "",
                "grids": [],
                "alphas": [],
                "paramValues": [],
                "paramName": "grid_size"
            }
        }

        for resultKey, result in results.items():
            resultConfig = result['config']
            dist = result['dist']
            # serialize grid and coefficients
            baseName = "sgde.i%i.k%i.N%i" % (iteration, resultKey,
                                             dist.grid.getSize())
            out_grid = os.path.join(pathResults, "%s.grid" % baseName)
            out_alpha = os.path.join(pathResults, "%s.alpha.arff" % baseName)
            writeGrid(out_grid, dist.grid)
            writeAlphaARFF(out_alpha, dist.alpha)

            # collect information for json
            myjson["Set"]["grids"].append(os.path.abspath(out_grid))
            myjson["Set"]["alphas"].append(os.path.abspath(out_alpha))
            myjson["Set"]["paramValues"].append(crossEntropies[resultKey])
            # -----------------------------------------------------------
            # serialize the config
            out_config = os.path.join(
                pathResults, "sgde.i%i.k%i.config" % (iteration, resultKey))
            with open(out_config, "w") as fd:
                json.dump(resultConfig, fd, ensure_ascii=True, indent=True)

            # from here on the entry is a complete csv row
            crossEntropies[resultKey] = (crossEntropies[resultKey], out_grid,
                                         out_alpha, out_config)

        # sort the results in myjson according to the cross entropy
        ixs = np.argsort(myjson["Set"]["paramValues"])
        myjson["Set"]["grids"] = [myjson["Set"]["grids"][ix] for ix in ixs]
        myjson["Set"]["alphas"] = [myjson["Set"]["alphas"][ix] for ix in ixs]
        myjson["Set"]["paramValues"] = [
            myjson["Set"]["paramValues"][ix] for ix in ixs
        ]

        # serialize myjson
        out_config = os.path.join(pathResults,
                                  "sgde_visualization.i%i.config" % iteration)
        with open(out_config, "w") as fd:
            json.dump(myjson, fd, ensure_ascii=True, indent=True)

        # serialize cross entropies
        out_crossEntropies = os.path.join(
            pathResults, "sgde_cross_entropies.i%i.csv" % iteration)
        # csv.writer needs a text file; newline="" leaves the line endings
        # to the csv module
        with open(out_crossEntropies, "w", newline="") as fd:
            file_writer = csv.writer(fd)
            file_writer.writerow(
                ["crossEntropy", "grid", "alpha", "sgdeConfig"])
            for row in crossEntropies.values():
                file_writer.writerow(row)

        # serialize samples
        np.savetxt(
            os.path.join(pathResults,
                         "sgde_train_samples.i%i.csv" % iteration),
            trainSamples)
        # test samples are optional in the signature, np.savetxt cannot
        # write None
        if testSamples is not None:
            np.savetxt(
                os.path.join(pathResults,
                             "sgde_test_samples.i%i.csv" % iteration),
                testSamples)

        # serialize best configuration to json
        out_bestDist = os.path.join(pathResults,
                                    "sgde_best_config.i%i.json" % iteration)
        text = bestDist.toJson()
        with open(out_bestDist, "w") as fd:
            fd.write(text)

    return bestDist, stats

Example #2

Show file

    for i in range(gs.getSize()):
        gs.getPoint(i).getStandardCoordinates(p)
        alpha[i] = dist.pdf(p.array())

    # hierarchize
    createOperationHierarchisation(grid).doHierarchisation(alpha)

# wrap grid and coefficients into a density object
alpha = alpha.array()
sgdeDist = SGDEdist(grid, alpha, trainData=trainSamples,
                    bounds=dist.getBounds())

# report level, grid size, KL divergence, cross entropy and sgdeDist.vol
gridSize = sgdeDist.grid.getSize()
klDiv = dist.klDivergence(sgdeDist, testSamples)
crossEnt = sgdeDist.crossEntropy(testSamples)
print("l=%i: (gs=%i) -> %g (%g, %g)," %
      (level, gridSize, klDiv, crossEnt, sgdeDist.vol))
print("-" * 80)

if plot and numDims == 2:
    # 2d view of the grid with its coefficients
    fig = plt.figure()
    plotGrid2d(grid, alpha, show_numbers=False)
    fig.show()

    # 3d view of the density stored in sgdeDist
    fig, ax, _ = plotSG3d(grid, sgdeDist.alpha)
    ax.set_title("negative")
    fig.show()