def plotResultsSG(self, grid, alpha, level, maxGridSize, refinement,
                  iteration, out):
    """Plot the sparse grid function and its error against the simulation.

    Two 3d figures are created: the sparse grid function given by
    ``grid``/``alpha`` (titled "eval") and the error between
    ``self.simulation`` and the sparse grid function (titled "error").

    When ``out`` is truthy each figure is saved as a pdf below
    ``self.pathResults``; otherwise the figures are shown with
    ``plt.show()``.

    :param grid: sparse grid that is plotted
    :param alpha: coefficients belonging to ``grid``
    :param level: value written into the file name (``l%i``)
    :param maxGridSize: value written into the file name (``Nmax%i``)
    :param refinement: value written into the file name (``r%s``)
    :param iteration: value written into the file name (``it%i``)
    :param out: if truthy save the figures, else show them
    """

    def resultFile(template):
        # NOTE(review): `isFull` is neither a parameter nor a local of this
        # method; it has to be provided by an enclosing scope -- confirm.
        gridKind = "fg" if isFull else "sg"
        name = template % (self.radix, gridKind, self.numDims,
                           grid.getTypeAsString(), level, maxGridSize,
                           grid.getSize(), refinement, iteration)
        return os.path.join(self.pathResults, name)

    # sparse grid function
    fig, ax, _ = plotSG3d(grid, alpha)
    ax.set_title("eval")
    if out:
        plt.savefig(resultFile("%s_%s_d%i_%s_l%i_Nmax%i_N%i_r%s_it%i.pdf"))

    # error between the simulation and the sparse grid function
    trans = self.params.getJointTransformation()

    def reference(x):
        return self.simulation(x)

    def surrogate(x):
        return evalSGFunction(grid, alpha, trans.probabilisticToUnit(x))

    fig, ax, _ = plotError3d(reference, surrogate, xlim=[-2, 1], ylim=[0, 1])
    ax.set_title("error")

    if out:
        plt.savefig(
            resultFile("%s_error_%s_d%i_%s_l%i_Nmax%i_N%i_r%s_it%i.pdf"))
    else:
        plt.show()
def testMarginalEstimationStrategy(self):
    """Check quadrature, analytic mean and the marginalized mean.

    The function prod_i (1 + x_i)(1 - x_i) is interpolated on [-1, 1]^2.
    Its integral is compared against ``doQuadrature`` and against the
    value returned by ``AnalyticEstimationStrategy``. Afterwards the mean
    is computed with ``MarginalAnalyticEstimationStrategy`` for ``[[0]]``
    and the first resulting coefficient is compared with 2/3. Finally the
    full and the marginalized function are plotted.
    """
    bounds = np.array([[-1, 1], [-1, 1]])

    # one linear transformation and one uniform marginal per dimension
    trans = JointTransformation()
    marginals = []
    for lower, upper in bounds:
        trans.add(LinearTransformation(lower, upper))
        marginals.append(Uniform(lower, upper))
    dist = J(marginals)

    def f(x):
        factors = [(1 + xi) * (1 - xi) for xi in x]
        return np.prod(factors)

    def F(x):
        # antiderivative used for the reference value below
        return 1. - x**3 / 3.

    grid, alpha_vec = interpolate(f, 1, 2, gridType=GridType_Poly, deg=2,
                                  trans=trans)
    alpha = alpha_vec.array()

    # reference value and the two estimates
    expected = (F(1) - F(-1))**2
    byQuadrature = doQuadrature(grid, alpha)
    byStrategy = AnalyticEstimationStrategy().mean(grid, alpha, dist,
                                                   trans)["value"]

    self.assertTrue(abs(expected - byQuadrature) < 1e-10)
    self.assertTrue(abs(expected - byStrategy) < 1e-10)

    marginalStrategy = MarginalAnalyticEstimationStrategy()
    ngrid, nalpha, _ = marginalStrategy.mean(grid, alpha, dist, trans, [[0]])
    self.assertTrue(abs(nalpha[0] - 2. / 3.) < 1e-10)

    plotSG3d(grid, alpha)
    plt.figure()
    plotSG1d(ngrid, nalpha)
    plt.show()
def discretize2d_linear(self):
    """Discretize the product of two interpolants and verify it visually
    and numerically.

    Two sparse grid functions are obtained from ``self.interpolate``,
    their product is discretized with ``discretizeProduct`` and the values
    returned by the two 3d plots (reference product vs. discretized
    product) are required to differ by less than 1e-13 in the maximum
    norm.
    """
    numPoints = 50

    # the two factors and the discretized product
    gridA, alphaA = self.interpolate(2, 3, 2)
    gridB, alphaB = self.interpolate(2, 3, 3)
    prodGrid, prodAlpha = discretizeProduct(gridA, alphaA, gridB, alphaB)

    def reference(x):
        # point-wise product of both sparse grid functions
        left = evalSGFunction(gridA, alphaA, x)
        right = evalSGFunction(gridB, alphaB, x)
        return left * right

    fig, ax, refValues = plotFunction3d(reference, n=numPoints)
    ax.set_title("product")
    fig.show()

    fig, ax, sgValues = plotSG3d(prodGrid, prodAlpha, n=numPoints)
    difference = refValues - sgValues
    title = "(size=%i, maxlevel=%i, deg=%i), err = %g" % (
        prodGrid.getStorage().getSize(),
        prodGrid.getStorage().getMaxLevel(),
        getDegree(prodGrid),
        np.max(abs(difference)))
    ax.set_title(title)
    fig.show()

    assert np.max(np.abs(difference)) < 1e-13

    plt.show()
def estimateSGDEDensity(functionName,
                        trainSamples,
                        testSamples=None,
                        bounds=None,
                        iteration=0,
                        plot=False,
                        out=True,
                        label="sgde_zero",
                        candidates="intersections",
                        interpolation="setToZero"):
    """Learn SGDE densities for several grid settings, make each of them
    positive and return the one with the smallest test cross entropy.

    For every level in 2..6 and every number of refinement steps in 0..4 a
    density is learned with ``SGDEdist.byLearnerSGDEConfig``, made positive
    with the operation returned by ``createOperationMakePositive`` and
    rated by ``crossEntropy(testSamples)``. Statistics are recorded for the
    best candidate found so far.

    :param functionName: stored in ``stats['config']['functionName']``
    :param trainSamples: 2d array of training samples
    :param testSamples: samples used for the cross entropy. They are passed
        to ``crossEntropy`` and (when ``out`` is set) to ``np.savetxt``
        without a ``None`` check; only the initial printout tolerates
        ``None``.
    :param bounds: forwarded to the ``SGDEdist`` constructors
    :param iteration: number that is encoded in all output file names
    :param plot: if truthy, 2d densities are plotted
    :param out: if truthy, results are written below ``data/<label>``; the
        directory is expected to exist
    :param label: name of the result sub-directory and file name prefix
    :param candidates: passed to ``strToCandidateSearchAlgorithm``
    :param interpolation: passed to ``strToInterpolationAlgorithm``
    :return: tuple ``(bestDist, stats)``
    """
    print("train: %i x %i (mean=%g, var=%g)" %
          (trainSamples.shape[0], trainSamples.shape[1],
           np.mean(trainSamples), np.var(trainSamples)))
    if testSamples is not None:
        print("test : %i x %i (mean=%g, var=%g)" %
              (testSamples.shape[0], testSamples.shape[1],
               np.mean(testSamples), np.var(testSamples)))

    candidateSearchAlgorithm = strToCandidateSearchAlgorithm(candidates)
    interpolationAlgorithm = strToInterpolationAlgorithm(interpolation)

    results = {}
    crossEntropies = {}
    config = {
        "grid_level": 1,
        "grid_type": "linear",
        "grid_maxDegree": 1,
        "refinement_numSteps": 0,
        "refinement_numPoints": 3,
        "solver_threshold": 1e-10,
        "solver_verbose": False,
        "regularization_type": "Laplace",
        "crossValidation_enable": True,
        "crossValidation_kfold": 5,
        "crossValidation_silent": True,
        "sgde_makePositive": False
    }

    pathResults = os.path.join("data", label)
    key = 1
    bestCV = float("Inf")
    bestDist = None

    # stats
    stats = {
        'config': {
            'functionName': functionName,
            'numDims': 2,
            'adaptive': True,
            'refnums': 0,
            'consistentGrid': True,
            'candidateSearchAlgorithm': candidates,
            'interpolationAlgorithm': interpolation,
            'maxNumGridPoints': 0,
            'iteration': iteration
        },
        'trainSamples': trainSamples,
        'testSamples': testSamples
    }

    for level in range(2, 7):
        print("-" * 60)
        print("l=%i" % level)
        for refinementSteps in range(0, 5):
            config["grid_level"] = level
            config["refinement_numSteps"] = refinementSteps

            sgdeDist = SGDEdist.byLearnerSGDEConfig(trainSamples,
                                                    config=config,
                                                    bounds=bounds)
            # -----------------------------------------------------------
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            cvSgde = sgdeDist.crossEntropy(testSamples)

            maxLevel = grid.getStorage().getMaxLevel()
            numDims = grid.getStorage().getDimension()

            print(" " + "-" * 30)
            print(" #ref = %i: gs=%i -> CV test = %g" %
                  (refinementSteps, sgdeDist.grid.getSize(), cvSgde))
            # -----------------------------------------------------------
            # make it positive
            positiveGrid = grid.clone()
            positiveAlpha_vec = DataVector(alpha)
            opPositive = createOperationMakePositive(candidateSearchAlgorithm,
                                                     interpolationAlgorithm,
                                                     True, False)
            opPositive.makePositive(positiveGrid, positiveAlpha_vec, True)

            # scale to unit integrand
            positiveAlpha = positiveAlpha_vec.array()
            positiveSgdeDist = SGDEdist(positiveGrid,
                                        positiveAlpha,
                                        trainSamples,
                                        bounds=bounds)
            # -----------------------------------------------------------
            cvPositiveSgde = positiveSgdeDist.crossEntropy(testSamples)

            if plot and numDims == 2:
                fig = plt.figure()
                plotSG2d(grid, alpha, show_negative=True,
                         show_grid_points=True)
                plt.title("pos: N=%i: vol=%g, log=%g" %
                          (positiveGrid.getSize(),
                           doQuadrature(positiveGrid, positiveAlpha),
                           cvPositiveSgde))
                plt.tight_layout()
                if out:
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_density_pos_i%i_l%i_r%i.jpg" %
                            (label, iteration, level, refinementSteps)))
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_density_pos_i%i_l%i_r%i.pdf" %
                            (label, iteration, level, refinementSteps)))
                else:
                    plt.close(fig)

            # -----------------------------------------------------------
            print(" positive: gs=%i -> CV test = %g" %
                  (positiveGrid.getSize(), cvPositiveSgde))
            # -----------------------------------------------------------
            # select the best density available based on the given criterion
            # `config` is mutated in every iteration, so a snapshot is stored;
            # otherwise all entries would alias the final configuration.
            results[key] = {'config': dict(config), 'dist': positiveSgdeDist}
            crossEntropies[key] = cvPositiveSgde
            key += 1
            candidateSearch = opPositive.getCandidateSetAlgorithm()

            if cvPositiveSgde < bestCV:
                bestCV = cvPositiveSgde
                bestDist = positiveSgdeDist
                numComparisons = candidateSearch.costsComputingCandidates()

                # update the stats -> just for the current best one
                # C[d - 2] holds binom(M, d), M = number of negative
                # coefficients of the learned (not yet positive) density
                C = np.ndarray(numDims - 1, dtype="int")
                M = np.sum([1 for i in range(len(alpha)) if alpha[i] < 0])
                for d in range(2, numDims + 1):
                    C[d - 2] = binom(M, d)

                stats['config']['refnums'] = refinementSteps
                stats['config']['adaptive'] = refinementSteps > 0
                stats['negSGDE_json'] = sgdeDist.toJson()
                stats['posSGDE_json'] = positiveSgdeDist.toJson()
                stats['level'] = level
                stats['maxLevel'] = maxLevel
                stats['fullGridSize'] = (2**maxLevel - 1)**numDims
                stats['sparseGridSize'] = grid.getSize()
                stats['discretizedGridSize'] = positiveGrid.getSize()
                stats['crossEntropyTrainZeroSGDE'] = sgdeDist.crossEntropy(
                    trainSamples)
                stats['crossEntropyTrainDiscretizedSGDE'] = \
                    positiveSgdeDist.crossEntropy(trainSamples)
                stats['crossEntropyTestZeroSGDE'] = cvSgde
                stats['crossEntropyTestDiscretizedSGDE'] = cvPositiveSgde
                stats['numCandidates'] = int(candidateSearch.numCandidates())
                stats['numCandidatesPerLevel'] = np.array(
                    candidateSearch.numCandidatesPerLevel().array(),
                    dtype="int")
                stats['numCandidatesPerIteration'] = np.array(
                    candidateSearch.numCandidatesPerIteration().array(),
                    dtype="int")
                stats['costsCandidateSearch'] = \
                    candidateSearch.costsComputingCandidates()
                stats['costsCandidateSearchBinomial'] = int(C.sum())
                stats['costsCandidateSearchPerIteration'] = np.array(
                    candidateSearch.costsComputingCandidatesPerIteration(
                    ).array(),
                    dtype="int")
                stats['costsCandidateSearchPerIterationBinomial'] = C

                if plot and numDims == 2:
                    fig = plt.figure()
                    plotSG2d(
                        positiveGrid,
                        positiveAlpha,
                        show_negative=True,
                        show_grid_points=False,
                        colorbarLabel=
                        r"$f_{\mathcal{I}^\text{SG} \cup \mathcal{I}^\text{ext}}$"
                    )
                    plt.title(r"positive: $N=%i/%i$; \# comparisons$=%i$" %
                              (positiveGrid.getSize(),
                               (2**maxLevel - 1)**numDims, numComparisons))
                    plt.xlabel(r"$\xi_1$")
                    plt.ylabel(r"$\xi_2$")
                    plt.tight_layout()
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_pos_i%i_l%i_r%i.jpg" %
                            (label, iteration, level, refinementSteps)))
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_pos_i%i_l%i_r%i.pdf" %
                            (label, iteration, level, refinementSteps)))
                    if out:
                        plt.close(fig)

                    fig, ax, _ = plotSG3d(positiveGrid, positiveAlpha)
                    ax.set_zlabel(
                        r"$f_{\mathcal{I}^{\text{SG}} \cup \mathcal{I}^\text{ext}}(\xi_1, \xi_2)$",
                        fontsize=20)
                    ax.set_xlabel(r"$\xi_1$", fontsize=20)
                    ax.set_ylabel(r"$\xi_2$", fontsize=20)
                    plt.tight_layout()
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_pos_i%i_l%i_r%i_3d.jpg" %
                            (label, iteration, level, refinementSteps)))
                    plt.savefig(
                        os.path.join(
                            pathResults, "%s_pos_i%i_l%i_r%i_3d.pdf" %
                            (label, iteration, level, refinementSteps)))
                    if out:
                        plt.close(fig)

            if plot and numDims == 2 and not out:
                plt.show()

    if out:
        # save stats; pickle writes bytes, hence the binary mode
        filename = os.path.join(
            "data", label, "stats_d%i_a%i_r%i_i%i_%s_%s.pkl" %
            (numDims, 1, refinementSteps, iteration, candidates,
             interpolation))
        with open(filename, "wb") as fd:
            pkl.dump(stats, fd)
        print("stats saved to -> '%s'" % filename)

        # dictionary that stores the information on the estimated densities
        myjson = {
            "Grid": {
                "dimNames": ["phi", "log(K_A)"],
                "matrixEntries": ["phi", "log(K_A)"]
            },
            "Set": {
                "path": "",
                "grids": [],
                "alphas": [],
                "paramValues": [],
                "paramName": "grid_size"
            }
        }

        for resultKey, result in results.items():
            resultConfig = result['config']
            dist = result['dist']
            # serialize grid and coefficients
            baseName = "sgde.i%i.k%i.N%i" % (iteration, resultKey,
                                             dist.grid.getSize())
            out_grid = os.path.join(pathResults, "%s.grid" % baseName)
            out_alpha = os.path.join(pathResults, "%s.alpha.arff" % baseName)
            writeGrid(out_grid, dist.grid)
            writeAlphaARFF(out_alpha, dist.alpha)

            # collect information for json
            myjson["Set"]["grids"].append(os.path.abspath(out_grid))
            myjson["Set"]["alphas"].append(os.path.abspath(out_alpha))
            myjson["Set"]["paramValues"].append(crossEntropies[resultKey])
            # -----------------------------------------------------------
            # serialize the config
            out_config = os.path.join(
                pathResults,
                "sgde.i%i.k%i.config" % (iteration, resultKey))
            with open(out_config, "w") as fd:
                json.dump(resultConfig, fd, ensure_ascii=True, indent=True)

            crossEntropies[resultKey] = (crossEntropies[resultKey], out_grid,
                                         out_alpha, out_config)

        # sort the results in myjson according to the cross entropy
        ixs = np.argsort(myjson["Set"]["paramValues"])
        myjson["Set"]["grids"] = [myjson["Set"]["grids"][ix] for ix in ixs]
        myjson["Set"]["alphas"] = [myjson["Set"]["alphas"][ix] for ix in ixs]
        myjson["Set"]["paramValues"] = [
            myjson["Set"]["paramValues"][ix] for ix in ixs
        ]

        # serialize myjson
        out_config = os.path.join(pathResults,
                                  "sgde_visualization.i%i.config" % iteration)
        with open(out_config, "w") as fd:
            json.dump(myjson, fd, ensure_ascii=True, indent=True)

        # serialize cross entropies; the csv module needs a text-mode file
        # that is opened with newline=""
        out_crossEntropies = os.path.join(
            pathResults, "sgde_cross_entropies.i%i.csv" % iteration)
        with open(out_crossEntropies, "w", newline="") as fd:
            file_writer = csv.writer(fd)
            file_writer.writerow(
                ["crossEntropy", "grid", "alpha", "sgdeConfig"])
            for row in crossEntropies.values():
                file_writer.writerow(row)

        # serialize samples
        np.savetxt(
            os.path.join(pathResults,
                         "sgde_train_samples.i%i.csv" % iteration),
            trainSamples)
        np.savetxt(
            os.path.join(pathResults,
                         "sgde_test_samples.i%i.csv" % iteration),
            testSamples)

        # serialize best configuration to json
        out_bestDist = os.path.join(pathResults,
                                    "sgde_best_config.i%i.json" % iteration)
        with open(out_bestDist, "w") as fd:
            fd.write(bestDist.toJson())

    return bestDist, stats
def runAnalysis(self, analysis, uqManager, alabel, blabel, out, plot,
                results):
    """Write, plot and collect the results of an analysis per time step.

    If ``out`` is truthy, the analysis writes its files below
    ``<self.pathResults>/<alabel>/<blabel>``; missing directories are
    created. For every available time step the surpluses, Sobol indices
    and total effects (only if ``self.numDims > 1``) and the per-iteration
    mean and variance are stored in
    ``results["results"][t][maxLevel]``. Figures are only created when
    both ``out`` and ``plot`` are truthy.

    :param analysis: analysis object providing the write*/compute*/mean/var
        methods used below
    :param uqManager: provides the knowledge, the quantity of interest, the
        knowledge types and ``stats``
    :param alabel: first sub-directory name, also used as file prefix
    :param blabel: second sub-directory name
    :param out: if truthy, write files
    :param plot: if truthy (together with ``out``), save figures
    :param results: dictionary with a ``"results"`` entry that is updated
        in place
    """
    if out:
        # ----------------------------------------------
        # write stats
        # ----------------------------------------------
        pathResults = os.path.join(self.pathResults, alabel, blabel)
        # the path is two levels deep, os.mkdir would fail if the
        # intermediate directory does not exist yet
        os.makedirs(pathResults, exist_ok=True)
        if self.numDims > 1:
            print("sobol indices")
            analysis.writeSensitivityValues(os.path.join(pathResults, alabel))
        print("surpluses")
        analysis.writeSurplusesLevelWise(os.path.join(pathResults, alabel))
        print("stats")
        analysis.writeStats(os.path.join(pathResults, alabel))
        print("moments")
        analysis.writeMoments(os.path.join(pathResults, alabel))
        print("sampling")
        path = os.path.join(pathResults, "samples")
        os.makedirs(path, exist_ok=True)
        analysis.sampleGrids(os.path.join(path, alabel))

    # ----------------------------------------------
    # do some plotting
    # ----------------------------------------------
    ts = uqManager.getKnowledge().getAvailableTimeSteps()
    # one row per time step, one column per sorted permutation
    sobolIndices = np.zeros((len(ts),
                             2 ** len(self.params.activeParams()) - 1))
    for i, t in enumerate(ts):
        grid, alpha = uqManager.getKnowledge()\
                               .getSparseGridFunction(uqManager.getQoI(), t)
        print("-" * 80)
        print("plot: t=%g (i=%i), N=%i" % (t, i, grid.getSize()))

        # scatter plot of surpluses level wise
        surpluses = analysis.computeSurplusesLevelWise(t)
        maxLevel = grid.getStorage().getMaxLevel()

        if out and plot:
            fig = plotSurplusLevelWise(surpluses, maxLevel)
            # explicit extension: for a non-integer t the text after the
            # decimal point would otherwise be taken as the file format
            fig.savefig(os.path.join(pathResults, "surpluses_t%g.png" % t))
            plt.close(fig)

            (fig, ax), A = plotSGNodal3d(grid, alpha)
            ax.set_xlabel("x")
            ax.set_ylabel("y")
            fig.savefig(os.path.join(pathResults, "nodal_t%g.png" % t))
            plt.close(fig)

            # plot sparse grid approximation
            if self.numDims < 3:
                if self.numDims == 1:
                    fig = plt.figure()
                    plotSG1d(grid, alpha)
                    plt.xlabel("x")
                elif self.numDims == 2:
                    fig, ax, _ = plotSG3d(grid, alpha)
                    ax.set_xlabel("x")
                    ax.set_ylabel("y")
                fig.savefig(os.path.join(pathResults,
                                         "function_t%g.png" % t))
                plt.close(fig)

            # write nodal values to file
            writeDataARFF({"filename": os.path.join(pathResults,
                                                    "nodal_t%g.arff" % t),
                           "names": self.params.activeParams().getNames() +
                           ["value"],
                           "data": DataMatrix(A)})

        # show sobol indices
        me = None
        te = None
        if self.numDims > 1:
            anova = analysis.getAnovaDecomposition(t=t)
            me = anova.getSobolIndices()
            print("-------------- Sobol Indices (t = %i) ------------------" % t)
            for j, perm in enumerate(
                    anova.getSortedPermutations(list(me.keys()))):
                print("%s: %s" % (perm, me[perm]))
                sobolIndices[i, j] = me[perm]
            print(sum(sobolIndices[i, :]), "==", 1)

            # ----------------------------------------------------------
            # total effects
            te = anova.getTotalEffects()
            print("-------------- Total Effects (t = %i) -----------------" % t)
            for key, val in sorted(te.items()):
                print("%s: %s" % (key, val))
            print("---------------------------------------------------------")
            print()

        if t not in results["results"]:
            results["results"][t] = {}

        results["knowledge_types"] = uqManager.getKnowledgeTypes()

        entry = {}
        results["results"][t][maxLevel] = entry
        entry["grid_size"] = grid.getSize()
        entry["maxLevel"] = maxLevel
        entry["surpluses"] = surpluses
        entry["sobol_indices"] = me
        entry["total_effects"] = te
        entry["stats"] = uqManager.stats

        meanPerIteration = {}
        entry["mean_estimated_per_iteration"] = meanPerIteration
        for it, res in analysis.mean(ts=[t], reduce=False).items():
            meanPerIteration[it] = res["value"]
        # maximum iteration -> final value
        entry["mean_estimated"] = meanPerIteration[max(meanPerIteration)]

        varPerIteration = {}
        entry["var_estimated_per_iteration"] = varPerIteration
        for it, res in analysis.var(ts=[t], reduce=False).items():
            varPerIteration[it] = res["value"]
        # maximum iteration -> final value
        entry["var_estimated"] = varPerIteration[max(varPerIteration)]
    # --------------------------------------------
    # `anova` and `me` are only bound once the loop has run at least once
    if out and plot and self.numDims > 1 and len(ts) > 0:
        names = anova.getSortedPermutations(list(me.keys()))
        fig = plotSobolIndices(sobolIndices, ts=ts, legend=True, names=names)
        fig.savefig(os.path.join(pathResults, "sobol.png"))
        plt.close(fig)
print("l=%i: (gs=%i)" % (level, grid.getSize())) print("-" * 80) # plot the result if plot and numDims < 3: fig = plt.figure() if numDims == 1: plotSG1d(grid, alpha) elif numDims == 2: plotSG2d(grid, alpha, show_negative=False, show_grid_points=True) plt.title(r"$\ell = %i, N = %i$" % (level, grid.getStorage().getSize())) fig.show() if numDims == 2: fig, ax, _ = plotSG3d(grid, alpha, grid_points_at=-2) ax.set_title(r"$\ell = %i, N = %i$" % (level, grid.getStorage().getSize())) ax.set_zlim(-2, 2) fig.show() plt.savefig("sin_sg_negative.pdf") plt.close(fig) if side == "lower": sides = ["lower"] elif side == "upper": sides = ["upper"] else: # both sides = ["lower", "upper"] if code == "c++":
dist = SGDEdist.byLearnerSGDEConfig(samples, config={"grid_level": 6, "grid_type": "Linear", "refinement_numSteps": 0, "refinement_numPoints": 3, "regularization_type": "Laplace", "crossValidation_lambda": 0.000562341, "crossValidation_enable": False, "crossValidation_kfold": 5, "crossValidation_silent": False}, bounds=U.getBounds()) fig, ax = plotDensity3d(U) ax.set_title("true density") fig.show() fig, ax, _ = plotSG3d(dist.grid, dist.alpha) ax.set_title("estimated density") fig.show() print("mean = %g ~ %g" % (m.prod(), dist.mean())) print("var = %g ~ %g" % (np.var(testSamples), dist.var())) print("KL-divergence = %g" % U.klDivergence(dist, testSamples, testSamples)) print("cross entropy = %g" % dist.crossEntropy(testSamples)) print("MSE = %g" % dist.l2error(U, testSamples, testSamples)) # sampling uniform_samples = np.random.random((1000, 2)) samples = dist.ppf(uniform_samples) fig = plt.figure() plt.scatter(samples[:, 0], samples[:, 1])
# ---------------------------------------------- # first run while uqManager.hasMoreSamples(): uqManager.runNextSamples() # ---------------------------------------------- # build analysis analysis = ASGCAnalysisBuilder().withUQManager(uqManager)\ .withAnalyticEstimationStrategy()\ .andGetResult() analysis.computeMoments()['data'] # ---------------------------------------------- fig, _, _ = plotSG3d(analysis.getGrid(), analysis.getSurpluses()) fig.show() # ---------------------------------------------- # show sobol indices # anova decomposition anova = analysis.getAnovaDecomposition(nk=len(params)) # main effects me = anova.getSobolIndices() te = anova.getTotalEffects() names = anova.getSortedPermutations(list(me.keys())) values = [me[name] for name in names] fig = plotSobolIndices(values, legend=True, names=names) fig.show()
print( "l=%i: (gs=%i) -> %g (%g, %g)," % (level, sgdeDist.grid.getSize(), dist.klDivergence(sgdeDist, testSamples), sgdeDist.crossEntropy(testSamples), sgdeDist.vol)) print("-" * 80) if numDims == 2 and plot: # plot the result fig = plt.figure() plotGrid2d(grid, alpha, show_numbers=False) # plt.title("neg: #gp = %i, kldivergence = %g, log = %g" % (grid.getStorage().getSize(), # dist.klDivergence(sgdeDist, testSamples), # dist.crossEntropy(testSamples))) fig.show() fig, ax, _ = plotSG3d(grid, sgdeDist.alpha) ax.set_title("negative") fig.show() C = 0 M = np.sum([1 for i in range(len(alpha)) if alpha[i] < 0]) for d in range(2, numDims + 1): C += binom(M, d) print("predicted comparison costs = %i" % C) print("full grid = %i" % ((2**level - 1)**numDims, )) if code == "c++": alpha_vec = DataVector(alpha) opMakePositive = createOperationMakePositive(candidateSearchAlgorithm, interpolationAlgorithm, consistentGrid, verbose)