def estimate(self, A, grid, alpha, k, U, T):
    r"""
    Estimate the k-th moment of the given sparse grid function by assuming
    a constant distribution function on the support of each grid point.
    """
    gs = grid.getStorage()

    def f(p):
        val = evalSGFunction(grid, alpha, p)
        return val ** k

    # discretize returns (grid, alpha, err); the error estimate is not
    # needed here
    n_grid, n_alpha, _ = discretize(grid, alpha, f, refnums=0)

    # weight each nodal coefficient with the density at that grid point
    for i in range(gs.size()):
        p = [gs.getCoordinates(gs.getPoint(i), j)
             for j in range(gs.getDimension())]
        q = U.pdf(T.trans(p), marginal=True)
        n_alpha[i] *= np.prod(q)

    # estimate the expectation value
    return A * doQuadrature(n_grid, n_alpha)
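# A minimal, self-contained 1-d sketch (plain NumPy, all names illustrative)
# of the approximation performed above: evaluate the surrogate, raise it to
# the k-th power, weight each node with the density evaluated there, and
# integrate, i.e. E[f(X)^k] ~ \int f(x)^k pdf(x) dx.
import numpy as np

nodes = np.linspace(0.0, 1.0, 129)       # stands in for the grid coordinates
f_vals = np.sin(np.pi * nodes)           # stands in for evalSGFunction
pdf_vals = 6.0 * nodes * (1.0 - nodes)   # Beta(2, 2) density on [0, 1]

k = 2
moment = np.trapz(f_vals ** k * pdf_vals, nodes)  # ~ E[f(X)^2]
print(moment)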
def estimateSGDEDensity(functionName,
                        trainSamples,
                        testSamples=None,
                        bounds=None,
                        iteration=0,
                        plot=False,
                        out=True,
                        label="sgde_zero",
                        candidates="intersections",
                        interpolation="setToZero"):
    print("train: %i x %i (mean=%g, var=%g)" %
          (trainSamples.shape[0], trainSamples.shape[1],
           np.mean(trainSamples), np.var(trainSamples)))
    if testSamples is not None:
        print("test : %i x %i (mean=%g, var=%g)" %
              (testSamples.shape[0], testSamples.shape[1],
               np.mean(testSamples), np.var(testSamples)))

    candidateSearchAlgorithm = strToCandidateSearchAlgorithm(candidates)
    interpolationAlgorithm = strToInterpolationAlgorithm(interpolation)

    results = {}
    crossEntropies = {}
    config = {"grid_level": 1,
              "grid_type": "linear",
              "grid_maxDegree": 1,
              "refinement_numSteps": 0,
              "refinement_numPoints": 3,
              "solver_threshold": 1e-10,
              "solver_verbose": False,
              "regularization_type": "Laplace",
              "crossValidation_enable": True,
              "crossValidation_kfold": 5,
              "crossValidation_silent": True,
              "sgde_makePositive": False}

    pathResults = os.path.join("data", label)
    key = 1
    bestCV = float("Inf")
    bestDist = None

    # stats
    stats = {'config': {'functionName': functionName,
                        'numDims': 2,
                        'adaptive': True,
                        'refnums': 0,
                        'consistentGrid': True,
                        'candidateSearchAlgorithm': candidates,
                        'interpolationAlgorithm': interpolation,
                        'maxNumGridPoints': 0,
                        'iteration': iteration},
             'trainSamples': trainSamples,
             'testSamples': testSamples}

    for level in range(2, 7):
        print("-" * 60)
        print("l=%i" % level)
        for refinementSteps in range(0, 5):
            config["grid_level"] = level
            config["refinement_numSteps"] = refinementSteps
            sgdeDist = SGDEdist.byLearnerSGDEConfig(trainSamples,
                                                    config=config,
                                                    bounds=bounds)
            # ---------------------------------------------------------
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            cvSgde = sgdeDist.crossEntropy(testSamples)
            maxLevel = grid.getStorage().getMaxLevel()
            numDims = grid.getStorage().getDimension()

            print("  " + "-" * 30)
            print("  #ref = %i: gs=%i -> CV test = %g" %
                  (refinementSteps, sgdeDist.grid.getSize(), cvSgde))
            # ---------------------------------------------------------
            # make it positive
            positiveGrid = grid.clone()
            positiveAlpha_vec = DataVector(alpha)
            opPositive = createOperationMakePositive(candidateSearchAlgorithm,
                                                     interpolationAlgorithm,
                                                     True, False)
            opPositive.makePositive(positiveGrid, positiveAlpha_vec, True)

            # scale to unit integrand
            positiveAlpha = positiveAlpha_vec.array()
            positiveSgdeDist = SGDEdist(positiveGrid, positiveAlpha,
                                        trainSamples, bounds=bounds)
            # ---------------------------------------------------------
            cvPositiveSgde = positiveSgdeDist.crossEntropy(testSamples)

            if plot and numDims == 2:
                fig = plt.figure()
                plotSG2d(grid, alpha,
                         show_negative=True, show_grid_points=True)
                plt.title("pos: N=%i: vol=%g, log=%g" %
                          (positiveGrid.getSize(),
                           doQuadrature(positiveGrid, positiveAlpha),
                           cvPositiveSgde))
                plt.tight_layout()
                if out:
                    plt.savefig(os.path.join(
                        pathResults, "%s_density_pos_i%i_l%i_r%i.jpg" %
                        (label, iteration, level, refinementSteps)))
                    plt.savefig(os.path.join(
                        pathResults, "%s_density_pos_i%i_l%i_r%i.pdf" %
                        (label, iteration, level, refinementSteps)))
                else:
                    plt.close(fig)
            # ---------------------------------------------------------
            print("  positive: gs=%i -> CV test = %g" %
                  (positiveGrid.getSize(), cvPositiveSgde))
            # ---------------------------------------------------------
            # select the best density available based on the given criterion;
            # copy the config since it is mutated in every loop iteration
            results[key] = {'config': dict(config), 'dist': positiveSgdeDist}
            crossEntropies[key] = cvPositiveSgde
            key += 1

            candidateSearch = opPositive.getCandidateSetAlgorithm()
            if cvPositiveSgde < bestCV:
                bestCV = cvPositiveSgde
                bestDist = positiveSgdeDist
                numComparisons = candidateSearch.costsComputingCandidates()

                # write the stats of the current best result to the stats
                # dict; C[d] counts the binomially estimated candidate costs
                # per interaction order d
                C = np.ndarray(numDims - 1, dtype="int")
                M = np.sum([1 for i in range(len(alpha)) if alpha[i] < 0])
                for d in range(2, numDims + 1):
                    C[d - 2] = binom(M, d)

                stats['config']['refnums'] = refinementSteps
                stats['config']['adaptive'] = refinementSteps > 0
                stats['negSGDE_json'] = sgdeDist.toJson()
                stats['posSGDE_json'] = positiveSgdeDist.toJson()
                stats['level'] = level
                stats['maxLevel'] = maxLevel
                stats['fullGridSize'] = (2 ** maxLevel - 1) ** numDims
                stats['sparseGridSize'] = grid.getSize()
                stats['discretizedGridSize'] = positiveGrid.getSize()
                stats['crossEntropyTrainZeroSGDE'] = \
                    sgdeDist.crossEntropy(trainSamples)
                stats['crossEntropyTrainDiscretizedSGDE'] = \
                    positiveSgdeDist.crossEntropy(trainSamples)
                stats['crossEntropyTestZeroSGDE'] = cvSgde
                stats['crossEntropyTestDiscretizedSGDE'] = cvPositiveSgde
                stats['numCandidates'] = int(candidateSearch.numCandidates())
                stats['numCandidatesPerLevel'] = np.array(
                    candidateSearch.numCandidatesPerLevel().array(),
                    dtype="int")
                stats['numCandidatesPerIteration'] = np.array(
                    candidateSearch.numCandidatesPerIteration().array(),
                    dtype="int")
                stats['costsCandidateSearch'] = \
                    candidateSearch.costsComputingCandidates()
                stats['costsCandidateSearchBinomial'] = int(C.sum())
                stats['costsCandidateSearchPerIteration'] = np.array(
                    candidateSearch.costsComputingCandidatesPerIteration().array(),
                    dtype="int")
                stats['costsCandidateSearchPerIterationBinomial'] = C

                if plot and numDims == 2:
                    fig = plt.figure()
                    plotSG2d(positiveGrid, positiveAlpha,
                             show_negative=True, show_grid_points=False,
                             colorbarLabel=r"$f_{\mathcal{I}^\text{SG} \cup \mathcal{I}^\text{ext}}$")
                    plt.title(r"positive: $N=%i/%i$; \# comparisons$=%i$" %
                              (positiveGrid.getSize(),
                               (2 ** maxLevel - 1) ** numDims,
                               numComparisons))
                    plt.xlabel(r"$\xi_1$")
                    plt.ylabel(r"$\xi_2$")
                    # plt.title(r"N=%i $\rightarrow$ %i: log=%g $\rightarrow$ %g" %
                    #           (sgdeDist.grid.getSize(),
                    #            positiveSgdeDist.grid.getSize(),
                    #            cvSgde, cvPositiveSgde))
                    plt.tight_layout()
                    plt.savefig(os.path.join(
                        pathResults, "%s_pos_i%i_l%i_r%i.jpg" %
                        (label, iteration, level, refinementSteps)))
                    plt.savefig(os.path.join(
                        pathResults, "%s_pos_i%i_l%i_r%i.pdf" %
                        (label, iteration, level, refinementSteps)))
                    if out:
                        plt.close(fig)

                    fig, ax, _ = plotSG3d(positiveGrid, positiveAlpha)
                    ax.set_zlabel(r"$f_{\mathcal{I}^{\text{SG}} \cup \mathcal{I}^\text{ext}}(\xi_1, \xi_2)$",
                                  fontsize=20)
                    ax.set_xlabel(r"$\xi_1$", fontsize=20)
                    ax.set_ylabel(r"$\xi_2$", fontsize=20)
                    plt.tight_layout()
                    plt.savefig(os.path.join(
                        pathResults, "%s_pos_i%i_l%i_r%i_3d.jpg" %
                        (label, iteration, level, refinementSteps)))
                    plt.savefig(os.path.join(
                        pathResults, "%s_pos_i%i_l%i_r%i_3d.pdf" %
                        (label, iteration, level, refinementSteps)))
                    if out:
                        plt.close(fig)

    if plot and numDims == 2 and not out:
        plt.show()

    if out:
        # save stats; pickle requires a binary file handle
        filename = os.path.join("data", label,
                                "stats_d%i_a%i_r%i_i%i_%s_%s.pkl" %
                                (numDims, 1, refinementSteps, iteration,
                                 candidates, interpolation))
        fd = open(filename, "wb")
        pkl.dump(stats, fd)
        fd.close()
        print("stats saved to -> '%s'" % filename)

        # dictionary that stores the information on the estimated densities
        myjson = {"Grid": {"dimNames": ["phi", "log(K_A)"],
                           "matrixEntries": ["phi", "log(K_A)"]},
                  "Set": {"path": "",
                          "grids": [],
                          "alphas": [],
                          "paramValues": [],
                          "paramName": "grid_size"}}

        for key, result in list(results.items()):
            config = result['config']
            dist = result['dist']

            # serialize grid and coefficients
            basename = "sgde.i%i.k%i.N%i" % (iteration, key,
                                             dist.grid.getSize())
            out_grid = os.path.join(pathResults, "%s.grid" % basename)
            out_alpha = os.path.join(pathResults, "%s.alpha.arff" % basename)
            writeGrid(out_grid, dist.grid)
            writeAlphaARFF(out_alpha, dist.alpha)

            # collect information for json
            myjson["Set"]["grids"].append(os.path.abspath(out_grid))
            myjson["Set"]["alphas"].append(os.path.abspath(out_alpha))
            myjson["Set"]["paramValues"].append(crossEntropies[key])
            # ---------------------------------------------------------
            # serialize the config
            out_config = os.path.join(pathResults,
                                      "sgde.i%i.k%i.config" % (iteration, key))
            fd = open(out_config, "w")
            json.dump(config, fd, ensure_ascii=True, indent=True)
            fd.close()

            crossEntropies[key] = (crossEntropies[key], out_grid,
                                   out_alpha, out_config)

        # sort the results in myjson according to the cross entropy
        ixs = np.argsort(myjson["Set"]["paramValues"])
        myjson["Set"]["grids"] = [myjson["Set"]["grids"][ix] for ix in ixs]
        myjson["Set"]["alphas"] = [myjson["Set"]["alphas"][ix] for ix in ixs]
        myjson["Set"]["paramValues"] = [myjson["Set"]["paramValues"][ix]
                                        for ix in ixs]

        # serialize myjson
        out_config = os.path.join(pathResults,
                                  "sgde_visualization.i%i.config" % iteration)
        fd = open(out_config, "w")
        json.dump(myjson, fd, ensure_ascii=True, indent=True)
        fd.close()

        # serialize cross entropies; csv needs a text-mode handle
        out_crossEntropies = os.path.join(
            pathResults, "sgde_cross_entropies.i%i.csv" % iteration)
        fd = open(out_crossEntropies, "w", newline="")
        file_writer = csv.writer(fd)
        file_writer.writerow(["crossEntropy", "grid", "alpha", "sgdeConfig"])
        for row in crossEntropies.values():
            file_writer.writerow(row)
        fd.close()

        # serialize samples
        np.savetxt(os.path.join(pathResults,
                                "sgde_train_samples.i%i.csv" % iteration),
                   trainSamples)
        if testSamples is not None:
            np.savetxt(os.path.join(pathResults,
                                    "sgde_test_samples.i%i.csv" % iteration),
                       testSamples)

        # serialize best configuration to json
        out_bestDist = os.path.join(pathResults,
                                    "sgde_best_config.i%i.json" % iteration)
        text = bestDist.toJson()
        fd = open(out_bestDist, "w")
        fd.write(text)
        fd.close()

    return bestDist, stats
grid = Grid.createLinearGrid(2)
grid.getGenerator().regular(level)
gs = grid.getStorage()

nodalValues = DataVector(grid.getSize())
p = DataVector(gs.getDimension())
for i in range(gs.getSize()):
    gs.getCoordinates(gs.getPoint(i), p)
    nodalValues[i] = dist.pdf(p.array())

alpha = hierarchize(grid, nodalValues)

# plot the result
fig = plt.figure()
plotSG2d(grid, alpha)
plt.title("plotSG: vol = %g" % (doQuadrature(grid, alpha)))
fig.show()

sgdeDist = SGDEdist(grid, alpha)

fig = plt.figure()
plotSGDE2d(sgdeDist)
plt.title("plotSGDE: vol = %g" % (doQuadrature(grid, alpha)))
fig.show()

fig = plt.figure()
plotDensity2d(sgdeDist)
plt.title("plotDensity: vol = %g" % (doQuadrature(grid, alpha)))
fig.show()

plt.show()
def estimateDensitySGDE(trainSamplesUnit,
                        testSamplesUnit=None,
                        testSamplesProb=None,
                        pathResults="/tmp",
                        dist=None,
                        optimization='l2',
                        iteration=0,
                        levels=[1, 2, 3, 4, 5],
                        refNr=0,
                        refPoints=0,
                        nSamples=1000):
    """
    Estimate a sparse grid density for different levels and refinements
    by optimizing over a given quantity.

    @param trainSamplesUnit: training samples in the unit hypercube
    @param testSamplesUnit: test samples in the unit hypercube
    @param testSamplesProb: test samples in the probabilistic space
    @param pathResults: output directory for results
    @param dist: reference distribution (needed for l2 error and KL divergence)
    @param optimization: criterion to optimize for ('l2', 'kldivergence' or
                         'crossEntropy')
    @param iteration: current iteration number (used for file names)
    @param levels: grid levels to be considered
    @param refNr: number of refinement steps
    @param refPoints: number of points to be refined per step
    @param nSamples: number of samples to be drawn from the estimated density
    """
    config = """
[general]
method = dmest

[files]
inFileTrain = %s
usingTrain = %s
inFileTest = %s
outFileTest = %s
usingTest = %s

[dmest]
gridFile = %s
lambda = -1 # 0.01
regType = Laplace
refNr = %i
refPoints = %i
writeGridFile = %s
writeAlphaFile = %s
samp_rejectionTrialMax = 5000
samp_numSamples = %i
samp_outFile = %s
printSurfaceFile = %s
"""

    # write the samples to file
    if len(trainSamplesUnit.shape) == 1:
        n, dim = trainSamplesUnit.shape[0], 1
        usingTrainTag = "%i" % dim
    else:
        n, dim = trainSamplesUnit.shape
        usingTrainTag = "1:%i" % dim

    trainSamplesUnitFile = os.path.join(
        pathResults, "samples_%i_%i_train.csv" % (iteration, n))
    np.savetxt(trainSamplesUnitFile, trainSamplesUnit)

    testSamplesUnitFile = ""
    usingTestTag = ""
    if testSamplesUnit is not None:
        testSamplesUnitFile = os.path.join(
            pathResults, "samples_%i_%i_test.csv" % (iteration, n))
        if dim == 1:
            usingTestTag = "%i" % dim
        else:
            usingTestTag = "1:%i" % dim
        np.savetxt(testSamplesUnitFile, testSamplesUnit)

    # collector arrays
    accGridSizes = np.array([])
    accLevels = np.array([])
    accL2error = np.array([])
    accCrossEntropy = np.array([])
    accKLDivergence = np.array([])

    # best estimation
    ans = None
    bestMeasure = 1e20
    bestSetting = None

    for level in levels:
        # define output files
        gridFile = os.path.join(
            pathResults, "samples_%i_%i_l%i.grid" % (iteration, n, level))
        alphaFile = os.path.join(
            pathResults, "samples_%i_%i_l%i.alpha.arff" % (iteration, n, level))
        sampleFile = os.path.join(
            pathResults, "samples_%i_%i_l%i.csv" % (iteration, n, level))

        likelihoodFile = ""
        if testSamplesUnit is not None:
            likelihoodFile = os.path.join(
                pathResults,
                "samples_%i_%i_l%i_likelihood.csv" % (iteration, n, level))

        surfaceFile = ""
        if dim == 2:
            surfaceFile = os.path.join(
                pathResults, "samples_%i_%i_l%i.xyz" % (iteration, n, level))
        gnuplotJpegFile = os.path.join(
            pathResults,
            "samples_%i_%i_l%i_gnuplot.jpg" % (iteration, n, level))
        sgdeJpegFile = os.path.join(
            pathResults, "samples_%i_%i_l%i_sgde.jpg" % (iteration, n, level))
        sgdePositiveJpegFile = os.path.join(
            pathResults,
            "samples_%i_%i_l%i_sgdePositive.jpg" % (iteration, n, level))
        configFile = os.path.join(
            pathResults, "sgde_%i_%i_l%i.cfg" % (iteration, n, level))
        gnuplotConfig = os.path.join(
            pathResults, "sgde_%i_%i_l%i.gnuplot" % (iteration, n, level))

        # generate the grid
        grid = Grid.createLinearBoundaryGrid(dim)
        grid.getGenerator().regular(level)

        if grid.getSize() <= n:
            print(" l=%i" % level, end="")

            fd = open(gridFile, "w")
            fd.write(grid.serialize())
            fd.close()

            # write config to file
            fd = open(configFile, "w")
            fd.write(config % (trainSamplesUnitFile, usingTrainTag,
                               testSamplesUnitFile, likelihoodFile,
                               usingTestTag, gridFile, refNr, refPoints,
                               gridFile, alphaFile, nSamples, sampleFile,
                               surfaceFile))
            fd.close()

            sgdeDist = SGDEdist.byConfig(configFile)
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            # ---------------------------------------------------------
            # do some plotting
            if dim == 2:
                # gnuplot
                sgdeDist.gnuplot(gnuplotJpegFile, gnuplotConfig=gnuplotConfig)

                # matplotlib
                l2error = np.nan
                kldivergence = np.nan
                crossEntropy = sgdeDist.crossEntropy(testSamplesUnit)
                if dist is not None:
                    l2error = dist.l2error(sgdeDist, testSamplesUnit,
                                           testSamplesProb)
                    kldivergence = dist.klDivergence(sgdeDist,
                                                     testSamplesUnit,
                                                     testSamplesProb)

                fig = plt.figure()
                plotSG2d(grid, alpha)
                plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" %
                          (grid.getSize(), doQuadrature(grid, alpha),
                           kldivergence, crossEntropy, l2error))
                fig.savefig(sgdeJpegFile)
                plt.close(fig)
            # ---------------------------------------------------------
            # copy grid and coefficients
            gridFileNew = os.path.join(
                pathResults, "samples_%i_%i_sgde.grid" % (iteration, n))
            alphaFileNew = os.path.join(
                pathResults, "samples_%i_%i_sgde.alpha.arff" % (iteration, n))
            sampleFileNew = os.path.join(
                pathResults, "samples_%i_%i_sgde.csv" % (iteration, n))
            copy2(gridFile, gridFileNew)
            copy2(alphaFile, alphaFileNew)
            copy2(sampleFile, sampleFileNew)
            # ---------------------------------------------------------
            # # make it positive and do all over again
            # opPositive = OperationMakePositive(sgdeDist.grid)
            # alg = EstimateDensityAlgorithm(configFile)
            # opPositive.setInterpolationAlgorithm(alg)
            # grid, alpha = opPositive.makePositive(sgdeDist.alpha)

            # scale to unit integrand
            alpha.mult(1. / createOperationQuadrature(grid).doQuadrature(alpha))
            sgdeDist.grid = grid
            sgdeDist.alpha = alpha

            gridFileNew = os.path.join(
                pathResults,
                "samples_%i_%i_l%i_positive.grid" % (iteration, n, level))
            alphaFileNew = os.path.join(
                pathResults,
                "samples_%i_%i_l%i_positive.alpha.arff" % (iteration, n, level))
            fd = open(gridFileNew, "w")
            fd.write(Grid.serialize(grid))
            fd.close()
            writeAlphaARFF(alphaFileNew, alpha)
            # ---------------------------------------------------------
            # collect statistics
            accGridSizes = np.append(accGridSizes, grid.getSize())
            accLevels = np.append(accLevels, level)

            l2error = np.nan
            kldivergence = np.nan
            crossEntropy = sgdeDist.crossEntropy(testSamplesUnit)
            if dist is not None:
                l2error = dist.l2error(sgdeDist, testSamplesUnit,
                                       testSamplesProb)
                kldivergence = dist.klDivergence(sgdeDist, testSamplesUnit,
                                                 testSamplesProb)
            accL2error = np.append(accL2error, l2error)
            accCrossEntropy = np.append(accCrossEntropy, crossEntropy)
            accKLDivergence = np.append(accKLDivergence, kldivergence)

            if dim == 2:
                # -----------------------------------------------------
                # do some plotting
                fig = plt.figure()
                plotSG2d(grid, alpha)
                plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" %
                          (grid.getSize(), doQuadrature(grid, alpha),
                           kldivergence, crossEntropy, l2error))
                fig.savefig(sgdePositiveJpegFile)
                plt.close(fig)
            # ---------------------------------------------------------
            # select the best density available based on the given criterion
            if optimization == 'crossEntropy':
                measure = crossEntropy
            elif optimization == 'kldivergence':
                measure = kldivergence
            elif optimization == 'l2':
                measure = l2error
            else:
                raise AttributeError(
                    'optimization "%s" is not known for density estimation' %
                    optimization)

            isBest = measure < bestMeasure
            if isBest:
                bestMeasure = measure

            if ans is None or isBest:
                ans = sgdeDist
                bestSetting = {'level': level,
                               'gridSize': grid.getSize(),
                               'l2error': l2error,
                               'KLDivergence': kldivergence,
                               'crossEntropy': crossEntropy}
                # -----------------------------------------------------
                # copy grid and coefficients
                gridFileNew = os.path.join(
                    pathResults, "samples_%i_%i.grid" % (iteration, n))
                alphaFileNew = os.path.join(
                    pathResults, "samples_%i_%i.alpha.arff" % (iteration, n))
                sampleFileNew = os.path.join(
                    pathResults, "samples_%i_%i.csv" % (iteration, n))
                copy2(gridFile, gridFileNew)
                copy2(alphaFile, alphaFileNew)
                copy2(sampleFile, sampleFileNew)

                gridFileNew = os.path.join(
                    pathResults, "samples_%i_%i_positive.grid" % (iteration, n))
                alphaFileNew = os.path.join(
                    pathResults,
                    "samples_%i_%i_positive.alpha.arff" % (iteration, n))
                fd = open(gridFileNew, "w")
                fd.write(Grid.serialize(ans.grid))
                fd.close()
                writeAlphaARFF(alphaFileNew, ans.alpha)
            # ---------------------------------------------------------
            print(": %s = %g <= %g" % (optimization, measure, bestMeasure))
    print()
    # -------------------------------------------------------------------
    # write results to file
    statsfilename = os.path.join(
        pathResults, "sg_sgde_%i_%i_all.stats.arff" % (iteration, n))
    writeDataARFF({'filename': statsfilename,
                   'data': DataMatrix(np.vstack(([n] * len(accGridSizes),
                                                 accGridSizes,
                                                 accLevels,
                                                 accL2error,
                                                 accKLDivergence,
                                                 accCrossEntropy)).transpose()),
                   'names': ['sampleSize', 'gridSize', 'level', 'l2error',
                             'KLDivergence', 'crossEntropy']})
    # -------------------------------------------------------------------
    statsfilename = os.path.join(
        pathResults, "sg_sgde_%i_%i.stats.arff" % (iteration, n))
    writeDataARFF({'filename': statsfilename,
                   'data': DataMatrix(np.vstack(([n],
                                                 bestSetting['gridSize'],
                                                 bestSetting['level'],
                                                 bestSetting['l2error'],
                                                 bestSetting['KLDivergence'],
                                                 bestSetting['crossEntropy'])).transpose()),
                   'names': ['sampleSize', 'gridSize', 'level', 'l2error',
                             'KLDivergence', 'crossEntropy']})
    # -------------------------------------------------------------------
    return ans
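# Hypothetical driver for estimateDensitySGDE; the uniform samples are made
# up for illustration, and the dmest backend configured above must be
# available. With dist=None only the cross entropy is meaningful, so it is
# used as the optimization criterion here.
import numpy as np

np.random.seed(42)
samples = np.random.rand(500, 2)  # samples already living in the unit cube

best = estimateDensitySGDE(samples[:400],
                           testSamplesUnit=samples[400:],
                           pathResults="/tmp",
                           optimization='crossEntropy',
                           levels=[2, 3, 4])
print(best.crossEntropy(samples[400:]))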
def estimate(self, vol, grid, alpha, f, U, T):
    r"""
    Estimate the expectation of the given sparse grid function by
    interpolating the product of function value and pdf,

        \int\limits_{[0, 1]^d} f(x) pdf(x) dx
    """
    # extract the correct pdf for moment estimation
    vol, W = self.__extractPDFforMomentEstimation(U, T)

    # check if there are just uniform distributions given
    if all([isinstance(dist, Uniform) for dist in W.getDistributions()]):
        # for uniformly distributed RVs it holds that vol * pdf(x) = 1
        vol = 1
        u = f
    else:
        # interpolate u(x) = f_N^k(x) * pdf(x)
        def u(p, val):
            """
            function to be interpolated
            @param p: coordinates of collocation nodes
            @param val: sparse grid function value at position p
            """
            q = W.pdf(T.unitToProbabilistic(p), marginal=True)
            return f(p, val) * np.prod(q)

    # discretize the function u on a sparse grid
    # pdf_grid, pdf_alpha, pdf_err = U.discretize()
    # n_grid, n_alpha, m_err = discretizeProduct(f, grid, alpha,
    #                                            pdf_grid, pdf_alpha,
    #                                            refnums=self.__refnums,
    #                                            epsilon=self.__epsilon)
    n_grid, n_alpha, err = discretize(grid, alpha, u,
                                      refnums=self.__refnums,
                                      pointsNum=self.__pointsNum,
                                      epsilon=self.__epsilon,
                                      level=self.level,
                                      deg=self.__deg)
    moment = vol * doQuadrature(n_grid, n_alpha)

    # debugging trap for diverging moments
    if abs(moment) > 1e20:
        print(moment)
        print(n_grid.getSize(), len(alpha))
        import pdb; pdb.set_trace()

    # print("-" * 60)
    # print(evalSGFunction(m_grid, m_alpha, DataVector([0.5, 0.5])),
    #       u([0.5, 0.5], None))
    # print(evalSGFunction(n_grid, n_alpha, DataVector([0.5, 0.5])),
    #       u([0.5, 0.5], None))
    # print("-" * 60)
    # # do the quadrature on the new grid
    # m_moment = vol * doQuadrature(m_grid, m_alpha)
    # print(m_moment)
    # print(n_moment)

    return moment, err[1]
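# Quick numerical check (plain NumPy) of the uniform shortcut taken above:
# for X ~ U(0, 1) it holds that vol * pdf(x) = 1, so the moment reduces to
# plain quadrature of f. In 1-d with f(x) = x^2:
import numpy as np

x = np.linspace(0.0, 1.0, 10001)
print(np.trapz(x ** 2, x))  # ~ 1/3 = E[X^2] for X ~ U(0, 1)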
def computeBF(grid, U, admissibleSet):
    r"""
    Compute the bilinear form

        (A)_ij = \int phi_i phi_j dU(x)

    for the measure U, which in this case is assumed to be a Lebesgue
    measure.

    @param grid: Grid, sparse grid
    @param U: list of distributions, Lebesgue measure
    @param admissibleSet: AdmissibleSet
    @return: DataMatrix
    """
    gs = grid.getStorage()
    basis = getBasis(grid)

    # interpolate phi_i phi_j on a sparse grid with a piecewise polynomial
    # basis: the product of two piecewise linear functions is a piecewise
    # polynomial of degree 2
    ngrid = Grid.createPolyBoundaryGrid(1, 2)
    ngrid.getGenerator().regular(2)
    ngs = ngrid.getStorage()
    nodalValues = DataVector(ngs.size())

    A = DataMatrix(admissibleSet.getSize(), gs.size())
    b = DataVector(admissibleSet.getSize())
    s = np.ndarray(gs.getDimension(), dtype='float')

    # # precompute basis evaluations (disabled)
    # basis_eval = {}
    # for li in range(1, gs.getMaxLevel() + 1):
    #     for i in range(1, 2 ** li + 1, 2):
    #         # add value with itself
    #         x = 2 ** -li * i
    #         basis_eval[(li, i, li, i, x)] = basis.eval(li, i, x) * \
    #             basis.eval(li, i, x)
    #         # left side
    #         x = 2 ** -(li + 1) * (2 * i - 1)
    #         basis_eval[(li, i, li, i, x)] = basis.eval(li, i, x) * \
    #             basis.eval(li, i, x)
    #         # right side
    #         x = 2 ** -(li + 1) * (2 * i + 1)
    #         basis_eval[(li, i, li, i, x)] = basis.eval(li, i, x) * \
    #             basis.eval(li, i, x)
    #         # add values for hierarchically lower nodes
    #         for lj in range(li + 1, gs.getMaxLevel() + 1):
    #             a = 2 ** (lj - li)
    #             j = a * i - a + 1
    #             while j < a * i + a:
    #                 # center
    #                 x = 2 ** -lj * j
    #                 basis_eval[(li, i, lj, j, x)] = basis.eval(li, i, x) * \
    #                     basis.eval(lj, j, x)
    #                 basis_eval[(lj, j, li, i, x)] = basis_eval[(li, i, lj, j, x)]
    #                 # left side
    #                 x = 2 ** -(lj + 1) * (2 * j - 1)
    #                 basis_eval[(li, i, lj, j, x)] = basis.eval(li, i, x) * \
    #                     basis.eval(lj, j, x)
    #                 basis_eval[(lj, j, li, i, x)] = basis_eval[(li, i, lj, j, x)]
    #                 # right side
    #                 x = 2 ** -(lj + 1) * (2 * j + 1)
    #                 basis_eval[(li, i, lj, j, x)] = basis.eval(li, i, x) * \
    #                     basis.eval(lj, j, x)
    #                 basis_eval[(lj, j, li, i, x)] = basis_eval[(li, i, lj, j, x)]
    #                 j += 2
    # print(len(basis_eval))

    # run over all rows
    for i, gpi in enumerate(admissibleSet.values()):
        # run over all columns
        for j in range(gs.size()):
            # print("%i/%i" % (i * gs.size() + j + 1, gs.size() ** 2))
            gpj = gs.getPoint(j)
            for d in range(gs.getDimension()):
                # get level and index
                lid, iid = gpi.getLevel(d), gpi.getIndex(d)
                ljd, ijd = gpj.getLevel(d), gpj.getIndex(d)

                # compute left and right boundary of the support of both
                # basis functions
                lb = max([(iid - 1) * 2 ** -lid, (ijd - 1) * 2 ** -ljd])
                ub = min([(iid + 1) * 2 ** -lid, (ijd + 1) * 2 ** -ljd])

                # same level, different index
                if lid == ljd and iid != ijd:
                    s[d] = 0.
                # the supports do not overlap
                elif lid != ljd and lb >= ub:
                    s[d] = 0.
                else:
                    # ------------------------------------------------
                    # do the 1d interpolation ...
                    # define the transformation function
                    T = LinearTransformation(lb, ub)
                    for k in range(ngs.size()):
                        x = ngs.getCoordinate(ngs.getPoint(k), 0)
                        x = T.unitToProbabilistic(x)
                        nodalValues[k] = basis.eval(lid, iid, x) * \
                            basis.eval(ljd, ijd, x)
                    # ... by hierarchization
                    v = hierarchize(ngrid, nodalValues)

                    # discretize the following function
                    def f(x, y):
                        xp = T.unitToProbabilistic(x)
                        return float(y * U[d].pdf(xp))

                    # sparse grid quadrature
                    g, w, _ = discretize(ngrid, v, f, refnums=0, level=5,
                                         useDiscreteL2Error=False)
                    s[d] = doQuadrature(g, w) * (ub - lb)

                    # fig = plt.figure()
                    # plotSG1d(ngrid, v)
                    # x = np.linspace(lb, ub, 100)
                    # plt.plot(np.linspace(0, 1, 100), U[d].pdf(x))
                    # fig.show()
                    # fig = plt.figure()
                    # plotSG1d(g, w)
                    # x = np.linspace(0, 1, 100)
                    # plt.plot(x, [evalSGFunction(ngrid, v, DataVector([xi])) *
                    #              U[d].pdf(T.unitToProbabilistic(xi))
                    #              for xi in x])
                    # fig.show()
                    # plt.show()
                    # ------------------------------------------------
            # compute the integral as the product of the 1d integrals
            A.set(i, j, float(np.prod(s)))
            if gs.getSequenceNumber(gpi) == j:
                b[i] = A.get(i, j)

    return A, b
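# Independent cross-check (plain NumPy, no SG++) for a single 1-d factor
# s[d] computed in computeBF: under the Lebesgue measure (pdf = 1) the
# factor is just the overlap integral of two hat functions. The pair
# (l, i) = (1, 1) vs. (2, 1) below is chosen arbitrarily; its analytic
# value is 1/8.
import numpy as np

def hat(l, i, x):
    """Hierarchical hat function phi_{l,i}(x) = max(0, 1 - |2^l x - i|)."""
    return np.maximum(0.0, 1.0 - np.abs(2.0 ** l * x - i))

x = np.linspace(0.0, 1.0, 200001)
val = np.trapz(hat(1, 1, x) * hat(2, 1, x), x)
print(val)  # ~ 0.125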