def writeSurplusesLevelWise(self, filename):
    """Write the hierarchical surpluses, grouped by their level sum, to
    one ARFF file per available time step.

    @param filename: prefix for the generated ARFF files
    """
    # locate all knowledge types available
    dtypes = self.__learner.getKnowledgeTypes()
    # first column holds the level sum, then one surplus column per type
    names = ['level']
    for dtype in dtypes:
        names.append("surplus_%s" % KnowledgeTypes.toString(dtype))
    ts = self.__knowledge.getAvailableTimeSteps()
    for t in ts:
        # collect all the surpluses classifying them by level sum
        data = {}
        n = 0
        for dtype in dtypes:
            data[dtype] = self.computeSurplusesLevelWise(t, dtype)
            # NOTE(review): n is overwritten each iteration, so the matrix
            # is sized for the LAST knowledge type only -- this assumes all
            # knowledge types yield the same number of surpluses; confirm
            n = sum([len(values) for values in data[dtype].values()])
        A = DataMatrix(n, len(names))
        # add them to a matrix structure: column 0 is the level sum,
        # column i + 1 holds the surpluses of the i-th knowledge type
        for i, dtype in enumerate(dtypes):
            k = 0
            for level, surpluses in data[dtype].items():
                for j, surplus in enumerate(surpluses):
                    A.set(k + j, i + 1, surplus)
                    A.set(k + j, 0, level)
                k += len(surpluses)
        writeDataARFF({'filename': "%s.t%s.surpluses.arff" % (filename, t),
                       'data': A,
                       'names': names})
def writeSurplusesLevelWise(self, filename):
    """Dump the hierarchical surpluses, classified by level sum, to one
    ARFF file per available time step.

    @param filename: prefix for the generated ARFF files
    """
    # one surplus column per available knowledge type, preceded by the
    # level-sum column
    knowledgeTypes = self.__uqManager.getKnowledgeTypes()
    columnNames = ['level'] + ["surplus_%s" % KnowledgeTypes.toString(dtype)
                               for dtype in knowledgeTypes]
    for t in self.__knowledge.getAvailableTimeSteps():
        # classify the surpluses of each knowledge type by level sum
        surplusesByType = {}
        numRows = 0
        for dtype in knowledgeTypes:
            surplusesByType[dtype] = self.computeSurplusesLevelWise(t, dtype)
            numRows = sum([len(values)
                           for values in list(surplusesByType[dtype].values())])
        result = DataMatrix(numRows, len(columnNames))
        # fill the matrix: column 0 holds the level sum, column i + 1 the
        # surpluses of the i-th knowledge type
        for i, dtype in enumerate(knowledgeTypes):
            row = 0
            for level, surpluses in list(surplusesByType[dtype].items()):
                for j, surplus in enumerate(surpluses):
                    result.set(row + j, i + 1, surplus)
                    result.set(row + j, 0, level)
                row += len(surpluses)
        writeDataARFF({'filename': "%s.t%s.surpluses.arff" % (filename, t),
                       'data': result,
                       'names': columnNames})
def sampleGrids(self, filename):
    """Sample each sparse grid function of interest on a regular full
    grid and write the samples, the sparse grid points and the surpluses
    to ARFF files.

    @param filename: prefix for the generated files
    """
    ts = self.__learner.getTimeStepsOfInterest()
    # column names: one per parameter, plus the function value
    names = self.__params.getNames()
    names.append('f_\\mathcal{I}(x)')
    for t in ts:
        grid, surplus = self.__knowledge.getSparseGridFunction(
            self._qoi, t)
        # init
        gs = grid.getStorage()
        dim = gs.dim()
        # -----------------------------------------
        # do full grid sampling of sparse grid function
        # -----------------------------------------
        data = eval_fullGrid(4, dim)  # full grid of level 4
        res = evalSGFunctionMulti(grid, surplus, data)
        # append the function values as last column (via transposition)
        data.transpose()
        data.appendRow()
        data.setRow(data.getNrows() - 1, res)
        data.transpose()
        # write results
        writeDataARFF({
            'filename': "%s.t%f.samples.arff" % (filename, t),
            'data': data,
            'names': names
        })
        # -----------------------------------------
        # write sparse grid points to file
        # -----------------------------------------
        data = DataMatrix(gs.size(), dim)
        data.setAll(0.0)
        for i in xrange(gs.size()):
            gp = gs.get(i)
            v = np.array([gp.getCoord(j) for j in xrange(dim)])
            data.setRow(i, DataVector(v))
        # write results
        writeDataARFF({
            'filename': "%s.t%f.gridpoints.arff" % (filename, t),
            'data': data,
            'names': names
        })
        # -----------------------------------------
        # write alpha (hierarchical coefficients)
        # -----------------------------------------
        writeAlphaARFF("%s.t%f.alpha.arff" % (filename, t), surplus)
def writeSensitivityValues(self, filename):
    """Write the estimated ANOVA sensitivity values (total effects and
    Sobol indices) for every available time step to
    '<filename>.sa.stats.arff'.

    @param filename: prefix of the output file
    """

    def keymap(key):
        # translate a tuple of active-parameter indices into a comma
        # separated list of parameter names
        names = self.__uqManager.getParameters().activeParams().getNames()
        ans = [names[i] for i in key]
        return ",".join(ans)

    # parameters
    ts = self.__knowledge.getAvailableTimeSteps()
    gs = self.__knowledge.getGrid(self._qoi).getStorage()
    n = len(ts)
    n1 = gs.getDimension()
    n2 = 2 ** n1 - 1  # number of Sobol indices (all non-empty subsets)
    data = DataMatrix(n, n1 + n2 + 1)
    names = ['time'] + [None] * (n1 + n2)

    for k, t in enumerate(ts):
        # estimated anova decomposition
        anova = self.getAnovaDecomposition(t=t)
        me = anova.getSobolIndices()
        # BUGFIX: this branch used to drop into the ipdb debugger
        # (crashes if ipdb is not installed); fail loudly instead,
        # since a mismatch breaks the matrix layout below
        if len(me) != n2:
            raise ValueError("expected %i Sobol indices but got %i at t=%s"
                             % (n2, len(me), t))
        n2 = len(me)
        te = anova.getTotalEffects()
        n1 = len(te)

        v = DataVector(n1 + n2 + 1)
        v.setAll(0.0)
        v[0] = t
        # total effects first ...
        for i, key in enumerate(
                anova.getSortedPermutations(list(te.keys()))):
            v[i + 1] = te[key]
            if k == 0:
                names[i + 1] = '"$T_{' + keymap(key) + '}$"'
        # ... then the Sobol indices
        for i, key in enumerate(
                anova.getSortedPermutations(list(me.keys()))):
            v[n1 + i + 1] = me[key]
            if k == 0:
                names[n1 + 1 + i] = '"$S_{' + keymap(key) + '}$"'
        data.setRow(k, v)

    writeDataARFF({
        'filename': filename + ".sa.stats.arff",
        'data': data,
        'names': names
    })
def sampleGrids(self, filename):
    """Evaluate every sparse grid function of interest on a level-4 full
    grid and dump the samples, the sparse grid points and the surpluses
    to ARFF files.

    @param filename: prefix for the generated files
    """
    ts = self.__uqManager.getTimeStepsOfInterest()
    # column names: one per parameter, plus the function value
    names = self.__params.getNames()
    names.append('f_\\mathcal{I}(x)')
    for t in ts:
        grid, surplus = self.__knowledge.getSparseGridFunction(self._qoi, t)
        gs = grid.getStorage()
        numDims = gs.getDimension()

        # evaluate the sparse grid function on a regular full grid of
        # level 4 and append the results as last column
        samples = eval_fullGrid(4, numDims)
        values = evalSGFunctionMulti(grid, surplus, samples)
        samples = np.vstack((samples.T, values)).T
        writeDataARFF({'filename': "%s.t%f.samples.arff" % (filename, t),
                       'data': DataMatrix(samples),
                       'names': names})

        # export the coordinates of all sparse grid points
        coords = np.ndarray((gs.getSize(), numDims))
        p = DataVector(numDims)
        for i in range(gs.getSize()):
            gs.getCoordinates(gs.getPoint(i), p)
            coords[i, :] = p.array()
        writeDataARFF({'filename': "%s.t%f.gridpoints.arff" % (filename, t),
                       'data': DataMatrix(coords),
                       'names': names})

        # export the hierarchical coefficients
        writeAlphaARFF("%s.t%f.alpha.arff" % (filename, t), surplus)
def sampleGrids(self, filename):
    """Sample each sparse grid function of interest on a regular full
    grid and write the samples, the sparse grid points and the surpluses
    to ARFF files.

    @param filename: prefix for the generated files
    """
    ts = self.__learner.getTimeStepsOfInterest()
    # column names: one per parameter, plus the function value
    names = self.__params.getNames()
    names.append('f_\\mathcal{I}(x)')
    for t in ts:
        grid, surplus = self.__knowledge.getSparseGridFunction(self._qoi, t)
        # init
        gs = grid.getStorage()
        dim = gs.dim()
        # -----------------------------------------
        # do full grid sampling of sparse grid function
        # -----------------------------------------
        data = eval_fullGrid(4, dim)  # full grid of level 4
        res = evalSGFunctionMulti(grid, surplus, data)
        # append the function values as last column (via transposition)
        data.transpose()
        data.appendRow()
        data.setRow(data.getNrows() - 1, res)
        data.transpose()
        # write results
        writeDataARFF({'filename': "%s.t%f.samples.arff" % (filename, t),
                       'data': data,
                       'names': names})
        # -----------------------------------------
        # write sparse grid points to file
        # -----------------------------------------
        data = DataMatrix(gs.size(), dim)
        data.setAll(0.0)
        for i in xrange(gs.size()):
            gp = gs.get(i)
            v = np.array([gp.getCoord(j) for j in xrange(dim)])
            data.setRow(i, DataVector(v))
        # write results
        writeDataARFF({'filename': "%s.t%f.gridpoints.arff" % (filename, t),
                       'data': data,
                       'names': names})
        # -----------------------------------------
        # write alpha (hierarchical coefficients)
        # -----------------------------------------
        writeAlphaARFF("%s.t%f.alpha.arff" % (filename, t), surplus)
def writeSensitivityValues(self, filename):
    """Write the estimated ANOVA sensitivity values (total effects and
    Sobol indices) for every available time step to
    '<filename>.sa.stats.arff'.

    @param filename: prefix of the output file
    """

    def keymap(key):
        # translate a tuple of active-parameter indices into a comma
        # separated list of parameter names
        names = self.getLearner().getParameters().activeParams().getNames()
        ans = [names[i] for i in key]
        return ",".join(ans)

    # parameters
    ts = self.__knowledge.getAvailableTimeSteps()
    gs = self.__knowledge.getGrid(self._qoi).getStorage()
    n = len(ts)
    n1 = gs.dim()
    n2 = 2 ** n1 - 1  # number of Sobol indices (all non-empty subsets)
    data = DataMatrix(n, n1 + n2 + 1)
    names = ['time'] + [None] * (n1 + n2)

    for k, t in enumerate(ts):
        # estimated anova decomposition
        anova = self.getAnovaDecomposition(t=t)
        me = anova.getSobolIndices()
        # BUGFIX: this branch used to drop into the ipdb debugger
        # (crashes if ipdb is not installed); fail loudly instead,
        # since a mismatch breaks the matrix layout below
        if len(me) != n2:
            raise ValueError("expected %i Sobol indices but got %i at t=%s"
                             % (n2, len(me), t))
        n2 = len(me)
        te = anova.getTotalEffects()
        n1 = len(te)

        v = DataVector(n1 + n2 + 1)
        v.setAll(0.0)
        v[0] = t
        # total effects first ...
        for i, key in enumerate(anova.getSortedPermutations(te.keys())):
            v[i + 1] = te[key]
            if k == 0:
                names[i + 1] = '"$T_{' + keymap(key) + '}$"'
        # ... then the Sobol indices
        for i, key in enumerate(anova.getSortedPermutations(me.keys())):
            v[n1 + i + 1] = me[key]
            if k == 0:
                names[n1 + 1 + i] = '"$S_{' + keymap(key) + '}$"'
        data.setRow(k, v)

    writeDataARFF({'filename': filename + ".sa.stats.arff",
                   'data': data,
                   'names': names})
def testSettings(self):
    """Run the same sample set through two UQSetting instances and check
    that their serialized states agree, i.e. restoring results from file
    is consistent with a fresh run."""
    # start from a clean state
    if os.path.exists('testSetting.gz'):
        os.remove('testSetting.gz')

    # set distributions of the input parameters
    builder = ParameterBuilder()
    up = builder.defineUncertainParameters()
    up.new().isCalled('x').withUniformDistribution(0, 2)
    up.new().isCalled('y').withUniformDistribution(0, 2)
    # builder.withLinearTransformation()
    params = builder.andGetResult()

    firstSetting = self.makeUQSetting()

    # prepare the sample set: two random points in the active unit cube
    unitPoints = np.random.rand(2, 2)
    samples = Samples(params)
    for unitPoint in unitPoints:
        samples.add(unitPoint, dtype=SampleType.ACTIVEUNIT)

    # first session: run the samples and remember the serialized state
    firstSetting.runSamples(samples)
    firstJson = firstSetting.toJson()

    # second session: restore the results from file ...
    secondSetting = self.makeUQSetting()
    secondJson = secondSetting.toJson()

    # ... and run the very same samples again
    secondSetting.runSamples(samples)

    # re-running known samples must not change the state, and both
    # sessions must agree
    thirdJson = secondSetting.toJson()
    assert secondJson == thirdJson
    assert secondJson == firstJson

    res = secondSetting.getResults(qoi='x')
    assert list(res.keys()) == [0]

    for t, data in list(secondSetting.toDataMatrix(qoi='x').items()):
        writeDataARFF({'filename': 'uqSetting_%g.arff' % t,
                       'data': data})
def writeMoments(self, filename):
    """Compute the statistical moments and store them in
    '<filename>.moments.arff'."""
    moments = self.computeMoments()
    moments['filename'] = "%s.moments.arff" % filename
    writeDataARFF(moments)
def estimateDensityDTrees(trainSamplesUnit, testSamplesUnit, testSamplesProb,
                          pathResults="/tmp", dist=None, iteration=0,
                          nSamples=1000):
    """Estimate a density-tree density from the given samples and collect
    error statistics.

    @param trainSamplesUnit: training samples in the unit hypercube
    @param testSamplesUnit: test samples in the unit hypercube
    @param testSamplesProb: test samples in the probabilistic space
    @param pathResults: directory where all result files are written
    @param dist: reference distribution for error measures (optional)
    @param iteration: iteration number, used in the generated file names
    @param nSamples: number of samples drawn from the estimated density
    @return: the estimated DTreesDist density
    """
    # NOTE(review): usingTrain/usingTest are hard coded to "1:2", i.e.
    # this estimator assumes two-dimensional samples -- confirm
    config = """
[general]
method = denest

[files]
inFileTrain = %s
usingTrain = 1:2
inFileTest = %s
outFileTest = %s
usingTest = 1:2

[denest]
method = DensityTree
normalize = true
samplesNumberSamples = %i
samplesOutput = %s
printSurfaceFile = %s
"""
    # write the samples to file
    n = trainSamplesUnit.shape[0]
    trainSamplesUnitFile = os.path.join(
        pathResults, "samples_%i_%i_train.csv" % (iteration, n))
    testSamplesUnitFile = os.path.join(
        pathResults, "samples_%i_%i_test.csv" % (iteration, n))
    np.savetxt(trainSamplesUnitFile, trainSamplesUnit)
    np.savetxt(testSamplesUnitFile, testSamplesUnit)

    # define output files
    sampleFile = os.path.join(pathResults,
                              "samples_%i_%i.csv" % (iteration, n))
    likelihoodFile = os.path.join(
        pathResults, "samples_%i_%i_likelihood.csv" % (iteration, n))
    surfaceFile = os.path.join(pathResults,
                               "samples_%i_%i.xyz" % (iteration, n))
    jpegFile = os.path.join(pathResults,
                            "samples_%i_%i.jpg" % (iteration, n))
    configFile = os.path.join(pathResults,
                              "trees_%i_%i.cfg" % (iteration, n))
    gnuplotConfig = os.path.join(pathResults,
                                 "trees_%i_%i.gnuplot" % (iteration, n))

    # write config to file; the context manager guarantees the handle is
    # closed even if the formatting fails (the original leaked it)
    with open(configFile, "w") as fd:
        fd.write(config % (trainSamplesUnitFile, testSamplesUnitFile,
                           likelihoodFile, nSamples, sampleFile,
                           surfaceFile))

    # estimate the density
    dtreesDist = DTreesDist.byConfig(configFile)

    # -----------------------------------------------------------
    # do some plotting
    dtreesDist.gnuplot(jpegFile, gnuplotConfig=gnuplotConfig)
    # -----------------------------------------------------------
    # collect statistics (np.nan: the np.NAN alias was removed in NumPy 2.0)
    l2error = np.nan
    kldivergence = np.nan
    crossEntropy = dtreesDist.crossEntropy(testSamplesUnit)
    if dist is not None:
        l2error = dist.l2error(dtreesDist, testSamplesUnit, testSamplesProb)
        kldivergence = dist.klDivergence(dtreesDist, testSamplesUnit,
                                         testSamplesProb)

    stats = np.vstack(([n], [l2error], [crossEntropy],
                       [kldivergence])).transpose()

    # write results to file
    statsfilename = os.path.join(
        pathResults, "sg_dtrees_%i_%i.stats.arff" % (iteration, n))
    writeDataARFF({
        'filename': statsfilename,
        'data': DataMatrix(stats),
        'names': ['sampleSize', 'l2error', 'crossEntropy', 'KLDivergence']
    })
    return dtreesDist
def writeStats(self, filename):
    """Write one ARFF statistics file per knowledge type, named
    '<filename>.<knowledge type>.stats.arff'."""
    for dtype in self.__uqManager.getKnowledgeTypes():
        stats = self.computeStats(dtype)
        stats['filename'] = "%s.%s.stats.arff" % (
            filename, KnowledgeTypes.toString(dtype))
        writeDataARFF(stats)
def estimateDensityKDE(trainSamplesUnit, testSamplesUnit=None,
                       testSamplesProb=None, pathResults='/tmp', dist=None,
                       iteration=0, nSamples=100):
    """Estimate a kernel density (libAGF) from the given samples and
    collect error statistics.

    @param trainSamplesUnit: training samples in the unit hypercube
    @param testSamplesUnit: test samples in the unit hypercube (optional)
    @param testSamplesProb: test samples in the probabilistic space (optional)
    @param pathResults: directory where all result files are written
    @param dist: reference distribution for error measures (optional)
    @param iteration: iteration number, used in the generated file names
    @param nSamples: number of samples drawn from the estimated density
    @return: the estimated LibAGFDist density
    """
    config = """
[general]
method = denest

[files]
inFileTrain = %s
usingTrain = %s
inFileTest = %s
outFileTest = %s
usingTest = %s

[denest]
method = DensityAGF
normalize = true
samplesNumberSamples = %i
samplesOutput = %s
printSurfaceFile = %s
printBandwidthsFile = %s
"""
    # write the training samples to file
    if len(trainSamplesUnit.shape) == 1:
        n, dim = trainSamplesUnit.shape[0], 1
    else:
        n, dim = trainSamplesUnit.shape

    if dim == 1:
        usingTrainTag = "%i" % dim
    else:
        usingTrainTag = "1:%i" % dim

    trainSamplesUnitFile = os.path.join(
        pathResults, "samples_%i_%i_train.csv" % (iteration, n))
    np.savetxt(trainSamplesUnitFile, trainSamplesUnit)

    # write the test samples to file (if available)
    testSamplesUnitFile = ""
    usingTestTag = ""
    if testSamplesUnit is not None:
        testSamplesUnitFile = os.path.join(
            pathResults, "samples_%i_%i_test.csv" % (iteration, n))
        if dim == 1:
            usingTestTag = "%i" % dim
        else:
            usingTestTag = "1:%i" % dim
        np.savetxt(testSamplesUnitFile, testSamplesUnit)

    # define output files
    sampleFile = os.path.join(pathResults,
                              "samples_%i_%i.csv" % (iteration, n))
    likelihoodFile = ""
    if testSamplesUnit is not None:
        likelihoodFile = os.path.join(
            pathResults, "samples_%i_%i_likelihood.csv" % (iteration, n))
    surfaceFile = ""
    if dim == 2:
        surfaceFile = os.path.join(pathResults,
                                   "samples_%i_%i.xyz" % (iteration, n))
    bandwidthFile = os.path.join(
        pathResults, "samples_%i_%i_bandwidth.csv" % (iteration, n))
    jpegFile = os.path.join(pathResults,
                            "samples_%i_%i.jpg" % (iteration, n))
    configFile = os.path.join(pathResults,
                              "libagf_%i_%i.cfg" % (iteration, n))
    gnuplotConfig = os.path.join(pathResults,
                                 "libagf_%i_%i.gnuplot" % (iteration, n))

    # write config to file; the context manager guarantees the handle is
    # closed even if the formatting fails (the original leaked it)
    with open(configFile, "w") as fd:
        fd.write(config % (trainSamplesUnitFile, usingTrainTag,
                           testSamplesUnitFile, likelihoodFile, usingTestTag,
                           nSamples, sampleFile, surfaceFile, bandwidthFile))

    # estimate the density
    agfDist = LibAGFDist.byConfig(configFile)

    # -----------------------------------------------------------
    # do some plotting
    if dim == 2:
        agfDist.gnuplot(jpegFile, gnuplotConfig=gnuplotConfig)
    # -----------------------------------------------------------
    # collect statistics (np.nan: the np.NAN alias was removed in NumPy 2.0)
    l2error = np.nan
    kldivergence = np.nan
    crossEntropy = np.nan
    # BUGFIX: testSamplesUnit defaults to None, which used to crash the
    # cross entropy computation
    if testSamplesUnit is not None:
        crossEntropy = agfDist.crossEntropy(testSamplesUnit)
    if dist is not None:
        l2error = dist.l2error(agfDist, testSamplesUnit, testSamplesProb)
        kldivergence = dist.klDivergence(agfDist, testSamplesUnit,
                                         testSamplesProb)

    stats = np.vstack(([n], [l2error], [crossEntropy],
                       [kldivergence])).transpose()

    # write results to file
    statsfilename = os.path.join(
        pathResults, "sg_libagf_%i_%i.stats.arff" % (iteration, n))
    writeDataARFF({
        'filename': statsfilename,
        'data': DataMatrix(stats),
        'names': ['sampleSize', 'miseL2', 'crossEntropy', 'KLDivergence']
    })
    return agfDist
def computeReferenceValues(cls, uqSetting, n=100):
    """Compute reference values for the mean and the variance of the
    simulation output: analytically (closed form or quadrature) where
    possible, via Monte Carlo otherwise.

    Stores the results in cls.E_ana, cls.V_ana (each a (value, error)
    pair) and cls.refSize, and writes them to ARFF files.

    @param uqSetting: UQSetting holding the simulation and previous runs
    @param n: total number of Monte Carlo samples to have available
    """
    # ----------------------------------------------------------
    # analytic reference values
    # ----------------------------------------------------------
    g = uqSetting.getSimulation()
    numDims = cls.params.getStochasticDim()
    U = cls.params.getIndependentJointDistribution()
    computeWithMC = False
    if cls.param_setting == "uniform":
        # closed-form results, known up to 4 dimensions
        print("computing analytic results")
        cls.E_ana = ((2. / 3.))**numDims, 0.0
        if numDims == 1:
            cls.V_ana = (4. / 45.), 0.0
        elif numDims == 2:
            cls.V_ana = (176. / 2025.), 0.0
        elif numDims == 3:
            cls.V_ana = (60416. / 820125.), 0.0
        elif numDims == 4:
            cls.V_ana = (1705984. / 36905625.), 0.0
        else:
            computeWithMC = True
    else:
        # numerical quadrature over the unit interval/square
        if numDims == 1:
            print("computing analytic results 1d")
            cls.E_ana = quad(lambda x: g([x]) * U.pdf([x]), 0, 1)
            cls.V_ana = quad(
                lambda x: (g([x]) - cls.E_ana[0])**2 * U.pdf([x]), 0, 1)
        elif numDims == 2:
            print("computing analytic results 2d")
            cls.E_ana = dblquad(lambda x, y: g([x, y]) * U.pdf([x, y]),
                                0, 1, lambda x: 0, lambda x: 1)
            cls.V_ana = dblquad(
                lambda x, y: (g([x, y]) - cls.E_ana[0])**2 * U.pdf([x, y]),
                0, 1, lambda x: 0, lambda x: 1)
        else:
            computeWithMC = True
    # ----------------------------------------------------------
    # discretize the stochastic space with Monte Carlo
    # ----------------------------------------------------------
    print("computing monte carlo reference values")
    # only run the samples that are not yet available
    n -= uqSetting.getSize()
    if n > 0:
        mcSampler = MCSampler.withLatinHypercubeSampleGenerator(
            cls.params, n)
        samples = mcSampler.nextSamples(n)
        uqSetting.runSamples(samples)
        uqSetting.writeToFile()
    # ----------------------------------------------------------
    # monte carlo reference values
    # ----------------------------------------------------------
    res = uqSetting.getResults()
    analysis = MCAnalysis(cls.params, res)
    if computeWithMC:
        # no analytic result available: fall back to the MC estimate
        print("computing analytic results > 2d")
        cls.E_ana = analysis.mean()
        cls.V_ana = analysis.var()
    cls.refSize = len(res)
    # ----------------------------------------------
    # write reference values to file
    # ----------------------------------------------
    analysis.writeMoments("results/%s/%s.mc" % (cls.param_setting,
                                                cls.param_setting))
    # write reference values to file
    stats = {
        'data': [[cls.E_ana[0]], [cls.E_ana[1]],
                 [cls.V_ana[0]], [cls.V_ana[1]]],
        'names': ["mean", "meanError", "var", "varError"],
        'filename': "results/%s/%s.ref.moments.arff" % (cls.param_setting,
                                                        cls.param_setting)
    }
    writeDataARFF(stats)
    print("-" * 60)
    print("E(f) = %.14f, %g" % cls.E_ana)
    print("V(f) = %.14f, %g" % cls.V_ana)
    print("-" * 60)
def estimateDensitySGDE(trainSamplesUnit,
                        testSamplesUnit=None,
                        testSamplesProb=None,
                        pathResults="/tmp",
                        dist=None,
                        optimization='l2',
                        iteration=0,
                        levels=[1, 2, 3, 4, 5],
                        refNr=0, refPoints=0,
                        nSamples=1000):
    """
    Estimates a sparse grid density for different levels and refinements
    by optimizing over a given quantity.

    @param trainSamplesUnit: training samples in the unit hypercube
    @param testSamplesUnit: test samples in the unit hypercube (optional)
    @param testSamplesProb: test samples in the probabilistic space (optional)
    @param pathResults: directory where all result files are written
    @param dist: reference distribution for error measures (optional)
    @param optimization: criterion to select the best density
                         ('l2', 'kldivergence' or 'crossEntropy')
    @param iteration: iteration number, used in the generated file names
    @param levels: sparse grid levels to try (NOTE(review): a mutable
                   default; only read here, harmless as long as no
                   caller mutates it)
    @param refNr: number of refinement steps
    @param refPoints: number of points to refine per step
    @return: the best SGDEdist w.r.t. the optimization criterion
    """
    config = """
[general]
method = dmest

[files]
inFileTrain = %s
usingTrain = %s
inFileTest = %s
outFileTest = %s
usingTest = %s

[dmest]
gridFile = %s
lambda = -1 # 0.01
regType=Laplace
refNr = %i
refPoints = %i
writeGridFile = %s
writeAlphaFile = %s
samp_rejectionTrialMax = 5000
samp_numSamples = %i
samp_outFile = %s
printSurfaceFile = %s
"""
    # write the samples to file
    if len(trainSamplesUnit.shape) == 1:
        n, dim = trainSamplesUnit.shape[0], 1
        usingTrainTag = "%i" % dim
    else:
        n, dim = trainSamplesUnit.shape
        usingTrainTag = "1:%i" % dim

    trainSamplesUnitFile = os.path.join(
        pathResults, "samples_%i_%i_train.csv" % (iteration, n))
    np.savetxt(trainSamplesUnitFile, trainSamplesUnit)

    testSamplesUnitFile = ""
    usingTestTag = ""
    if testSamplesUnit is not None:
        testSamplesUnitFile = os.path.join(
            pathResults, "samples_%i_%i_test.csv" % (iteration, n))
        if dim == 1:
            usingTestTag = "%i" % dim
        else:
            usingTestTag = "1:%i" % dim
        np.savetxt(testSamplesUnitFile, testSamplesUnit)

    # collector arrays (one entry per accepted level)
    accGridSizes = np.array([])
    accLevels = np.array([])
    accL2error = np.array([])
    accCrossEntropy = np.array([])
    accKLDivergence = np.array([])

    # best estimation
    ans = None
    bestMeasure = 1e20
    bestSetting = None

    for level in levels:
        # define output files
        gridFile = os.path.join(
            pathResults, "samples_%i_%i_l%i.grid" % (iteration, n, level))
        alphaFile = os.path.join(
            pathResults,
            "samples_%i_%i_l%i.alpha.arff" % (iteration, n, level))
        sampleFile = os.path.join(
            pathResults, "samples_%i_%i_l%i.csv" % (iteration, n, level))
        likelihoodFile = ""
        if testSamplesUnit is not None:
            likelihoodFile = os.path.join(
                pathResults,
                "samples_%i_%i_l%i_likelihood.csv" % (iteration, n, level))
        surfaceFile = ""
        if dim == 2:
            surfaceFile = os.path.join(
                pathResults, "samples_%i_%i_l%i.xyz" % (iteration, n, level))
        gnuplotJpegFile = os.path.join(
            pathResults,
            "samples_%i_%i_l%i_gnuplot.jpg" % (iteration, n, level))
        sgdeJpegFile = os.path.join(
            pathResults,
            "samples_%i_%i_l%i_sgde.jpg" % (iteration, n, level))
        sgdePositiveJpegFile = os.path.join(
            pathResults,
            "samples_%i_%i_l%i_sgdePositive.jpg" % (iteration, n, level))
        configFile = os.path.join(
            pathResults, "sgde_%i_%i_l%i.cfg" % (iteration, n, level))
        gnuplotConfig = os.path.join(
            pathResults, "sgde_%i_%i_l%i.gnuplot" % (iteration, n, level))

        # generate the grid
        grid = Grid.createLinearBoundaryGrid(dim)
        grid.createGridGenerator().regular(level)

        # skip levels with more grid points than training samples
        if grid.getSize() <= n:
            print " l=%i" % level,
            fd = open(gridFile, "w")
            fd.write(grid.serialize())
            fd.close()

            # write config to file
            fd = open(configFile, "w")
            fd.write(config % (trainSamplesUnitFile, usingTrainTag,
                               testSamplesUnitFile, likelihoodFile,
                               usingTestTag, gridFile, refNr, refPoints,
                               gridFile, alphaFile, nSamples, sampleFile,
                               surfaceFile))
            fd.close()

            sgdeDist = SGDEdist.byConfig(configFile)
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            # -----------------------------------------------------------
            # do some plotting
            # NOTE(review): nesting of this plotting block was
            # reconstructed from mangled whitespace -- confirm
            if dim == 2:
                # gnuplot
                sgdeDist.gnuplot(gnuplotJpegFile, gnuplotConfig=gnuplotConfig)
                # -----------------------------------------------------------
                # matplotlib
                l2error = np.NAN
                kldivergence = np.NAN
                crossEntropy = sgdeDist.crossEntropy(testSamplesUnit)
                if dist is not None:
                    l2error = dist.l2error(sgdeDist, testSamplesUnit,
                                           testSamplesProb)
                    kldivergence = dist.klDivergence(sgdeDist,
                                                     testSamplesUnit,
                                                     testSamplesProb)
                fig = plt.figure()
                plotSG2d(grid, alpha)
                plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" %
                          (grid.getSize(), doQuadrature(grid, alpha),
                           kldivergence, crossEntropy, l2error))
                fig.savefig(sgdeJpegFile)
                plt.close(fig)
            # -----------------------------------------------------------
            # copy grid and coefficients
            gridFileNew = os.path.join(
                pathResults, "samples_%i_%i_sgde.grid" % (iteration, n))
            alphaFileNew = os.path.join(
                pathResults, "samples_%i_%i_sgde.alpha.arff" % (iteration, n))
            sampleFileNew = os.path.join(
                pathResults, "samples_%i_%i_sgde.csv" % (iteration, n))
            copy2(gridFile, gridFileNew)
            copy2(alphaFile, alphaFileNew)
            copy2(sampleFile, sampleFileNew)
            # -----------------------------------------------------------
            # # make it positive and do all over again
            # opPositive = OperationMakePositive(sgdeDist.grid)
            # alg = EstimateDensityAlgorithm(configFile)
            # opPositive.setInterpolationAlgorithm(alg)
            # grid, alpha = opPositive.makePositive(sgdeDist.alpha)

            # scale to unit integrand
            alpha.mult(1. / createOperationQuadrature(grid).doQuadrature(alpha))
            sgdeDist.grid = grid
            sgdeDist.alpha = alpha

            gridFileNew = os.path.join(
                pathResults,
                "samples_%i_%i_l%i_positive.grid" % (iteration, n, level))
            alphaFileNew = os.path.join(
                pathResults,
                "samples_%i_%i_l%i_positive.alpha.arff" % (iteration, n, level))
            fd = open(gridFileNew, "w")
            fd.write(Grid.serialize(grid))
            fd.close()
            writeAlphaARFF(alphaFileNew, alpha)
            # -----------------------------------------------------------
            # collect statistics
            accGridSizes = np.append(accGridSizes, grid.getSize())
            accLevels = np.append(accLevels, level)
            l2error = np.NAN
            kldivergence = np.NAN
            crossEntropy = sgdeDist.crossEntropy(testSamplesUnit)
            if dist is not None:
                l2error = dist.l2error(sgdeDist, testSamplesUnit,
                                       testSamplesProb)
                kldivergence = dist.klDivergence(sgdeDist, testSamplesUnit,
                                                 testSamplesProb)
            accL2error = np.append(accL2error, l2error)
            accCrossEntropy = np.append(accCrossEntropy, crossEntropy)
            accKLDivergence = np.append(accKLDivergence, kldivergence)

            if dim == 2:
                # -----------------------------------------------------------
                # do some plotting
                fig = plt.figure()
                plotSG2d(grid, alpha)
                plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" %
                          (grid.getSize(), doQuadrature(grid, alpha),
                           kldivergence, crossEntropy, l2error))
                fig.savefig(sgdePositiveJpegFile)
                plt.close(fig)
            # -----------------------------------------------------------
            # select the best density available based on the given criterion
            if optimization == 'crossEntropy':
                measure = crossEntropy
            elif optimization == 'kldivergence':
                measure = kldivergence
            elif optimization == 'l2':
                measure = l2error
            else:
                raise AttributeError(
                    'optimization "%s" is not known for density estimation'
                    % optimization)

            isBest = measure < bestMeasure
            if isBest:
                bestMeasure = measure

            if ans is None or isBest:
                ans = sgdeDist
                bestSetting = {'level': level,
                               'gridSize': grid.getSize(),
                               'l2error': l2error,
                               'KLDivergence': kldivergence,
                               'crossEntropy': crossEntropy}
                # -----------------------------------------------------------
                # copy grid and coefficients of the best setting so far
                gridFileNew = os.path.join(
                    pathResults, "samples_%i_%i.grid" % (iteration, n))
                alphaFileNew = os.path.join(
                    pathResults, "samples_%i_%i.alpha.arff" % (iteration, n))
                sampleFileNew = os.path.join(
                    pathResults, "samples_%i_%i.csv" % (iteration, n))
                copy2(gridFile, gridFileNew)
                copy2(alphaFile, alphaFileNew)
                copy2(sampleFile, sampleFileNew)
                gridFileNew = os.path.join(
                    pathResults,
                    "samples_%i_%i_positive.grid" % (iteration, n))
                alphaFileNew = os.path.join(
                    pathResults,
                    "samples_%i_%i_positive.alpha.arff" % (iteration, n))
                fd = open(gridFileNew, "w")
                fd.write(Grid.serialize(ans.grid))
                fd.close()
                writeAlphaARFF(alphaFileNew, ans.alpha)
                # -----------------------------------------------------------
            print ": %s = %g <= %g" % (optimization, measure, bestMeasure)
    print
    # -----------------------------------------------------------
    # write results to file: statistics of all levels ...
    statsfilename = os.path.join(
        pathResults, "sg_sgde_%i_%i_all.stats.arff" % (iteration, n))
    writeDataARFF({
        'filename': statsfilename,
        'data': DataMatrix(
            np.vstack(([n] * len(accGridSizes), accGridSizes,
                       accLevels, accL2error, accKLDivergence,
                       accCrossEntropy)).transpose()),
        'names': [
            'sampleSize', 'gridSize', 'level', 'l2error',
            'KLDivergence', 'crossEntropy'
        ]
    })
    # -----------------------------------------------------------
    # ... and of the best setting
    statsfilename = os.path.join(
        pathResults, "sg_sgde_%i_%i.stats.arff" % (iteration, n))
    writeDataARFF({
        'filename': statsfilename,
        'data': DataMatrix(
            np.vstack(([n], bestSetting['gridSize'], bestSetting['level'],
                       bestSetting['l2error'], bestSetting['KLDivergence'],
                       bestSetting['crossEntropy'])).transpose()),
        'names': [
            'sampleSize', 'gridSize', 'level', 'l2error',
            'KLDivergence', 'crossEntropy'
        ]
    })
    # -----------------------------------------------------------
    return ans
def writeMoments(self, filename):
    """Dump the computed statistical moments into
    '<filename>.moments.arff'."""
    result = self.computeMoments()
    result['filename'] = filename + ".moments.arff"
    writeDataARFF(result)
def writeMoments(self, filename, *args, **kws):
    """Compute the statistical moments, forwarding any extra arguments
    to computeMoments, and write them to '<filename>.moments.arff'."""
    moments = self.computeMoments(*args, **kws)
    moments['filename'] = "%s.moments.arff" % filename
    writeDataARFF(moments)
def estimateDensityKDE(trainSamplesUnit, testSamplesUnit=None,
                       testSamplesProb=None, pathResults='/tmp', dist=None,
                       iteration=0, nSamples=100):
    """Estimate a kernel density (libAGF) from the given samples and
    collect error statistics.

    @param trainSamplesUnit: training samples in the unit hypercube
    @param testSamplesUnit: test samples in the unit hypercube (optional)
    @param testSamplesProb: test samples in the probabilistic space (optional)
    @param pathResults: directory where all result files are written
    @param dist: reference distribution for error measures (optional)
    @param iteration: iteration number, used in the generated file names
    @param nSamples: number of samples drawn from the estimated density
    @return: the estimated LibAGFDist density
    """
    config = """
[general]
method = denest

[files]
inFileTrain = %s
usingTrain = %s
inFileTest = %s
outFileTest = %s
usingTest = %s

[denest]
method = DensityAGF
normalize = true
samplesNumberSamples = %i
samplesOutput = %s
printSurfaceFile = %s
printBandwidthsFile = %s
"""
    # write the training samples to file
    if len(trainSamplesUnit.shape) == 1:
        n, dim = trainSamplesUnit.shape[0], 1
    else:
        n, dim = trainSamplesUnit.shape

    if dim == 1:
        usingTrainTag = "%i" % dim
    else:
        usingTrainTag = "1:%i" % dim

    trainSamplesUnitFile = os.path.join(
        pathResults, "samples_%i_%i_train.csv" % (iteration, n))
    np.savetxt(trainSamplesUnitFile, trainSamplesUnit)

    # write the test samples to file (if available)
    testSamplesUnitFile = ""
    usingTestTag = ""
    if testSamplesUnit is not None:
        testSamplesUnitFile = os.path.join(
            pathResults, "samples_%i_%i_test.csv" % (iteration, n))
        if dim == 1:
            usingTestTag = "%i" % dim
        else:
            usingTestTag = "1:%i" % dim
        np.savetxt(testSamplesUnitFile, testSamplesUnit)

    # define output files
    sampleFile = os.path.join(pathResults,
                              "samples_%i_%i.csv" % (iteration, n))
    likelihoodFile = ""
    if testSamplesUnit is not None:
        likelihoodFile = os.path.join(
            pathResults, "samples_%i_%i_likelihood.csv" % (iteration, n))
    surfaceFile = ""
    if dim == 2:
        surfaceFile = os.path.join(pathResults,
                                   "samples_%i_%i.xyz" % (iteration, n))
    bandwidthFile = os.path.join(
        pathResults, "samples_%i_%i_bandwidth.csv" % (iteration, n))
    jpegFile = os.path.join(pathResults,
                            "samples_%i_%i.jpg" % (iteration, n))
    configFile = os.path.join(pathResults,
                              "libagf_%i_%i.cfg" % (iteration, n))
    gnuplotConfig = os.path.join(pathResults,
                                 "libagf_%i_%i.gnuplot" % (iteration, n))

    # write config to file; the context manager guarantees the handle is
    # closed even if the formatting fails (the original leaked it)
    with open(configFile, "w") as fd:
        fd.write(config % (trainSamplesUnitFile, usingTrainTag,
                           testSamplesUnitFile, likelihoodFile, usingTestTag,
                           nSamples, sampleFile, surfaceFile, bandwidthFile))

    # estimate the density
    agfDist = LibAGFDist.byConfig(configFile)

    # -----------------------------------------------------------
    # do some plotting
    if dim == 2:
        agfDist.gnuplot(jpegFile, gnuplotConfig=gnuplotConfig)
    # -----------------------------------------------------------
    # collect statistics (np.nan: the np.NAN alias was removed in NumPy 2.0)
    l2error = np.nan
    kldivergence = np.nan
    crossEntropy = np.nan
    # BUGFIX: testSamplesUnit defaults to None, which used to crash the
    # cross entropy computation
    if testSamplesUnit is not None:
        crossEntropy = agfDist.crossEntropy(testSamplesUnit)
    if dist is not None:
        l2error = dist.l2error(agfDist, testSamplesUnit, testSamplesProb)
        kldivergence = dist.klDivergence(agfDist, testSamplesUnit,
                                         testSamplesProb)

    stats = np.vstack(([n], [l2error], [crossEntropy],
                       [kldivergence])).transpose()

    # write results to file
    statsfilename = os.path.join(
        pathResults, "sg_libagf_%i_%i.stats.arff" % (iteration, n))
    writeDataARFF({
        'filename': statsfilename,
        'data': DataMatrix(stats),
        'names': ['sampleSize', 'miseL2', 'crossEntropy', 'KLDivergence']
    })
    return agfDist
def estimateDensityDTrees(trainSamplesUnit, testSamplesUnit, testSamplesProb,
                          pathResults="/tmp", dist=None, iteration=0,
                          nSamples=1000):
    """Estimate a density-tree density from the given samples and collect
    error statistics.

    @param trainSamplesUnit: training samples in the unit hypercube
    @param testSamplesUnit: test samples in the unit hypercube
    @param testSamplesProb: test samples in the probabilistic space
    @param pathResults: directory where all result files are written
    @param dist: reference distribution for error measures (optional)
    @param iteration: iteration number, used in the generated file names
    @param nSamples: number of samples drawn from the estimated density
    @return: the estimated DTreesDist density
    """
    # NOTE(review): usingTrain/usingTest are hard coded to "1:2", i.e.
    # this estimator assumes two-dimensional samples -- confirm
    config = """
[general]
method = denest

[files]
inFileTrain = %s
usingTrain = 1:2
inFileTest = %s
outFileTest = %s
usingTest = 1:2

[denest]
method = DensityTree
normalize = true
samplesNumberSamples = %i
samplesOutput = %s
printSurfaceFile = %s
"""
    # write the samples to file
    n = trainSamplesUnit.shape[0]
    trainSamplesUnitFile = os.path.join(
        pathResults, "samples_%i_%i_train.csv" % (iteration, n))
    testSamplesUnitFile = os.path.join(
        pathResults, "samples_%i_%i_test.csv" % (iteration, n))
    np.savetxt(trainSamplesUnitFile, trainSamplesUnit)
    np.savetxt(testSamplesUnitFile, testSamplesUnit)

    # define output files
    sampleFile = os.path.join(pathResults,
                              "samples_%i_%i.csv" % (iteration, n))
    likelihoodFile = os.path.join(
        pathResults, "samples_%i_%i_likelihood.csv" % (iteration, n))
    surfaceFile = os.path.join(pathResults,
                               "samples_%i_%i.xyz" % (iteration, n))
    jpegFile = os.path.join(pathResults,
                            "samples_%i_%i.jpg" % (iteration, n))
    configFile = os.path.join(pathResults,
                              "trees_%i_%i.cfg" % (iteration, n))
    gnuplotConfig = os.path.join(pathResults,
                                 "trees_%i_%i.gnuplot" % (iteration, n))

    # write config to file; the context manager guarantees the handle is
    # closed even if the formatting fails (the original leaked it)
    with open(configFile, "w") as fd:
        fd.write(config % (trainSamplesUnitFile, testSamplesUnitFile,
                           likelihoodFile, nSamples, sampleFile,
                           surfaceFile))

    # estimate the density
    dtreesDist = DTreesDist.byConfig(configFile)

    # -----------------------------------------------------------
    # do some plotting
    dtreesDist.gnuplot(jpegFile, gnuplotConfig=gnuplotConfig)
    # -----------------------------------------------------------
    # collect statistics (np.nan: the np.NAN alias was removed in NumPy 2.0)
    l2error = np.nan
    kldivergence = np.nan
    crossEntropy = dtreesDist.crossEntropy(testSamplesUnit)
    if dist is not None:
        l2error = dist.l2error(dtreesDist, testSamplesUnit, testSamplesProb)
        kldivergence = dist.klDivergence(dtreesDist, testSamplesUnit,
                                         testSamplesProb)

    stats = np.vstack(([n], [l2error], [crossEntropy],
                       [kldivergence])).transpose()

    # write results to file
    statsfilename = os.path.join(
        pathResults, "sg_dtrees_%i_%i.stats.arff" % (iteration, n))
    writeDataARFF({
        'filename': statsfilename,
        'data': DataMatrix(stats),
        'names': ['sampleSize', 'l2error', 'crossEntropy', 'KLDivergence']
    })
    return dtreesDist
def writeStats(self, filename):
    """Write the statistics of each knowledge type to a separate ARFF
    file named '<filename>.<knowledge type>.stats.arff'."""
    for dtype in self.__learner.getKnowledgeTypes():
        stats = self.computeStats(dtype)
        suffix = KnowledgeTypes.toString(dtype)
        stats['filename'] = ".".join([filename, suffix, "stats", "arff"])
        writeDataARFF(stats)
def estimateDensitySGDE(trainSamplesUnit, testSamplesUnit=None, testSamplesProb=None, pathResults="/tmp", dist=None, optimization='l2', iteration=0, levels=[1, 2, 3, 4, 5], refNr=0, refPoints=0, nSamples=1000): """ Estimates a sparse grid density for different levels and refinements by optimizing over a given quantity. @param trainSamplesUnit: @param testSamplesUnit: @param testSamplesProb: @param pathResults: @param dist: @param optimization: @param iteration: @param levels: @param refNr: @param refPoints: """ config = """ [general] method = dmest [files] inFileTrain = %s usingTrain = %s inFileTest = %s outFileTest = %s usingTest = %s [dmest] gridFile = %s lambda = -1 # 0.01 regType=Laplace refNr = %i refPoints = %i writeGridFile = %s writeAlphaFile = %s samp_rejectionTrialMax = 5000 samp_numSamples = %i samp_outFile = %s printSurfaceFile = %s """ # write the samples to file if len(trainSamplesUnit.shape) == 1: n, dim = trainSamplesUnit.shape[0], 1 usingTrainTag = "%i" % dim else: n, dim = trainSamplesUnit.shape usingTrainTag = "1:%i" % dim trainSamplesUnitFile = os.path.join(pathResults, "samples_%i_%i_train.csv" % (iteration, n)) np.savetxt(trainSamplesUnitFile, trainSamplesUnit) testSamplesUnitFile = "" usingTestTag = "" if testSamplesUnit is not None: testSamplesUnitFile = os.path.join(pathResults, "samples_%i_%i_test.csv" % (iteration, n)) if dim == 1: usingTestTag = "%i" % dim else: usingTestTag = "1:%i" % dim np.savetxt(testSamplesUnitFile, testSamplesUnit) # collector arrays accGridSizes = np.array([]) accLevels = np.array([]) accL2error = np.array([]) accCrossEntropy = np.array([]) accKLDivergence = np.array([]) # best estimation ans = None bestMeasure = 1e20 bestSetting = None for level in levels: # define output files gridFile = os.path.join(pathResults, "samples_%i_%i_l%i.grid" % (iteration, n, level)) alphaFile = os.path.join(pathResults, "samples_%i_%i_l%i.alpha.arff" % (iteration, n, level)) sampleFile = os.path.join(pathResults, 
"samples_%i_%i_l%i.csv" % (iteration, n, level)) likelihoodFile = "" if testSamplesUnit is not None: likelihoodFile = os.path.join(pathResults, "samples_%i_%i_l%i_likelihood.csv" % (iteration, n, level)) surfaceFile = "" if dim == 2: surfaceFile = os.path.join(pathResults, "samples_%i_%i_l%i.xyz" % (iteration, n, level)) gnuplotJpegFile = os.path.join(pathResults, "samples_%i_%i_l%i_gnuplot.jpg" % (iteration, n, level)) sgdeJpegFile = os.path.join(pathResults, "samples_%i_%i_l%i_sgde.jpg" % (iteration, n, level)) sgdePositiveJpegFile = os.path.join(pathResults, "samples_%i_%i_l%i_sgdePositive.jpg" % (iteration, n, level)) configFile = os.path.join(pathResults, "sgde_%i_%i_l%i.cfg" % (iteration, n, level)) gnuplotConfig = os.path.join(pathResults, "sgde_%i_%i_l%i.gnuplot" % (iteration, n, level)) # generate the grid grid = Grid.createLinearBoundaryGrid(dim) grid.createGridGenerator().regular(level) if grid.getSize() <= n: print " l=%i" % level, fd = open(gridFile, "w") fd.write(grid.serialize()) fd.close() # write config to file fd = open(configFile, "w") fd.write(config % (trainSamplesUnitFile, usingTrainTag, testSamplesUnitFile, likelihoodFile, usingTestTag, gridFile, refNr, refPoints, gridFile, alphaFile, nSamples, sampleFile, surfaceFile)) fd.close() sgdeDist = SGDEdist.byConfig(configFile) grid, alpha = sgdeDist.grid, sgdeDist.alpha # ----------------------------------------------------------- # do some plotting if dim == 2: # gnuplot sgdeDist.gnuplot(gnuplotJpegFile, gnuplotConfig=gnuplotConfig) # ----------------------------------------------------------- # matplotlib l2error = np.NAN kldivergence = np.NAN crossEntropy = sgdeDist.crossEntropy(testSamplesUnit) if dist is not None: l2error = dist.l2error(sgdeDist, testSamplesUnit, testSamplesProb) kldivergence = dist.klDivergence(sgdeDist, testSamplesUnit, testSamplesProb) fig = plt.figure() plotSG2d(grid, alpha) plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" % (grid.getSize(), doQuadrature(grid, alpha), 
kldivergence, crossEntropy, l2error)) fig.savefig(sgdeJpegFile) plt.close(fig) # ----------------------------------------------------------- # copy grid and coefficients gridFileNew = os.path.join(pathResults, "samples_%i_%i_sgde.grid" % (iteration, n)) alphaFileNew = os.path.join(pathResults, "samples_%i_%i_sgde.alpha.arff" % (iteration, n)) sampleFileNew = os.path.join(pathResults, "samples_%i_%i_sgde.csv" % (iteration, n)) copy2(gridFile, gridFileNew) copy2(alphaFile, alphaFileNew) copy2(sampleFile, sampleFileNew) # ----------------------------------------------------------- # # make it positive and do all over again # opPositive = OperationMakePositive(sgdeDist.grid) # alg = EstimateDensityAlgorithm(configFile) # opPositive.setInterpolationAlgorithm(alg) # grid, alpha = opPositive.makePositive(sgdeDist.alpha) # scale to unit integrand alpha.mult(1. / createOperationQuadrature(grid).doQuadrature(alpha)) sgdeDist.grid = grid sgdeDist.alpha = alpha gridFileNew = os.path.join(pathResults, "samples_%i_%i_l%i_positive.grid" % (iteration, n, level)) alphaFileNew = os.path.join(pathResults, "samples_%i_%i_l%i_positive.alpha.arff" % (iteration, n, level)) fd = open(gridFileNew, "w") fd.write(Grid.serialize(grid)) fd.close() writeAlphaARFF(alphaFileNew, alpha) # ----------------------------------------------------------- # collect statistics accGridSizes = np.append(accGridSizes, grid.getSize()) accLevels = np.append(accLevels, level) l2error = np.NAN kldivergence = np.NAN crossEntropy = sgdeDist.crossEntropy(testSamplesUnit) if dist is not None: l2error = dist.l2error(sgdeDist, testSamplesUnit, testSamplesProb) kldivergence = dist.klDivergence(sgdeDist, testSamplesUnit, testSamplesProb) accL2error = np.append(accL2error, l2error) accCrossEntropy = np.append(accCrossEntropy, crossEntropy) accKLDivergence = np.append(accKLDivergence, kldivergence) if dim == 2: # ----------------------------------------------------------- # do some plotting fig = plt.figure() 
plotSG2d(grid, alpha) plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" % (grid.getSize(), doQuadrature(grid, alpha), kldivergence, crossEntropy, l2error)) fig.savefig(sgdePositiveJpegFile) plt.close(fig) # ----------------------------------------------------------- # select the best density available based on the given criterion if optimization == 'crossEntropy': measure = crossEntropy elif optimization == 'kldivergence': measure = kldivergence elif optimization == 'l2': measure = l2error else: raise AttributeError('optimization "%s" is not known for density estimation' % optimization) isBest = measure < bestMeasure if isBest: bestMeasure = measure if ans is None or isBest: ans = sgdeDist bestSetting = {'level': level, 'gridSize': grid.getSize(), 'l2error': l2error, 'KLDivergence': kldivergence, 'crossEntropy': crossEntropy} # ----------------------------------------------------------- # copy grid and coefficients gridFileNew = os.path.join(pathResults, "samples_%i_%i.grid" % (iteration, n)) alphaFileNew = os.path.join(pathResults, "samples_%i_%i.alpha.arff" % (iteration, n)) sampleFileNew = os.path.join(pathResults, "samples_%i_%i.csv" % (iteration, n)) copy2(gridFile, gridFileNew) copy2(alphaFile, alphaFileNew) copy2(sampleFile, sampleFileNew) gridFileNew = os.path.join(pathResults, "samples_%i_%i_positive.grid" % (iteration, n)) alphaFileNew = os.path.join(pathResults, "samples_%i_%i_positive.alpha.arff" % (iteration, n)) fd = open(gridFileNew, "w") fd.write(Grid.serialize(ans.grid)) fd.close() writeAlphaARFF(alphaFileNew, ans.alpha) # ----------------------------------------------------------- print ": %s = %g <= %g" % (optimization, measure, bestMeasure) print # ----------------------------------------------------------- # write results to file statsfilename = os.path.join(pathResults, "sg_sgde_%i_%i_all.stats.arff" % (iteration, n)) writeDataARFF({'filename': statsfilename, 'data': DataMatrix(np.vstack(([n] * len(accGridSizes), accGridSizes, accLevels, 
accL2error, accKLDivergence, accCrossEntropy)).transpose()), 'names': ['sampleSize', 'gridSize', 'level', 'l2error', 'KLDivergence', 'crossEntropy']}) # ----------------------------------------------------------- statsfilename = os.path.join(pathResults, "sg_sgde_%i_%i.stats.arff" % (iteration, n)) writeDataARFF({'filename': statsfilename, 'data': DataMatrix(np.vstack(([n], bestSetting['gridSize'], bestSetting['level'], bestSetting['l2error'], bestSetting['KLDivergence'], bestSetting['crossEntropy'])).transpose()), 'names': ['sampleSize', 'gridSize', 'level', 'l2error', 'KLDivergence', 'crossEntropy']}) # ----------------------------------------------------------- return ans
def writeMoments(self, filename, *args, **kws):
    """
    Compute the moments and store them as "<filename>.moments.arff".

    @param filename: base name (path prefix) of the output file; any
                     further arguments are forwarded to computeMoments
    """
    moments = self.computeMoments(*args, **kws)
    moments['filename'] = filename + ".moments.arff"
    writeDataARFF(moments)