def computeTrilinearFormByRow(self, gpsk, basisk, gpi, basisi, gpj, basisj):
    """
    Compute the trilinear form of a list of grid points (gpsk) with two
    fixed grid points (gpi, gpj), accumulated multiplicatively over all
    dimensions.
    @param gpsk: list of HashGridIndex
    @param basisk: SG++ Basis for grid indices k
    @param gpi: HashGridIndex
    @param basisi: SG++ Basis for grid indices i
    @param gpj: HashGridIndex
    @param basisj: SG++ Basis for grid indices j
    @return: tuple(DataVector, float) entry per gpsk element and the
             accumulated quadrature error
    """
    b = DataVector(len(gpsk))
    b.setAll(1.0)
    err = 0.
    # run over all entries
    for k, gpk in enumerate(gpsk):
        # run over all dimensions
        for d in xrange(gpi.dim()):
            # compute trilinear form for one entry; entries multiply
            # across dimensions, errors add up
            value, erri = self.getTrilinearFormEntry(gpk, basisk,
                                                     gpi, basisi,
                                                     gpj, basisj, d)
            b[k] *= value
            err += erri
    return b, err
def plotSG3d(grid, alpha, n=50, f=lambda x: x):
    """
    Surface plot of a 2d sparse grid function over [0, 1]^2, with the
    grid points scattered in the z = 0 plane.
    @param grid: Grid (two-dimensional)
    @param alpha: DataVector surplus vector
    @param n: int number of samples per axis for the surface mesh
    @param f: optional post-processing applied to each function value
    @return: tuple(figure, axis, Z) where Z is the (n x n) value matrix
    """
    fig = plt.figure()
    # NOTE(review): fig.gca(projection='3d') is removed in matplotlib >= 3.6
    # -- confirm the pinned matplotlib version still supports it
    ax = fig.gca(projection='3d')
    X = np.linspace(0, 1, n)
    Y = np.linspace(0, 1, n)
    X, Y = np.meshgrid(X, Y)
    Z = np.zeros(n * n).reshape(n, n)
    # evaluate the sparse grid function on the mesh
    for i in xrange(len(X)):
        for j, (x, y) in enumerate(zip(X[i], Y[i])):
            Z[i, j] = f(evalSGFunction(grid, alpha, DataVector([x, y])))
    # get grid points
    gs = grid.getStorage()
    gps = np.zeros([gs.size(), 2])
    p = DataVector(2)
    for i in xrange(gs.size()):
        gs.get(i).getCoords(p)
        gps[i, :] = p.array()
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
                           cmap=cm.coolwarm, linewidth=0,
                           antialiased=False)
    # mark the sparse grid points in the z = 0 plane
    ax.scatter(gps[:, 0], gps[:, 1], np.zeros(gs.size()))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    # ax.set_zlim(0, 2)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    return fig, ax, Z
def currentDiagHess(self, params):
    """
    Approximate the diagonal of the Hessian from the current batch: for
    each basis function, the squared l2 norm of its evaluations on the
    batch samples, averaged over the batch size. Cached in self.H.
    @param params: current parameter vector (unused in this estimate)
    @return: np.ndarray of shape (1, grid size)
    """
    num_samples = self._lastseen.shape[0]
    batch = DataMatrix(self._lastseen[:, :self.dim])
    evaluator = createOperationMultipleEval(self.grid, batch)
    unit = DataVector(self.grid.getSize())
    unit.setAll(0.)
    evals = DataVector(num_samples)
    diag = np.zeros(self.grid.getSize())
    for idx in xrange(self.grid.getSize()):
        # evaluate the idx-th basis function on the whole batch
        unit[idx] = 1.0
        evaluator.mult(unit, evals)
        diag[idx] = evals.l2Norm() ** 2
        unit[idx] = 0.0
    self.H = diag.reshape(1, -1) / num_samples
    return self.H
def gradient_fun(self, params):
    '''
    Compute the gradient of the squared-error loss for each sample of
    the current batch: row i is (f(x_i) - y_i) * grad_alpha f(x_i).
    @param params: current surplus vector (array-like)
    @return: np.ndarray of shape (batch_size, grid size)
    '''
    # BUG FIX: gradient_array was returned without ever being allocated
    # or filled (both lines were commented out), raising a NameError
    gradient_array = np.empty((self.batch_size, self.grid.getSize()))
    for sample_idx in xrange(self.batch_size):
        x = self._lastseen[sample_idx, :self.dim]
        y = self._lastseen[sample_idx, self.dim]
        params_DV = DataVector(params)
        gradient = DataVector(len(params_DV))
        single_alpha = DataVector(1)
        single_alpha[0] = 1
        # basis-function values at x via B^T * e_0
        data_matrix = DataMatrix(x.reshape(1, -1))
        mult_eval = createOperationMultipleEval(self.grid, data_matrix)
        mult_eval.multTranspose(single_alpha, gradient)
        # residual f(x) - y scales the gradient of the squared error
        residual = gradient.dotProduct(params_DV) - y
        gradient.mult(residual)
        gradient_array[sample_idx, :] = gradient.array()
    return gradient_array
def generateLaplaceMatrix(factory, level, verbose=False):
    """
    Assemble the dense Laplace stiffness matrix of a regular grid by
    applying the Laplace operator to every unit vector.
    @param factory: Grid providing storage, generator and operator
    @param level: int level of the regular grid
    @param verbose: bool if True, print every column and its sum
    @return: matrix of size storage.size() x storage.size()
    """
    from pysgpp import DataVector
    storage = factory.getStorage()
    gen = factory.createGridGenerator()
    gen.regular(level)
    laplace = factory.createOperationLaplace()
    # create vector
    alpha = DataVector(storage.size())
    erg = DataVector(storage.size())
    # create stiffness matrix
    # NOTE(review): relies on the legacy two-argument DataVector(rows,
    # cols) matrix API (with setColumn) of old SG++ versions -- confirm
    # the pinned pysgpp still provides it; newer versions use DataMatrix
    m = DataVector(storage.size(), storage.size())
    m.setAll(0)
    for i in xrange(storage.size()):
        # apply unit vectors: column i becomes L * e_i
        alpha.setAll(0)
        alpha[i] = 1
        laplace.mult(alpha, erg)
        if verbose:
            print erg, erg.sum()
        m.setColumn(i, erg)
    return m
def testOperationB(self):
    """Check OperationB mult/multTranspose on a 1d boundary grid of level 2."""
    from pysgpp import Grid, DataVector, DataMatrix
    factory = Grid.createLinearBoundaryGrid(1)
    gen = factory.createGridGenerator()
    gen.regular(2)
    alpha = DataVector(factory.getStorage().size())
    p = DataMatrix(1, 1)
    beta = DataVector(1)
    # B^T * beta for a single data point at x = 0.25
    alpha.setAll(0.0)
    p.set(0, 0, 0.25)
    beta[0] = 1.0
    opb = factory.createOperationB()
    opb.mult(beta, p, alpha)
    for idx, expected in enumerate([0.75, 0.25, 0.5, 1.0, 0.0]):
        self.failUnlessAlmostEqual(alpha[idx], expected)
    # B * alpha for the unit vector e_2 at x = 0.25
    alpha.setAll(0.0)
    alpha[2] = 1.0
    p.set(0, 0, 0.25)
    beta[0] = 0.0
    opb.multTranspose(alpha, p, beta)
    self.failUnlessAlmostEqual(beta[0], 0.5)
def testOperationTest_test(self):
    """OperationTest counts correct classifications of a one-point set."""
    from pysgpp import Grid, DataVector, DataMatrix
    factory = Grid.createLinearBoundaryGrid(1)
    gen = factory.createGridGenerator()
    gen.regular(1)
    alpha = DataVector(factory.getStorage().size())
    data = DataMatrix(1, 1)
    data.setAll(0.25)
    classes = DataVector(1)
    classes.setAll(1.0)
    testOP = factory.createOperationTest()
    # positive surplus on the inner point -> correctly classified
    alpha[0] = 0.0
    alpha[1] = 0.0
    alpha[2] = 1.0
    c = testOP.test(alpha, data, classes)
    self.failUnless(c > 0.0)
    # flipped sign -> misclassified, no correct points
    alpha[0] = 0.0
    alpha[1] = 0.0
    alpha[2] = -1.0
    c = testOP.test(alpha, data, classes)
    self.failUnless(c == 0.0)
def serializeToFile(self, memento, filename):
    """
    Save pairwise scatter plots of all grid point coordinates into a
    (gzipped) file.
    @param memento: Grid whose storage provides the points
    @param filename: str target file name (opened via self.gzOpen)
    """
    fstream = self.gzOpen(filename, "w")
    try:
        figure = plt.figure()
        grid = memento
        storage = grid.getStorage()
        coord_vector = DataVector(storage.dim())
        points = zeros([storage.size(), storage.dim()])
        for i in xrange(storage.size()):
            point = storage.get(i)
            point.getCoords(coord_vector)
            points[i] = [j for j in coord_vector.array()]
        num_of_sublots = storage.dim() * (storage.dim() - 1) / 2
        rows = int(ceil(sqrt(num_of_sublots)))
        cols = int(floor(sqrt(num_of_sublots)))
        i = 1
        # NOTE(review): x2 starts at 2 independent of x1, so for dim > 2
        # some (x1, x2) pairs repeat or coincide -- confirm intended
        for x1 in xrange(1, storage.dim()):
            for x2 in xrange(2, storage.dim() + 1):
                # BUG FIX: a redundant add_subplot(rows*100 + cols*10 + i)
                # call preceded this one and was immediately superseded;
                # only the (rows, cols, i) form is kept
                figure.add_subplot(rows, cols, i)
                plt.xlabel('x%d' % x1, figure=figure)
                plt.ylabel('x%d' % x2, figure=figure)
                plt.scatter(points[:, x1 - 1], points[:, x2 - 1],
                            figure=figure)
                i += 1
        plt.savefig(fstream, figure=figure)
        plt.close(figure)
    finally:
        fstream.close()
def computeMoments(self, ts=None):
    """
    Collect mean/variance statistics per time step into a DataMatrix.
    @param ts: optional list of time steps; defaults to all sampled ones
    @return: dict with 'data' (DataMatrix nrows x ncols) and 'names'
    """
    names = ['time',
             'iteration',
             'grid_size',
             'mean',
             'meanDiscretizationError',
             'var',
             'varDiscretizationError']
    # parameters
    # BUG FIX: the ts argument was unconditionally overwritten, making
    # the parameter dead; use it as a filter when provided
    if ts is None:
        ts = self.__samples.keys()
    nrows = len(ts)
    ncols = len(names)
    data = DataMatrix(nrows, ncols)
    v = DataVector(ncols)
    row = 0
    for t in ts:
        v.setAll(0.0)
        v[0] = t
        v[1] = 0
        v[2] = len(self.__samples[t].values())
        v[3], v[4] = self.mean(ts=[t])
        v[5], v[6] = self.var(ts=[t])
        # write results to matrix
        data.setRow(row, v)
        row += 1
    return {'data': data, 'names': names}
def var(self, grid, alpha, U, T, mean): r""" Estimate the expectation value using Monte-Carlo. \frac{1}{N}\sum\limits_{i = 1}^N (f_N(x_i) - E(f))^2 where x_i \in \Gamma """ # init _, W = self._extractPDFforMomentEstimation(U, T) moments = np.zeros(self.__npaths) vecMean = DataVector(self.__n) vecMean.setAll(mean) for i in xrange(self.__npaths): samples = self.__getSamples(W, T, self.__n) res = evalSGFunctionMulti(grid, alpha, samples) res.sub(vecMean) res.sqr() # compute the moment moments[i] = res.sum() / (len(res) - 1.) # error statistics err = np.Inf # calculate moment return np.sum(moments) / self.__npaths, err
def mean(self, grid, alpha, U, T): r""" Extraction of the expectation the given sparse grid function interpolating the product of function value and pdf. \int\limits_{[0, 1]^d} f_N(x) * pdf(x) dx """ # extract correct pdf for moment estimation vol, W = self._extractPDFforMomentEstimation(U, T) D = T.getTransformations() # compute the integral of the product gs = grid.getStorage() acc = DataVector(gs.size()) acc.setAll(1.) tmp = DataVector(gs.size()) err = 0 # run over all dimensions for i, dims in enumerate(W.getTupleIndices()): dist = W[i] trans = D[i] # get the objects needed for integration the current dimensions gpsi, basisi = project(grid, dims) if isinstance(dist, SGDEdist): # if the distribution is given as a sparse grid function we # need to compute the bilinear form of the grids # accumulate objects needed for computing the bilinear form gpsj, basisj = project(dist.grid, range(len(dims))) # compute the bilinear form bf = BilinearGaussQuadratureStrategy() A, erri = bf.computeBilinearFormByList(gpsi, basisi, gpsj, basisj) # weight it with the coefficient of the density function self.mult(A, dist.alpha, tmp) else: # the distribution is given analytically, handle them # analytically in the integration of the basis functions if isinstance(dist, Dist) and len(dims) > 1: raise AttributeError('analytic quadrature not supported for multivariate distributions') if isinstance(dist, Dist): dist = [dist] trans = [trans] lf = LinearGaussQuadratureStrategy(dist, trans) tmp, erri = lf.computeLinearFormByList(gpsi, basisi) # print error stats # print "%s: %g -> %g" % (str(dims), err, err + D[i].vol() * erri) # import ipdb; ipdb.set_trace() # accumulate the error err += D[i].vol() * erri # accumulate the result acc.componentwise_mult(tmp) moment = alpha.dotProduct(acc) return vol * moment, err
def setUp(self):
    """Build a regular level-3 2d grid and coarsen four points away."""
    self.grid = Grid.createLinearGrid(2)  # a simple 2D grid
    self.grid.createGridGenerator().regular(3)  # max level 3 => 17 points
    self.HashGridStorage = self.grid.getStorage()
    surpluses = DataVector(self.grid.getSize())
    surpluses.setAll(1.0)
    # zero surpluses mark the leaves that may be removed
    for removable in [9, 10, 11, 12]:
        surpluses[removable] = 0.0
    functor = SurplusCoarseningFunctor(surpluses, 4, 0.5)
    self.grid.createGridGenerator().coarsen(functor, surpluses)
def calc_indicator_value(self, index): numData = self.trainData.getNrows() numCoeff = self.grid.getSize() seq = self.grid.getStorage().seq(index) num = 0 denom = 0 tmp = DataVector(numCoeff) self.multEval.multTranspose(self.errors, tmp) num = tmp.__getitem__(seq) num **= 2 alpha = DataVector(numCoeff) col = DataVector(numData) alpha.__setitem__(seq, 1.0) self.multEval.mult(alpha, col) col.sqr() denom = col.sum() if denom == 0: print "Denominator is zero" value = 0 else: value = num/denom return value
def general_test(self, d, l, bb, xs):
    """
    Evaluate a regular sparse grid with a custom bounding box at the
    sample points xs and compare against the reference computation done
    without the bounding box.
    @param d: int dimensionality
    @param l: int regular level
    @param bb: list of (left, right) boundary pairs, one per dimension
    @param xs: list of sample points
    """
    test_desc = "dim=%d, level=%d, len(x)=%s" % (d, l, len(xs))
    print test_desc
    self.grid = Grid.createLinearGrid(d)
    self.grid_gen = self.grid.createGridGenerator()
    self.grid_gen.regular(l)
    # random surpluses so the comparison is non-trivial
    alpha = DataVector([self.get_random_alpha() for i in xrange(self.grid.getSize())])
    bb_ = BoundingBox(d)
    for d_k in xrange(d):
        dimbb = DimensionBoundary()
        dimbb.leftBoundary = bb[d_k][0]
        dimbb.rightBoundary = bb[d_k][1]
        bb_.setBoundary(d_k, dimbb)
    # Calculate the expected value without the bounding box
    expected_normal = [self.calc_exp_value_normal(x, d, bb, alpha) for x in xs]
    #expected_transposed = [self.calc_exp_value_transposed(x, d, bb, alpha) for x in xs]
    # Now set the bounding box
    self.grid.getStorage().setBoundingBox(bb_)
    dm = DataMatrix(len(xs), d)
    for k, x in enumerate(xs):
        dv = DataVector(x)
        dm.setRow(k, dv)
    multEval = createOperationMultipleEval(self.grid, dm)
    actual_normal = DataVector(len(xs))
    #actual_transposed = DataVector(len(xs))
    multEval.mult(alpha, actual_normal)
    #multEval.mult(alpha, actual_transposed)
    actual_normal_list = []
    for k in xrange(len(xs)):
        actual_normal_list.append(actual_normal.__getitem__(k))
    #actual_transposed_list = []
    #for k in xrange(len(xs)):
    #    actual_transposed_list.append(actual_transposed.__getitem__(k))
    # NOTE(review): assertAlmostEqual on two lists only short-circuits on
    # exact equality; it does not compare element-wise with a tolerance
    # -- confirm this is intended
    self.assertAlmostEqual(actual_normal_list, expected_normal)
    #self.assertAlmostEqual(actual_tranposed_list, expected_tranposed)
    del self.grid
def testDotProduct(self):
    """Dot product of a vector with itself equals its sum of squares."""
    from pysgpp import DataVector
    d = DataVector(3)
    expected = 0
    for i in xrange(len(d)):
        d[i] = i + 1
        expected += (i + 1) * (i + 1)
    self.assertEqual(d.dotProduct(d), expected)
def getCollocationNodes(self):
    """
    Create a set of all collocation nodes.
    @return: np.ndarray of shape (#points, dim), dtype float32
    """
    storage = self.grid.getStorage()
    nodes = np.ndarray([storage.size(), storage.dim()], dtype='float32')
    coords = DataVector(storage.dim())
    for ix in xrange(storage.size()):
        storage.get(ix).getCoords(coords)
        nodes[ix, :] = coords.array()
    return nodes
def nextSamples(self, n=1):
    """
    Draw the next n samples from the generator and map them to the
    probabilistic space of the active parameters.
    @param n: int number of samples to generate
    @return: Samples container with ACTIVEPROBABILISTIC entries
    """
    p = DataVector(self._dim)
    ans = Samples(self._params, dtype=DistributionType.UNITUNIFORM)
    U = self._params.activeParams().getIndependentJointDistribution()
    for _ in xrange(n):
        self.__genObj.getSample(p)
        # transform it to the probabilistic space via the inverse CDF
        q = U.ppf(p.array())
        # add it to the output
        ans.add(q, dtype=SampleType.ACTIVEPROBABILISTIC)
    return ans
def setUp(self):
    """Build a level-2 2d grid, a regular 20x20 training lattice with
    reproducible random integer targets, the per-point residual errors,
    and the WeightedErrorRefinementFunctor under test."""
    #
    # Grid
    #
    DIM = 2
    LEVEL = 2
    self.grid = Grid.createLinearGrid(DIM)
    self.grid_gen = self.grid.createGridGenerator()
    self.grid_gen.regular(LEVEL)
    #
    # trainData, classes, errors
    #
    xs = []
    DELTA = 0.05
    DELTA_RECI = int(1 / DELTA)
    # regular lattice of training points with spacing DELTA
    for i in xrange(DELTA_RECI):
        for j in xrange(DELTA_RECI):
            xs.append([DELTA * i, DELTA * j])
    # fixed seed so the test data is reproducible
    random.seed(1208813)
    ys = [random.randint(-10, 10) for i in xrange(DELTA_RECI ** 2)]
    self.trainData = DataMatrix(xs)
    self.classes = DataVector(ys)
    self.alpha = DataVector([3, 6, 7, 9, -1])
    self.errors = DataVector(DELTA_RECI ** 2)
    coord = DataVector(DIM)
    # residual y_i - f(x_i) for every training point
    for i in xrange(self.trainData.getNrows()):
        self.trainData.getRow(i, coord)
        self.errors.__setitem__(i, self.classes[i] - self.grid.eval(self.alpha, coord))
    #
    # Functor
    #
    self.functor = WeightedErrorRefinementFunctor(self.alpha, self.grid)
    self.functor.setTrainDataset(self.trainData)
    self.functor.setClasses(self.classes)
    self.functor.setErrors(self.errors)
def setUp(self):
    """Fill a DataContainer whose row i has every entry equal to i."""
    self.size = 42
    self.dim = 5
    self.container = DataContainer(size=self.size, dim=self.dim)
    values = self.container.getValues()
    points = self.container.getPoints()
    self.vectors = []
    for row in xrange(self.size):
        vec = DataVector(self.dim)
        vec.setAll(row)
        self.vectors.append(vec)
        points.setRow(row, vec)
        values[row] = row
def test_InconsistentRefinement1Point(self):
    """Dimensionally adaptive refinement using surplus coefficients as
    local error indicator and inconsistent hash refinement.
    """
    # point ((3,7), (1,1)) (middle most right) gets larger surplus coefficient
    surpluses = DataVector(self.grid.getSize())
    surpluses.setAll(1.0)
    surpluses[12] = 2.0
    refinement = HashRefinementInconsistent()
    refinement.free_refine(self.HashGridStorage,
                           SurplusRefinementFunctor(surpluses, 1, 0.0))
    # an inconsistent refinement of a single point must not grow the grid
    self.assertEqual(self.grid.getSize(), 17)
def writeSensitivityValues(self, filename):
    """
    Write total effects and Sobol indices per time step to an ARFF file
    named <filename>.sa.stats.arff.
    @param filename: str output file name prefix
    """
    def keymap(key):
        # map a tuple of parameter indices to a comma separated name list
        names = self.getLearner().getParameters().activeParams().getNames()
        ans = [names[i] for i in key]
        return ",".join(ans)

    # parameters
    ts = self.__knowledge.getAvailableTimeSteps()
    gs = self.__knowledge.getGrid(self._qoi).getStorage()
    n = len(ts)
    n1 = gs.dim()
    n2 = 2 ** n1 - 1
    data = DataMatrix(n, n1 + n2 + 1)
    names = ['time'] + [None] * (n1 + n2)
    for k, t in enumerate(ts):
        # estimated anova decomposition
        anova = self.getAnovaDecomposition(t=t)
        me = anova.getSobolIndices()
        # BUG FIX: an interactive 'import ipdb; ipdb.set_trace()'
        # breakpoint fired whenever fewer indices than expected were
        # returned; just adopt the actual sizes instead
        n2 = len(me)
        te = anova.getTotalEffects()
        n1 = len(te)
        v = DataVector(n1 + n2 + 1)
        v.setAll(0.0)
        v[0] = t
        for i, key in enumerate(anova.getSortedPermutations(te.keys())):
            v[i + 1] = te[key]
            if k == 0:
                names[i + 1] = '"$T_{' + keymap(key) + '}$"'
        for i, key in enumerate(anova.getSortedPermutations(me.keys())):
            v[n1 + i + 1] = me[key]
            if k == 0:
                names[n1 + 1 + i] = '"$S_{' + keymap(key) + '}$"'
        data.setRow(k, v)
    writeDataARFF({'filename': filename + ".sa.stats.arff",
                   'data': data,
                   'names': names})
def dehierarchizeList(grid, alpha, gps):
    """
    Evaluate a sparse grid function at the given grid points.
    @param grid: Grid
    @param alpha: DataVector surplus vector
    @param gps: list of HashGridIndex
    @return: DataVector of nodal values, one entry per element of gps
    """
    dim = grid.getStorage().dim()
    coords = DataVector(dim)
    A = DataMatrix(len(gps), dim)
    # collect the evaluation points row-wise
    for row, gp in enumerate(gps):
        gp.getCoords(coords)
        A.setRow(row, coords)
    nodalValues = DataVector(len(gps))
    createOperationMultipleEval(grid, A).mult(alpha, nodalValues)
    return nodalValues
def __updateContainer(self, points, values, dataDict, specification, name):
    """
    Append points, values and dict entries to the containers stored
    under 'name', creating them on first use.
    @return: self (for chaining)
    """
    if name not in self.points:
        self.points[name] = points
    else:
        existing = self.points[name]
        offset = existing.getNrows()
        extra = points.getNrows()
        existing.resizeRows(offset + extra)
        row = DataVector(points.getNcols())
        for i in range(extra):
            points.getRow(i, row)
            existing.setRow(offset + i, row)

    if name not in self.values:
        self.values[name] = values
    else:
        existing = self.values[name]
        offset = len(existing)
        extra = len(values)
        existing.resize(offset + extra)
        for i in range(extra):
            existing[offset + i] = values[i]

    if name not in self.dataDict:
        self.dataDict[name] = dataDict
    else:
        # merge, overwriting existing keys with the new entries
        for key, value in list(dataDict.items()):
            self.dataDict[name][key] = value

    return self
def rank(self, grid, gp, alphas, *args, **kws):
    """Rank a grid point by the absolute difference between the sparse
    grid interpolant and the objective function at its coordinates."""
    storage = grid.getStorage()
    coords = DataVector(storage.getDimension())
    storage.getCoordinates(gp, coords)
    interpolant = createOperationEvalNaive(grid).eval(alphas, coords)
    return abs(interpolant - self.f(coords))
def update(self, grid, v, gpi, params, *args, **kws):
    """Weight the absolute surplus of grid point gpi by the joint
    probability density at its probabilistic coordinates."""
    # coordinates of gpi in the unit hypercube
    gs = grid.getStorage()
    unitCoords = DataVector(gs.getDimension())
    gs.getCoordinates(gpi, unitCoords)
    # transform to the probabilistic space of the active parameters
    ap = params.activeParams()
    U = ap.getIndependentJointDistribution()
    T = ap.getJointTransformation()
    q = T.unitToProbabilistic(unitCoords.array())
    # scale surplus by the probability density at q
    ix = gs.getSequenceNumber(gpi)
    return np.abs(v[ix]) * U.pdf(q)
def setUp(self):
    """Create a random nrows x ncols list of lists and mirror it into a
    flat DataVector used by the tests."""
    from pysgpp import DataVector
    import random
    ## number of rows
    self.nrows = 5
    ## number of columns
    self.ncols = 4
    ## number of entries
    self.N = self.nrows * self.ncols
    ## random list of lists with entries in (-1, 1)
    self.l_rand = [[2 * (random.random() - 0.5) for _ in xrange(self.ncols)]
                   for _ in xrange(self.nrows)]
    ## same as l_rand, but flattened
    self.l_rand_total = []
    for row in self.l_rand:
        self.l_rand_total.extend(row)
    ## Data Vector, corresponding to l_rand
    self.d_rand = DataVector(self.N)
    for i in xrange(self.N):
        self.d_rand[i] = self.l_rand_total[i]
    # sanity check: the vector mirrors the flattened list
    for i in xrange(self.N):
        self.assertEqual(self.d_rand[i], self.l_rand_total[i])
def plotGrid3dSlices(grid):
    """
    Plot a 3d grid as a series of 2d scatter plots, one figure per
    distinct z coordinate; every point is annotated with its level and
    index pair in the first two dimensions.
    @param grid: Grid (three-dimensional)
    """
    gs = grid.getStorage()
    p = DataVector(3)
    d = {}
    # group grid points by their z coordinate
    for i in range(gs.getSize()):
        gp = gs.getPoint(i)
        gs.getCoordinates(gp, p)
        if p[2] in d:
            d[p[2]].append([p[0], p[1], gp.getLevel(0), gp.getLevel(1), gp.getIndex(0), gp.getIndex(1)])
        else:
            d[p[2]] = [[p[0], p[1], gp.getLevel(0), gp.getLevel(1), gp.getIndex(0), gp.getIndex(1)]]
    # sanity output: total number of points over all slices
    print(sum([len(dd) for dd in list(d.values())]))
    for z, items in list(d.items()):
        fig = plt.figure()
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        plt.title('z = %g %s, len=%i' % (z, "(border)" if hasBorder(grid) else "", len(items)))
        for x, y, l0, l1, i0, i1 in items:
            plt.plot(x, y, marker='o', color='blue')
            # annotate with (level, level), (index, index)
            plt.text(x, y, "(%i, %i), (%i, %i)" % (l0, l1, i0, i1), horizontalalignment='center')
        fig.show()
def __getSamples(self, W, T, n):
    """
    Provide n samples in the unit hypercube: either drawn freshly from W
    and transformed, or taken from the preset self.samples (completed
    with freshly drawn values for missing dimensions if self.__ixs is
    set).
    @param W: distribution to sample from
    @param T: transformation set (probabilistic -> unit)
    @param n: int number of samples
    @return: DataMatrix of shape (n, dim)
    """
    if self.samples is None:
        # draw n samples from the distribution
        # BUG FIX: the drawn samples were previously overwritten by the
        # empty result matrix before they could be transformed
        samples = W.rvs(n)
        # transform them to the unit hypercube
        ans = DataMatrix(n, W.getDim())
        for i, sample in enumerate(samples):
            p = T.probabilisticToUnit(sample)
            ans.setRow(i, DataVector(p))
        return ans
    else:
        if self.samples.shape[0] == n:
            dataSamples = self.samples
        else:
            # subsample with replacement
            ixs = np.random.randint(0, len(self.samples), n)
            dataSamples = self.samples[ixs, :]
        # check if there are just samples for a subset of the random
        # variables. If so, add the missing ones
        if self.__ixs is not None:
            ans = W.rvs(n)
            # transform them to the unit hypercube
            for i, sample in enumerate(dataSamples):
                ans[i, :] = T.probabilisticToUnit(ans[i, :])
                ans[i, self.__ixs] = sample
        else:
            ans = dataSamples
        return DataMatrix(ans)
def eval_fullGrid(level, dim, border=True):
    """Return the coordinates of all points of a full grid of the given
    level and dimensionality as an (#points, dim) array."""
    if border:
        grid = Grid.createLinearBoundaryGrid(dim, 1)
    else:
        grid = Grid.createLinearGrid(dim)
    grid.getGenerator().full(level)
    storage = grid.getStorage()
    coords = DataVector(dim)
    result = np.ndarray((storage.getSize(), dim))
    for ix in range(storage.getSize()):
        storage.getCoordinates(storage.getPoint(ix), coords)
        result[ix, :] = coords.array()
    return result
def plotCandidates(self, candidates):
    """Scatter-plot the first two standard coordinates of each candidate
    grid point as green crosses on the unit square."""
    for candidate in candidates:
        coords = DataVector(candidate.getDimension())
        candidate.getStandardCoordinates(coords)
        plt.plot(coords[0], coords[1], "x ", color="green")
    plt.xlim(0, 1)
    plt.ylim(0, 1)
def checkPositivity(grid, alpha):
    """
    Check whether a sparse grid function is non-negative on all points
    of the full grid with the same maximum level.
    @param grid: Grid
    @param alpha: DataVector surplus vector
    @return: bool True iff no significantly negative value was found
    """
    # define a full grid of maxlevel of the grid
    gs = grid.getStorage()
    fullGrid = Grid.createLinearGrid(gs.dim())
    fullGrid.createGridGenerator().full(gs.getMaxLevel())
    fullHashGridStorage = fullGrid.getStorage()
    A = DataMatrix(fullHashGridStorage.size(), fullHashGridStorage.dim())
    p = DataVector(gs.dim())
    for i in xrange(fullHashGridStorage.size()):
        fullHashGridStorage.get(i).getCoords(p)
        A.setRow(i, p)
    res = evalSGFunctionMulti(grid, alpha, A)
    # count values below the numerical noise threshold 1e-13
    # NOTE(review): ymin starts at 0 and ymax at -1e10, so the reported
    # interval only ever covers negative values -- confirm intended
    ymin, ymax, cnt = 0, -1e10, 0
    for i, yi in enumerate(res.array()):
        if yi < 0. and abs(yi) > 1e-13:
            cnt += 1
            ymin = min(ymin, yi)
            ymax = max(ymax, yi)
            A.getRow(i, p)
            print " %s = %g" % (p, yi)
    if cnt > 0:
        print "warning: function is not positive"
        print "%i/%i: [%g, %g]" % (cnt, fullHashGridStorage.size(), ymin, ymax)
    return cnt == 0
def writeCSV(filename, samples, delim=' '):
    """
    Write a DataMatrix row-wise to a delimited text file.
    @param filename: str output path
    @param samples: DataMatrix to dump
    @param delim: str column delimiter (default single space)
    """
    p = DataVector(samples.getNcols())
    # BUG FIX: use a context manager so the file handle is also closed
    # when an exception is raised mid-write
    with open(filename, 'w') as fd:
        for i in range(samples.getNrows()):
            samples.getRow(i, p)
            # str(DataVector) looks like '[a, b, c]'; strip the brackets
            # and rejoin the entries with the requested delimiter
            fd.write(delim.join(str(p)[1:-1].split(', ')) + '\n')
def __init__(self, trainData, samples=None, testData=None,
             bandwidths=None, transformation=None, surfaceFile=None):
    """
    LibAGF density estimation wrapper.
    @param trainData: 2d array of training samples (#samples x dim)
    @param samples: optional raw samples kept alongside
    @param testData: optional test data set
    @param bandwidths: optional per-dimension KDE bandwidths; when not
                       given they are estimated via
                       createOperationInverseRosenblattTransformationKDE
    @param transformation: optional transformation set; its per-dimension
                           bounds override the default unit bounds
    @param surfaceFile: optional file name for surface output
    """
    super(LibAGFDist, self).__init__()
    self.trainData = DataMatrix(trainData)
    self.testData = testData
    # default bounds: unit hypercube (flattened for the 1d case),
    # possibly overwritten by the transformation below
    self.bounds = [[0, 1] for _ in xrange(trainData.shape[1])]
    if len(self.bounds) == 1:
        self.bounds = self.bounds[0]
    if transformation is not None:
        self.bounds = [trans.getBounds()
                       for trans in transformation.getTransformations()]
    self.dim = trainData.shape[1]
    self.samples = samples
    self.transformation = transformation
    self.bandwidths = None
    if bandwidths is not None:
        self.bandwidths = bandwidths
    else:
        # estimate optimal KDE bandwidths from the training data
        op = createOperationInverseRosenblattTransformationKDE(self.trainData)
        self.bandwidths = DataVector(self.dim)
        op.getOptKDEbdwth(self.bandwidths)
    self.surfaceFile = surfaceFile
def testSaveLearnedKnowledge(self):
    """Round-trip test: surpluses saved via CheckpointController can be
    read back verbatim from the produced gzipped ARFF file."""
    testValues = [-0.0310651210442,
                  -0.618841896127,
                  0.649230972775,
                  0.649230972775,
                  -0.618841896127]
    alpha = DataVector(len(testValues))
    for i in range(len(testValues)):
        alpha[i] = testValues[i]
    learnedKnowledge = LearnedKnowledge()
    learnedKnowledge.update(alpha)
    # save checkpoint 0 under the local test path
    controller = CheckpointController("saveknowledge", pathlocal)
    controller.setLearnedKnowledge(learnedKnowledge)
    controller.saveLearnedKnowledge(0)
    # read the written file back, skipping ARFF header lines ('@...')
    sampleLines = list()
    f = gzip.open(pathlocal + "/saveknowledge.0.arff.gz", "r")
    try:
        for line in f.readlines():
            if len(line) > 1 and "@" not in line:
                sampleLines.append(float(line))
    finally:
        f.close()
    self.assertEqual(testValues, [float(i) for i in sampleLines])
def testFreeRefineTrapezoidBoundaries(self):
    """Tests surplus based refine for Hash-Storage"""
    from pysgpp import GridStorage, HashGenerator
    from pysgpp import SurplusRefinementFunctor, HashRefinementBoundaries, DataVector
    storage = GridStorage(2)
    generator = HashGenerator()
    generator.regularWithBoundaries(storage, 1, True)
    # only the last point carries a surplus, so it is the one refined
    surpluses = DataVector(9)
    for i in xrange(8):
        surpluses[i] = 0.0
    surpluses[8] = 1.0
    functor = SurplusRefinementFunctor(surpluses)
    HashRefinementBoundaries().free_refine(storage, functor)
    self.failUnlessEqual(storage.size(), 21)
def update(self, grid, alpha, qoi, t, dtype, iteration):
    """
    Update the knowledge
    @param grid: Grid
    @param alpha: DataVector surplus vector
    @param qoi: string quantity of interest
    @param t: float time step
    @param dtype: KnowledgeType
    @param iteration: int iteration number
    """
    # build the nested dictionary levels on demand
    if iteration not in self.__alphas:
        self.__alphas[iteration] = {}
        self.__grids[iteration] = {}
    perQoi = self.__alphas[iteration]
    if qoi not in perQoi:
        perQoi[qoi] = {}
        self.__grids[iteration][qoi] = {}
    if dtype not in perQoi[qoi]:
        perQoi[qoi][dtype] = {}
    if t not in perQoi[qoi][dtype]:
        perQoi[qoi][dtype][t] = {}
    # store knowledge; grid and surpluses are stored as copies
    self.__iteration = iteration
    self.__grids[iteration][qoi] = copyGrid(grid)
    perQoi[qoi][dtype][t] = DataVector(alpha)
def testLoadData(self):
    """ARFF adapter loads points and values matching the reference data
    (exact for double precision builds, approximate otherwise)."""
    testPoints = [[0.307143, 0.130137, 0.050000],
                  [0.365584, 0.105479, 0.050000],
                  [0.178571, 0.201027, 0.050000],
                  [0.272078, 0.145548, 0.050000],
                  [0.318831, 0.065411, 0.050000],
                  [0.190260, 0.086986, 0.050000],
                  [0.190260, 0.062329, 0.072500],
                  [0.120130, 0.068493, 0.072500],
                  [0.225325, 0.056164, 0.072500],
                  [0.213636, 0.050000, 0.072500]]
    testValues = [-1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
                  1.000000, -1.000000, -1.000000, -1.000000, -1.000000]
    filename = pathlocal + '/datasets/liver-disorders_normalized.arff.gz'
    adapter = ARFFAdapter(filename)
    container = adapter.loadData()
    points = container.getPoints()
    values = container.getValues()
    size = len(testPoints)
    dim = len(testPoints[0])
    testVector = DataVector(dim)
    for rowIdx in xrange(size):
        points.getRow(rowIdx, testVector)
        for colIdx in xrange(dim):
            # single precision builds only reproduce values approximately
            if cvar.USING_DOUBLE_PRECISION:
                self.assertEqual(testVector[colIdx], testPoints[rowIdx][colIdx])
            else:
                self.assertAlmostEqual(testVector[colIdx], testPoints[rowIdx][colIdx])
        self.assertEqual(values[rowIdx], testValues[rowIdx])
def update(self, grid, v, admissibleSet):
    """
    Recompute the ranking of all admissible grid points from the
    bilinear form of the current grid and estimated surpluses.
    @param grid: Grid
    @param v: DataVector surplus vector
    @param admissibleSet: admissible set container of candidate points
    """
    # prepare data
    gpsi = admissibleSet.values()
    # prepare list of grid points
    gs = grid.getStorage()
    gpsj = [None] * gs.size()
    for i in xrange(gs.size()):
        gpsj[i] = gs.get(i)
    # compute stiffness matrix for next run
    basis = getBasis(grid)
    A = self._strategy.computeBilinearFormByList(basis, gpsi, gpsj)
    # compute the expectation value term for the new points
    b = self._strategy.computeBilinearFormIdentity(basis, gpsi)
    # estimate missing coefficients for the admissible (not yet
    # inserted) points
    w = DataVector(admissibleSet.getSize())
    for i, gp in enumerate(admissibleSet.values()):
        w[i] = estimateSurplus(grid, gp, v)
        # w[i] = estimateConvergence(grid, gp, v)
    # update the ranking, keyed by the grid point hash
    values = self.__computeRanking(v, w, A, b)
    self._ranking = {}
    for i, gpi in enumerate(admissibleSet.values()):
        self._ranking[gpi.hash()] = values[i]
def get_result(self):
    """
    Build the learner, run one refinement step and train it.
    @return: the trained learner
    """
    # .withProgressPresenter(InfoToScreen()) may be chained for verbosity
    self._learner = self._builder.andGetResult()
    gs = self._learner.grid.getStorage()
    # start with an all-zero error vector of matching size
    alpha = DataVector(gs.getSize(), 0.0)
    self._learner.errors = alpha
    self._learner.refineGrid()
    self._learner.learnData()
    # BUG FIX: the method is named get_result but returned None
    return self._learner
def readReferenceMatrix(storage, filename): from pysgpp import DataVector # read reference matrix try: fd = gzOpen(filename, 'r') except IOError as e: fd = None if not fd: fd = gzOpen('../tests/' + filename, 'r') dat = fd.read().strip() fd.close() dat = dat.split('\n') dat = [l.strip().split(None) for l in dat] #print len(dat) #print len(dat[0]) m_ref = DataVector(len(dat), len(dat[0])) for i in range(len(dat)): for j in range(len(dat[0])): #print float(dat[i][j]) m_ref[i * len(dat[0]) + j] = float(dat[i][j]) return m_ref
def plotGrid2d(grid, alpha=None):
    """Plot 2d grid points; when alpha is given, points with negative
    surplus are drawn with a downward marker."""
    gs = grid.getStorage()
    positive = np.zeros([0, 2])
    negative = np.zeros([0, 2])
    coords = DataVector(2)
    for i in xrange(gs.size()):
        gs.get(i).getCoords(coords)
        if alpha is None or alpha[i] >= 0:
            positive = np.vstack((positive, coords.array()))
        else:
            negative = np.vstack((negative, coords.array()))
    # plot the grid points
    plt.plot(positive[:, 0], positive[:, 1], "^ ", color='red')
    plt.plot(negative[:, 0], negative[:, 1], "v ", color='red')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
def testCreateNullVector(self):
    """A null vector has every entry of every row equal to zero."""
    vector = self.container.createNullVector(self.size, self.dim)
    row = DataVector(self.dim)
    for rowIdx in xrange(self.size):
        vector.getRow(rowIdx, row)
        for colIdx in xrange(self.dim):
            self.assertEqual(row[colIdx], 0)
def refineGrid(grid, alpha, f, refnums):
    """
    This function refines a sparse grid function refnum times.

    Arguments:
    grid -- Grid sparse grid from pysgpp
    alpha -- DataVector coefficient vector
    f -- function to be interpolated
    refnums -- int number of refinement steps

    Return nothing
    """
    storage = grid.getStorage()
    generator = grid.getGenerator()
    coords = DataVector(storage.getDimension())
    for _ in range(refnums):
        # refine a single grid point each time
        generator.refine(SurplusRefinementFunctor(alpha, 1))
        # extend alpha vector (new entries uninitialized)
        alpha.resizeZero(storage.getSize())
        # re-evaluate f at every grid point
        for i in range(storage.getSize()):
            storage.getCoordinates(storage.getPoint(i), coords)
            alpha[i] = f(coords)
        # hierarchize
        createOperationHierarchisation(grid).doHierarchisation(alpha)
def hierarchizeEvalHierToTop(grid, nodalValues):
    """
    Hierarchize nodal values by re-inserting the grid points level-sum
    by level-sum into a fresh grid of the same type: each new surplus is
    the difference between the nodal value and the evaluation of the
    partially built interpolant from all coarser levels.
    @param grid: Grid
    @param nodalValues: array of function values at the grid points
    @return: np.ndarray of surpluses, ordered like the original storage
    """
    gs = grid.getStorage()
    numDims = gs.getDimension()
    # load a new empty grid which we fill step by step
    newGrid = grid.createGridOfEquivalentType()
    newGs = newGrid.getStorage()
    alpha = np.ndarray(1)
    # add root node to the new grid
    newGs.insert(gs.getPoint(0))
    alpha[0] = nodalValues[0]
    # sort points by levelsum
    ixs = {}
    for i in range(gs.getSize()):
        levelsum = gs.getPoint(i).getLevelSum()
        # skip root node (its level sum equals numDims)
        if levelsum > numDims:
            if levelsum in ixs:
                ixs[levelsum].append(i)
            else:
                ixs[levelsum] = [i]
    # run over the grid points by level sum
    x = DataVector(numDims)
    for levelsum in np.sort(list(ixs.keys())):
        # add the grid points of the current level to the new grid
        newixs = [None] * len(ixs[levelsum])
        for i, ix in enumerate(ixs[levelsum]):
            newixs[i] = (newGs.insert(gs.getPoint(ix)), nodalValues[ix])
        # update the alpha values: surpluses of the current level are the
        # residual w.r.t. the interpolant of all coarser levels, so the
        # snapshot newAlpha is evaluated, not the growing alpha
        alpha = np.append(alpha, np.zeros(newGs.getSize() - len(alpha)))
        newAlpha = np.copy(alpha)
        for ix, nodalValue in newixs:
            gs.getCoordinates(newGs.getPoint(ix), x)
            alpha[ix] = nodalValue - evalSGFunction(newGrid, newAlpha, x.array())
    del x
    # store alphas according to indices of grid
    ans = np.ndarray(gs.getSize())
    for i in range(gs.getSize()):
        j = newGs.getSequenceNumber(gs.getPoint(i))
        ans[i] = alpha[j]
    return ans
def makeAddedNodalValuesPositive(self, grid, alpha, addedGridPoints,
                                 tol=-1e-14):
    """
    Force the interpolant to be non-negative at the newly added grid
    points by lifting their surpluses.
    @param grid: Grid
    @param alpha: surplus vector, modified in place
    @param addedGridPoints: iterable of grid points to check
    @param tol: float threshold below which a nodal value is corrected
    @return: the (modified) alpha vector
    """
    # NOTE: removed unused local 'neg = []'
    gs = grid.getStorage()
    x = DataVector(gs.getDimension())
    for gp in addedGridPoints:
        gp.getStandardCoordinates(x)
        yi = evalSGFunction(grid, alpha, x.array())
        if yi < tol:
            # lifting the surplus by -yi raises the nodal value to zero
            i = gs.getSequenceNumber(gp)
            alpha[i] -= yi
            assert alpha[i] > -1e-14
            assert evalSGFunction(grid, alpha, x.array()) < 1e-14
    return alpha
def evalSGFunctionMulti(grid, alpha, A):
    """Evaluate the sparse grid function given by alpha at every row of
    the sample matrix A; raises AttributeError for non-DataMatrix input."""
    if not isinstance(A, DataMatrix):
        raise AttributeError('A has to be a DataMatrix')
    res = DataVector(A.getNrows())
    createOperationMultipleEval(grid, A).mult(alpha, res)
    return res
def getDataSubsetByIndexList(self, indices, name="train"):
    """Extract the rows selected by indices into a new DataContainer."""
    size = len(indices)
    subset_points = DataMatrix(size, self.dim)
    subset_values = DataVector(size)
    row = DataVector(self.dim)
    points = self.getPoints()
    values = self.getValues()
    for pos, index in enumerate(indices):
        points.getRow(index, row)
        subset_points.setRow(pos, row)
        subset_values[pos] = values[index]
    return DataContainer(points=subset_points, values=subset_values,
                         name=name)
def evalError(self, data, alpha):
    """
    Compute the mean squared error of the surplus vector on the given
    data set; as a side effect stores the per-sample squared residuals
    in self.error and the per-basis-function errors
    (B^T error, componentwise multiplied with alpha) in self.errors.
    @param data: DataContainer with points and target values
    @param alpha: DataVector surplus vector
    @return: float MSE (0 for an empty data set)
    """
    size = data.getPoints().getNrows()
    if size == 0:
        return 0
    self.error = DataVector(size)
    self.specification.getBOperator(data.getName()).mult(alpha, self.error)
    self.error.sub(data.getValues())  # error vector
    self.error.sqr()  # entries squared
    errorsum = self.error.sum()
    mse = errorsum / size  # MSE
    # calculate error per basis function
    self.errors = DataVector(len(alpha))
    self.specification.getBOperator(data.getName()).multTranspose(self.error, self.errors)
    self.errors.componentwise_mult(alpha)
    return mse
def getDataSubsetByCategory(self, category):
    """Return a copy of the points/values stored under category as a
    fresh DataContainer; raises if the category is unknown."""
    if category not in self.points or category not in self.values:
        raise Exception("Requested category name doesn't exist")
    return DataContainer(points=DataMatrix(self.points[category]),
                         values=DataVector(self.values[category]),
                         name=category)
def lookupFullGridPoints(self, grid, alpha, candidates):
    """
    Collect full grid points reachable from the candidate points by
    increasing single-dimension levels up to the maximum level.
    @param grid: Grid
    @param alpha: surplus vector (copied into a DataVector)
    @param candidates: iterable of starting grid points
    @return: list of accumulated grid points
    """
    acc = []
    gs = grid.getStorage()
    p = DataVector(gs.getDimension())
    opEval = createOperationEval(grid)
    # TODO: find local max level for adaptively refined grids
    maxLevel = gs.getMaxLevel()
    # boundary grids carry an additional level of boundary points
    if grid.getType() in [GridType_LinearBoundary, GridType_PolyBoundary]:
        maxLevel += 1
    alphaVec = DataVector(alpha)
    for gp in candidates:
        for d in range(gs.getDimension()):
            # only recurse along dimensions with room to refine
            if 0 < gp.getLevel(d) < maxLevel:
                self.lookupFullGridPointsRec1d(grid, alphaVec, gp, d, p, opEval, maxLevel, acc)
    return acc
def computeMoments(self, iterations=None, ts=None): names = [ 'time', 'iteration', 'grid_size', 'mean', 'meanDiscretizationError', 'meanConfidenceIntervalBootstrapping_lower', 'meanConfidenceIntervalBootstrapping_upper', 'var', 'varDiscretizationError', 'varConfidenceIntervalBootstrapping_lower', 'varConfidenceIntervalBootstrapping_upper' ] # parameters if ts is None: ts = self.__knowledge.getAvailableTimeSteps() if iterations is None: iterations = self.__knowledge.getAvailableIterations() nrows = len(ts) * len(iterations) ncols = len(names) data = DataMatrix(nrows, ncols) v = DataVector(ncols) row = 0 for t in ts: for iteration in iterations: size = self.__knowledge.getGrid(qoi=self._qoi, iteration=iteration).getSize() mean = self.mean(ts=[t], iterations=[iteration], totalNumIterations=len(iterations)) var = self.var(ts=[t], iterations=[iteration], totalNumIterations=len(iterations)) v.setAll(0.0) v[0] = t v[1] = iteration v[2] = size v[3], v[4] = mean["value"], mean["err"] v[5], v[6] = mean["confidence_interval"] v[7], v[8] = var["value"], var["err"] v[9], v[10] = var["confidence_interval"] # write results to matrix data.setRow(row, v) row += 1 return {'data': data, 'names': names}
def computeErrors(jgrid, jalpha, grid1, alpha1, grid2, alpha2, n=200):
    """
    Compute some errors to estimate the quality of the interpolation.

    Draws n uniform random samples, evaluates the new sparse grid
    function and the pointwise product of the two old ones, and compares
    them.

    @param jgrid: Grid, new discretization
    @param jalpha: DataVector, new surpluses
    @param grid1: Grid, old discretization
    @param alpha1: DataVector, old surpluses
    @param grid2: Grid, old discretization
    @param alpha2: DataVector, old surpluses
    @param n: int, number of random control samples
    @return: tuple(<float>, <float>), maxdrift, l2norm
    """
    jgs = jgrid.getStorage()

    # create control samples
    samples = DataMatrix(np.random.rand(n, jgs.dim()))

    # evaluate the sparse grid functions
    jnodalValues = evalSGFunctionMulti(jgrid, jalpha, samples)

    # eval grids
    nodalValues1 = evalSGFunctionMulti(grid1, alpha1, samples)
    nodalValues2 = evalSGFunctionMulti(grid2, alpha2, samples)

    # compute errors; the reference value is the product of the two old
    # sparse grid functions at each sample
    err = DataVector(n)
    for i in xrange(n):
        y = nodalValues1[i] * nodalValues2[i]
        # guard against numerically blown-up evaluations of the new grid
        if abs(jnodalValues[i]) > 1e100:
            err[i] = 0.0
        else:
            err[i] = abs(y - jnodalValues[i])

    # get error statistics
    # l2
    l2norm = err.l2Norm()
    # maxdrift
    err.abs()
    maxdrift = err.max()

    return maxdrift, l2norm
def test_1DNormalDist_variance(self):
    """
    Build a sparse grid density estimate (SGDEdist) of a truncated 1d
    normal distribution, plot both the reference and the estimated
    density, and print mean/variance of each for visual comparison.

    NOTE(review): this test only prints and plots; it contains no
    assertions, so it cannot fail on wrong moments.
    """
    # prepare data: normal distribution with mu=1, sigma=2, truncated to [-8, 8]
    U = dists.Normal(1, 2, -8, 8)
    # U = dists.Normal(0.5, .2, 0, 1)

    # define linear transformation from the unit interval to [a, b]
    trans = JointTransformation()
    a, b = U.getBounds()
    trans.add(LinearTransformation(a, b))

    # get a sparse grid approximation: polynomial basis of degree 10, level 5
    grid = Grid.createPolyGrid(U.getDim(), 10)
    grid.getGenerator().regular(5)
    gs = grid.getStorage()

    # NOTE(review): comment said "refine adaptively 5 times" but no
    # refinement happens below — only nodal evaluation of the pdf
    p = DataVector(gs.getDimension())
    nodalValues = np.ndarray(gs.getSize())

    # set function values in alpha: evaluate the pdf at every grid point,
    # mapped from the unit cube to the probabilistic domain
    for i in range(gs.getSize()):
        gs.getPoint(i).getStandardCoordinates(p)
        nodalValues[i] = U.pdf(trans.unitToProbabilistic(p.array()))

    # hierarchize nodal values into surpluses and wrap them as a density
    alpha = hierarchize(grid, nodalValues)
    dist = SGDEdist(grid, alpha, bounds=U.getBounds())

    # plot reference density
    # NOTE(review): mean_label "$\mathbb{E}" is missing its closing "$" —
    # likely a typo in the LaTeX label; confirm before changing
    fig = plt.figure()
    plotDensity1d(U, alpha_value=0.1, mean_label="$\mathbb{E}",
                  interval_label="$\alpha=0.1$")
    fig.show()

    # plot estimated density
    fig = plt.figure()
    plotDensity1d(dist, alpha_value=0.1, mean_label="$\mathbb{E}",
                  interval_label="$\alpha=0.1$")
    fig.show()

    # compare first two moments of reference vs. estimate
    print("1d: mean = %g ~ %g" % (U.mean(), dist.mean()))
    print("1d: var = %g ~ %g" % (U.var(), dist.var()))

    plt.show()
def testOperationB(self):
    """
    Check OperationB on a 1d linear boundary grid of level 2:
    mult() must distribute a unit weight at x=0.25 onto the basis
    functions, and multTranspose() must evaluate basis function 2
    at x=0.25.
    """
    from pysgpp import Grid, DataVector, DataMatrix

    factory = Grid.createLinearBoundaryGrid(1)
    gen = factory.createGridGenerator()
    gen.regular(2)

    alpha = DataVector(factory.getStorage().size())
    p = DataMatrix(1, 1)
    beta = DataVector(1)

    # forward: spread beta over the basis at the sample point 0.25
    alpha.setAll(0.0)
    p.set(0, 0, 0.25)
    beta[0] = 1.0

    opb = factory.createOperationB()
    opb.mult(beta, p, alpha)

    # expected basis values at x=0.25, one per grid point
    for idx, expected in enumerate([0.75, 0.25, 0.5, 1.0, 0.0]):
        self.failUnlessAlmostEqual(alpha[idx], expected)

    # transpose: evaluate basis function 2 at the sample point 0.25
    alpha.setAll(0.0)
    alpha[2] = 1.0
    p.set(0, 0, 0.25)
    beta[0] = 0.0

    opb.multTranspose(alpha, p, beta)
    self.failUnlessAlmostEqual(beta[0], 0.5)
def estimate(self, vol, grid, alpha, f, U, T): r""" Extraction of the expectation the given sparse grid function interpolating the product of function value and pdf. \int\limits_{[0, 1]^d} f(x) * pdf(x) dx """ # first: discretize f fgrid, falpha, discError = discretize(grid, alpha, f, self.__epsilon, self.__refnums, self.__pointsNum, self.level, self.__deg, True) # extract correct pdf for moment estimation vol, W, pdfError = self.__extractDiscretePDFforMomentEstimation(U, T) D = T.getTransformations() # compute the integral of the product gs = fgrid.getStorage() acc = DataVector(gs.size()) acc.setAll(1.) tmp = DataVector(gs.size()) for i, dims in enumerate(W.getTupleIndices()): sgdeDist = W[i] # accumulate objects needed for computing the bilinear form gpsi, basisi = project(fgrid, dims) gpsj, basisj = project(sgdeDist.grid, range(len(dims))) A = self.__computeMassMatrix(gpsi, basisi, gpsj, basisj, W, D) # A = ScipyQuadratureStrategy(W, D).computeBilinearForm(fgrid) self.mult(A, sgdeDist.alpha, tmp) acc.componentwise_mult(tmp) moment = falpha.dotProduct(acc) return vol * moment, discError[1] + pdfError
def __computeRanking(self, v, w, A, b): """ Compute ranking for variance estimation \argmax_{i \in \A} | w (2 Av + wb) | @param v: DataVector, coefficients of known grid points @param w: DataVector, estimated coefficients of unknown grid points @param A: DataMatrix, stiffness matrix @param b: DataVector, squared expectation value contribution @return: numpy array, contains the ranking for the given samples """ # update the ranking av = DataVector(A.getNrows()) av.setAll(0.0) # = Av for i in xrange(A.getNrows()): for j in xrange(A.getNcols()): av[i] += A.get(i, j) * v[j] av.mult(2.) # 2 * Av b.componentwise_mult(w) # w * b av.add(b) # 2 * Av + w * b w.componentwise_mult(av) # = w * (2 * Av + w * b) w.abs() # = | w * (2 * Av + w * b) | return w.array()
def loss_fun(self, params):
    """
    Evaluate the regularized quadratic loss of the current batch:
    0.5 * mean((grid(params, x_i) - y_i)^2) + l * ||params||_2^2.
    Returns inf when no batch has been seen yet.
    """
    if not hasattr(self, '_lastseen'):
        return np.inf

    weights = DataVector(params)

    # squared residual per sample in the last seen batch
    squared_residuals = []
    for idx in xrange(self.batch_size):
        features = self._lastseen[idx, :self.dim]
        target = self._lastseen[idx, self.dim]
        diff = self.grid.eval(weights, DataVector(features)) - target
        squared_residuals.append(diff * diff)

    # L2 regularization term, scaled by self.l
    norm = weights.l2Norm()
    penalty = self.l * norm * norm

    return 0.5 * np.mean(squared_residuals) + penalty
def computeLinearFormByList(self, gps, basis):
    """
    Compute the linear form for a list of grid points: for each point,
    the product over all dimensions of the 1d linear form entries.

    @param gps: list of HashGridIndex
    @param basis: SG++ basis for the grid indices gps
    @return: tuple(DataVector of per-point products, accumulated error)
    """
    result = DataVector(len(gps))
    result.setAll(1.0)
    totalErr = 0.

    # multiply the 1d contributions dimension by dimension
    for idx, gp in enumerate(gps):
        for dim in xrange(gp.dim()):
            value, entryErr = self.getLinearFormEntry(gp, basis, dim)
            result[idx] *= value
            totalErr += entryErr

    return result, totalErr